Created
January 21, 2021 06:35
-
-
Save aqzlpm11/5517378adadaefb7a23bb45c19ad33c2 to your computer and use it in GitHub Desktop.
统计语音时长
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import soundfile as sf | |
from tqdm import tqdm | |
def get_info(audio_file_list):
    """Gather basic metadata for each audio file that exists on disk.

    Every path in ``audio_file_list`` is checked for existence; missing
    paths are reported with a warning on stdout and skipped. Existing files
    are opened with ``soundfile.SoundFile`` and described by a dict with
    the keys ``file``, ``duration`` (frame count divided by sample rate),
    ``channels`` and ``sample_rate``.

    Returns the list of those dicts, in input order.
    """
    records = []
    for audio_path in tqdm(audio_file_list):
        if Path(audio_path).exists():
            with sf.SoundFile(audio_path) as handle:
                entry = dict(
                    file=audio_path,
                    duration=len(handle) / handle.samplerate,
                    channels=handle.channels,
                    sample_rate=handle.samplerate,
                )
            records.append(entry)
        else:
            print(f"Warning: File not exists: {audio_path}")
    return records
import click | |
@click.command()
@click.argument('dir_path')
@click.option('--ext', default='.wav')
def run(dir_path, ext='.wav'):
    """Print summary statistics for the audio files under DIR_PATH.

    Recursively collects every path under DIR_PATH matching ``*<ext>``,
    then prints the file count, the distinct channel counts and sample
    rates, the shortest and longest duration in seconds, and the total
    duration in hours. If no file could be inspected, a warning is printed
    and the remaining statistics are skipped.
    """
    print(f"processing {dir_path}, ext={ext}")
    audio_file_list = [str(w) for w in Path(dir_path).rglob('*' + ext)]
    infos = get_info(audio_file_list)
    print(f"文件数:{len(infos)}")
    if not infos:
        # min()/max() raise ValueError on an empty sequence, so stop here
        # instead of crashing when nothing matched or nothing was readable.
        print(f"Warning: no '{ext}' files found under {dir_path}")
        return
    print(f"信道数:{ {v['channels'] for v in infos} }")
    print(f"采样率:{ {v['sample_rate'] for v in infos} }")
    durations = [v['duration'] for v in infos]
    print(f"时长区间: [{min(durations):.2f}s ~ {max(durations):.2f}s]")
    # Durations are in seconds; divide twice by 60 to report hours.
    print(f"总时长:{sum(durations) / 60 / 60:.2f}h")
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment