# Created January 21, 2021 06:35
from pathlib import Path
import soundfile as sf
from tqdm import tqdm
def get_info(audio_file_list):
    """Collect basic metadata for each audio file in *audio_file_list*.

    Every existing file is opened with ``sf.SoundFile`` and its length,
    sample rate and channel count are recorded.

    Args:
        audio_file_list: Iterable of audio file paths.

    Returns:
        A list with one dict per existing file, holding the keys ``'file'``,
        ``'duration'``, ``'sample_rate'`` and ``'channels'``. Paths that do
        not exist are reported with a warning and left out of the result.
    """
    res = []
    for wav in tqdm(audio_file_list):
        if not Path(wav).exists():
            print(f"Warning: File not exists: {wav}")
            # Skip the file: falling through would try to open a missing path.
            continue
        with sf.SoundFile(wav) as f:
            res.append({
                'file': wav,
                # Length of the file divided by its sample rate.
                'duration': len(f) / f.samplerate,
                # The summary printed by run() reads this key.
                'sample_rate': f.samplerate,
                'channels': f.channels,
            })
    return res
import click
# NOTE(review): only ``@click.option`` is visible on this function; no
# ``@click.command()`` / ``@click.argument`` appears here — confirm.
@click.option('--ext', default='.wav')
def run(dir_path, ext='.wav'):
    """Print summary statistics for the audio files under *dir_path*.

    Recursively collects every path matching ``'*' + ext``, gathers per-file
    metadata through ``get_info`` and prints the distinct channel counts, the
    distinct sample rates, the shortest and longest duration, and the total
    duration.

    Args:
        dir_path: Directory that is searched recursively.
        ext: File-name suffix to match (default ``'.wav'``).
    """
    print(f"processing {dir_path}, ext={ext}")
    audio_file_list = [str(w) for w in Path(dir_path).rglob('*' + ext)]
    infos = get_info(audio_file_list)
    if not infos:
        # min()/max() below raise ValueError on an empty sequence, so stop
        # early with an explicit message when nothing usable was found.
        print(f"No audio files found in {dir_path} with ext={ext}")
        return

    channels = {v['channels'] for v in infos}
    sample_rates = {v['sample_rate'] for v in infos}
    durations = [v['duration'] for v in infos]

    print(f"信道数:{channels}")
    print(f"采样率:{sample_rates}")
    print(f"时长区间: [{min(durations):.2f}s ~ {max(durations):.2f}s]")
    # Two divisions by 60 convert the summed durations to the "h" unit shown.
    print(f"总时长:{sum(durations) / 60 / 60:.2f}h")
if __name__ == '__main__':
