This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ai.train('result.txt', | |
line_by_line=False, | |
from_cache=False, | |
num_steps=100000, | |
generate_every=5000, | |
save_every=5000, | |
save_gdrive=True, | |
learning_rate=1e-3, | |
fp16=False, | |
batch_size=1, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' ffmpeg not found when using pydub utils | |
___ | |
On Windows, download ffmpeg from the official website, add it to PATH, then restart Git Bash.
https://github.com/jiaaro/pydub/issues/348 | |
''' | |
''' AssertionError: Distributed mode requires CUDA | |
___ | |
A must-read to confirm that both your GPU and its drivers support the CUDA version you have installed (or are about to install):
https://stackoverflow.com/questions/60987997/why-torch-cuda-is-available-returns-false-even-after-installing-pytorch-with |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attention parameters | |
attention_rnn_dim=1024, # sets the number of units in the RNN | |
attention_dim=128, # sets the number of units in the attention mechanism | |
# These two values are relatively large and may require a significant amount of GPU memory during training and inference. | |
# Location Layer parameters | |
attention_location_n_filters=32, # sets the number of filters in the CNN | |
attention_location_kernel_size=31, # sets the size of the filters | |
# This means that the CNN has 32 filters and each filter has a kernel size of 31. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' Make metadata.csv and filelists via https://jaimeleal.github.io/how-to-speech-synthesis ''' | |
import os | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
import numpy as np | |
filepath = 'E:/AlanWatts/dataset/transcripts2/' | |
files = os.listdir(filepath) | |
rows = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# snippet of instantiating a client
client = speech.SpeechClient()

# Recognition settings for mono 16-bit linear PCM audio sampled at 22050 Hz.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=22050,
    audio_channel_count=1,
    model="phone_call",  # recognizes low quality audio better than default
    use_enhanced=True,  # if available; the field is a boolean, so pass True rather than 1
    language_code="en-US",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# using AudioSegment | |
def get_duration(self):
    """Return the ``duration_seconds`` value of the audio held in ``self.audio``."""
    audio = self.audio
    return audio.duration_seconds
def single_split(self, from_min, to_min, split_filename): | |
t1 = from_min * 7 * 1000 # convert to milliseconds | |
t2 = to_min * 7 * 1000 | |
split_audio = self.audio[t1:t2] | |
resampled = split_audio.set_frame_rate(22050) # change the sampling rate to 22050 Hz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def remove_sil(file_path: str, file: str, dest_path: str, format="wav"): | |
sound = AudioSegment.from_file(os.path.join(file_path, file), format=format) | |
non_sil_times = detect_nonsilent(sound, min_silence_len=50, silence_thresh=sound.dBFS * 1.5) | |
if len(non_sil_times) == 0: | |
return None | |
elif len(non_sil_times) > 0: | |
non_sil_times_concat = [non_sil_times[0]] | |
if len(non_sil_times) > 1: | |
for t in non_sil_times[1:]: | |
if t[0] - non_sil_times_concat[-1][-1] < 200: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def reduce_noise(file_path: str, file: str, dest_path: str) -> None:
    """Run noise reduction on one audio file and save it under the same name.

    The file is read with ``wavfile.read``, its samples and sample rate are
    passed to ``nr.reduce_noise``, and the result is written with
    ``wavfile.write``.

    Args:
        file_path: Directory that contains the input file.
        file: Name of the input file; reused unchanged for the output file.
        dest_path: Directory the processed file is written to.

    Returns:
        None. Writing is best-effort: a failed write is logged and the
        function returns normally so a batch run can continue.
    """
    import logging

    # load data
    rate, data = wavfile.read(os.path.join(file_path, file))

    # perform noise reduction
    reduced_noise = nr.reduce_noise(y=data, sr=rate)

    out_path = os.path.join(dest_path, file)
    try:
        wavfile.write(out_path, rate, reduced_noise)
    except Exception:
        # The handler must name the exception *class*. The previous
        # `except Exception():` named an instance, which makes Python raise
        # TypeError as soon as any error reaches the handler.
        logging.getLogger(__name__).exception("Could not write %s", out_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_audio_to_wav(filename: str, filepath: str, dest_path: str) -> None:
    """Decode one audio file and export it as WAV.

    Args:
        filename: Name of the source file inside ``filepath``.
        filepath: Directory that contains the source file.
        dest_path: Directory the ``.wav`` file is written to.

    The output keeps the source file's base name with its extension replaced
    by ``.wav``.
    """
    src_path = os.path.join(filepath, filename)
    # splitext handles extensions of any length (and none at all); slicing
    # off the last four characters only worked for three-letter extensions.
    stem = os.path.splitext(filename)[0]
    dest_filepath = os.path.join(dest_path, f"{stem}.wav")
    given_audio = AudioSegment.from_file(src_path, format="mp3")  # replace with mp4 or avi
    given_audio.export(dest_filepath, format="wav")
# create a list of input arguments for the function
# The destination directory is listed once into a set, instead of once per
# source file. A source file is skipped when either its own name or its
# expected ".wav" output name is already present there.
existing_outputs = set(os.listdir(dest_path))
inputs = [
    (filename, filepath, dest_path)
    for filename in os.listdir(filepath)
    if filename not in existing_outputs
    and f"{os.path.splitext(filename)[0]}.wav" not in existing_outputs
]
# create a Process pool with 16 worker processes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# United States of America Python Dictionary to translate States, | |
# Districts & Territories to Two-Letter codes and vice versa. | |
# | |
# https://gist.github.com/rogerallen/1583593 | |
# | |
# Dedicated to the public domain. To the extent possible under law, | |
# Roger Allen has waived all copyright and related or neighboring | |
# rights to this code. | |
us_state_abbrev = { |