This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Train a neural network to implement the discrete Fourier transform | |
""" | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras.layers import Dense | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Size constants for the DFT-learning experiment named in the module docstring.
# NOTE(review): N is presumably the length of the signal / transform the
# network is trained on — confirm against the code that consumes it.
N = 32
# NOTE(review): presumably the number of training examples generated at
# once — confirm against the code that consumes it.
batch = 10000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pretty_midi | |
import jams | |
def jams_to_midi(filepath, q=1): | |
# q = 1: with pitch bend. q = 0: without pitch bend. | |
jam = jams.load(filepath) | |
midi = pretty_midi.PrettyMIDI() | |
annos = jam.search(namespace='note_midi') | |
if len(annos) == 0: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map every element of `dataset` through the callable returned by
# get_sox_effect('reverb', {}). The callable is wrapped in tf.py_function,
# which is told to produce a single tf.float32 output.
# NOTE(review): the empty dict is presumably the effect's parameter set —
# confirm against get_sox_effect.
sox_dataset = dataset.map(
    lambda speech: tf.py_function(
        get_sox_effect('reverb', {}),
        [speech],
        tf.float32,
    )
    # Parallel mapping was left disabled by the author:
    # num_parallel_calls=AUTOTUNE
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_pb_effect(
    effect_type: str
) -> Callable[[tf.Tensor], np.ndarray]:
    """Build a function that applies one effect from the `pb` module.

    Args:
        effect_type: Name of an attribute of `pb`. It is resolved with
            ``getattr`` and called without arguments to create the effect.

    Returns:
        A function taking a tensor, converting it with ``.numpy()``, running
        the effect on it with ``sample_rate=SR`` and returning the result.

    Raises:
        AttributeError: Raised when the *returned* function is called and
            `pb` has no attribute named ``effect_type`` (the lookup is
            deferred until then).
    """
    def pb_effect(y: tf.Tensor) -> np.ndarray:
        # .numpy() requires an eager tensor, e.g. inside tf.py_function.
        y = y.numpy()
        # A fresh effect object is constructed on every call.
        effect = getattr(pb, effect_type)()
        y_out = effect(y, sample_rate=SR)
        return y_out
    return pb_effect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Effect name -> parameter dict for that effect. An empty dict means no
# parameters are supplied for the effect.
# NOTE(review): the 'frequency' values are presumably cut-off frequencies in
# Hz — confirm against the code that consumes this table.
sox_effects = {
    'compand': {},
    'chorus': {},
    'highpass': {'frequency': 100},
    'lowpass': {'frequency': 8000},
    'phaser': {},
    'reverb': {},
}
def get_sox_effect( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the 'train' split of the 'ljspeech' dataset through tfds (downloading
# is enabled) and also keep the metadata object returned by with_info=True.
dataset, info = tfds.load(
    'ljspeech', split='train',
    download=True, with_info=True)

# Keep only the 'speech' entry of each example, cast it to float32 and scale
# it by 1 / 32767.
# NOTE(review): for int16 input this gives roughly [-1, 1]; a sample of
# -32768 ends up marginally below -1.
dataset = dataset.map(
    lambda example_dict: tf.cast(
        example_dict['speech'], tf.float32
    ) / 32767,
    num_parallel_calls=AUTOTUNE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# down- and up-sampling by a factor of 4
strides = 4

# Model input: sequences of 16384 steps with a single channel.
inputs = tf.keras.Input(shape=(16384, 1))

# Three strided 1-D convolutions. Each uses 'same' padding, ELU activation
# and He-normal kernel initialisation. The filter count doubles
# (16 -> 32 -> 64) while the kernel size halves (64 -> 32 -> 16). With
# 'same' padding and stride 4, every layer shortens the time axis by a
# factor of 4.
d = tf.keras.layers.Conv1D(16, kernel_size=64, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(inputs)
d = tf.keras.layers.Conv1D(32, kernel_size=32, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(d)
d = tf.keras.layers.Conv1D(64, kernel_size=16, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(d)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Subpixel1D(tf.keras.layers.Layer): | |
def __init__(self, | |
r, | |
**kwargs): | |
super(Subpixel1D, self).__init__(**kwargs) | |
self.r = r | |
def build(self, input_shape): | |
# check if channels are evenly divisible for subpixel1d to work! | |
input_shape = tf.TensorShape(input_shape).as_list() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import google.auth
from google.auth.transport import requests

# OAuth scopes requested for the credentials. Look up the scopes you need in
# the API list at https://developers.google.com/oauthplayground
SCOPES = ['https://www.googleapis.com/auth/devstorage.full_control']

# The environment variable GOOGLE_APPLICATION_CREDENTIALS has to point at the
# service account key.json before this runs.
credentials, project_id = google.auth.default(scopes=SCOPES)

# Transport-level request object from google.auth.transport.requests.
# NOTE(review): presumably used later to refresh `credentials` — confirm.
http_request = requests.Request()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile the model with the Adam optimizer and mean squared error as its
# loss function; MSE is also reported as a metric.
autoencoder.compile(optimizer='adam', loss='mse', metrics=['mse'])

# some constants for training
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 100

# Shuffle (training data only) and batch the examples; the evaluation data is
# batched without shuffling.
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
eval_dataset = eval_dataset.batch(BATCH_SIZE)
Newer | Older