Created
September 5, 2017 17:10
-
-
Save jesseengel/e223622e255bd5b8c9130407397a0494 to your computer and use it in GitHub Desktop.
Script to plot "rainbowgrams" from NSynth (https://arxiv.org/abs/1704.01279)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import librosa | |
import matplotlib | |
import matplotlib.pyplot as plt | |
matplotlib.rcParams['svg.fonttype'] = 'none' | |
import numpy as np | |
from scipy.io.wavfile import read as readwav | |
# Constants | |
n_fft = 512 | |
hop_length = 256 | |
SR = 16000 | |
over_sample = 4 | |
res_factor = 0.8 | |
octaves = 6 | |
notes_per_octave=10 | |
# Plotting functions | |
cdict = {'red': ((0.0, 0.0, 0.0), | |
(1.0, 0.0, 0.0)), | |
'green': ((0.0, 0.0, 0.0), | |
(1.0, 0.0, 0.0)), | |
'blue': ((0.0, 0.0, 0.0), | |
(1.0, 0.0, 0.0)), | |
'alpha': ((0.0, 1.0, 1.0), | |
(1.0, 0.0, 0.0)) | |
} | |
my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict) | |
plt.register_cmap(cmap=my_mask) | |
def note_specgram(path, ax, peak=70.0, use_cqt=True): | |
# Add several samples together | |
if isinstance(path, list): | |
for i, p in enumerate(path): | |
sr, a = readwav(f) | |
audio = a if i == 0 else a + audio | |
# Load one sample | |
else: | |
sr, audio = readwav(f) | |
audio = audio.astype(np.float32) | |
if use_cqt: | |
C = librosa.cqt(audio, sr=sr, hop_length=hop_length, | |
bins_per_octave=int(notes_per_octave*over_sample), | |
n_bins=int(octaves * notes_per_octave * over_sample), | |
real=False, | |
filter_scale=res_factor, | |
fmin=librosa.note_to_hz('C2')) | |
else: | |
C = librosa.stft(audio, n_fft=n_fft, win_length=n_fft, hop_length=hop_length, center=True) | |
mag, phase = librosa.core.magphase(C) | |
phase_angle = np.angle(phase) | |
phase_unwrapped = np.unwrap(phase_angle) | |
dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1] | |
dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi | |
mag = (librosa.logamplitude(mag**2, amin=1e-13, top_db=peak, ref_power=np.max) / peak) + 1 | |
ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow) | |
ax.matshow(mag[::-1, :], cmap=my_mask) | |
def plot_notes(list_of_paths, rows=2, cols=4, col_labels=[], row_labels=[], | |
use_cqt=True, peak=70.0): | |
"""Build a CQT rowsXcols. | |
""" | |
column = 0 | |
N = len(list_of_paths) | |
assert N == rows*cols | |
fig, axes = plt.subplots(rows, cols, sharex=True, sharey=True) | |
fig.subplots_adjust(left=0.1, right=0.9, wspace=0.05, hspace=0.1) | |
# fig = plt.figure(figsize=(18, N * 1.25)) | |
for i, path in enumerate(list_of_paths): | |
row = i / cols | |
col = i % cols | |
if rows == 1: | |
ax = axes[col] | |
elif cols == 1: | |
ax = axes[row] | |
else: | |
ax = axes[row, col] | |
print row, col, path, ax, peak, use_cqt | |
note_specgram(path, ax, peak, use_cqt) | |
ax.set_axis_bgcolor('white') | |
ax.set_xticks([]); ax.set_yticks([]) | |
if col == 0 and row_labels: | |
ax.set_ylabel(row_labels[row]) | |
if row == rows-1 and col_labels: | |
ax.set_xlabel(col_labels[col]) |
Need to change print statement on line 87 to
print(row, col, path, ax, peak, use_cqt)
Then librosa.cqt cmd in line 48 to
` C = librosa.cqt(audio, sr=sr, hop_length=hop_length,
fmin=librosa.note_to_hz('C2'),
n_bins=int(octaves * notes_per_octave * over_sample),
bins_per_octave=int(notes_per_octave*over_sample),
filter_scale=res_factor)
In line 61 : logamplitude does not exist anymore (replaced by amplitude_to_db)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you for the answer, this makes sense. I had to figure out the hard way that librosa is much slower than scipy when it comes to loading in audio.