Last active
September 20, 2024 15:16
-
-
Save twobob/1c7644b3b55290c4f54ca50c3373b523 to your computer and use it in GitHub Desktop.
Vague MIDI-like preprocessing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import librosa | |
import numpy as np | |
from mido import Message, MidiFile, MidiTrack, MetaMessage, bpm2tempo | |
import pretty_midi | |
import matplotlib.pyplot as plt | |
import os | |
import re | |
# Reprocessing switch: when False, audio files whose .mid output already
# exists are skipped; set to True to force regeneration of every MIDI file.
REPROCESS_ALL = False
def extract_bpm_from_filename(filename):
    """Extract a tempo embedded in a filename such as 'Track_140Bpm.wav'.

    The original pattern only matched the exact spelling 'Bpm'; matching is
    now case-insensitive so 'BPM'/'bpm' variants are also recognised
    (backward-compatible generalization).

    Args:
        filename: Base name of the audio file.

    Returns:
        The BPM as an int, or None when no '<digits>Bpm' token is present.
    """
    match = re.search(r'(\d+)bpm', filename, re.IGNORECASE)
    return int(match.group(1)) if match else None
def create_piano_roll(midi_file, output_image):
    """Render a piano-roll image of *midi_file* and save it as *output_image*.

    The MIDI is sampled at 100 frames per second (hence the centisecond
    x-axis) and the plot is written as a 300-dpi PNG.
    """
    roll = pretty_midi.PrettyMIDI(midi_file).get_piano_roll(fs=100)
    plt.figure(figsize=(12, 6))
    plt.imshow(roll, aspect='auto', origin='lower', cmap='Blues')
    plt.colorbar(label='Velocity')
    plt.title('Piano Roll Representation')
    plt.xlabel('Time (centiseconds)')
    plt.ylabel('MIDI Note Number')
    plt.savefig(output_image, dpi=300, bbox_inches='tight')
    plt.close()
def audio_to_midi(audio_file):
    """Convert an audio file to a rough monophonic MIDI transcription.

    The dominant pitch of each librosa.piptrack analysis frame becomes a
    MIDI note; consecutive frames carrying the same note are merged into one
    note event. The MIDI file is written next to the input as '<name>.mid'
    and a piano-roll PNG is rendered alongside it. If the filename contains
    a '<digits>Bpm' token, that tempo is written into the MIDI file.

    Args:
        audio_file: Path to a .wav/.mp3 file.

    Returns:
        Path of the generated (or pre-existing) MIDI file.
    """
    base_name = os.path.splitext(audio_file)[0]
    output_file = f"{base_name}.mid"
    # Skip work that was already done unless reprocessing was requested.
    if os.path.exists(output_file) and not REPROCESS_ALL:
        print(f"MIDI file {output_file} already exists. Skipping processing.")
        return output_file

    bpm = extract_bpm_from_filename(os.path.basename(audio_file))
    y, sr = librosa.load(audio_file)
    y = librosa.util.normalize(y)
    print(f"Processing: {audio_file}")
    if np.max(np.abs(y)) < 0.01:
        print("Warning: Audio signal is very quiet or silent")

    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)
    if bpm:
        track.append(MetaMessage('set_tempo', tempo=bpm2tempo(bpm)))
        print(f"Tempo set to {bpm} BPM based on filename")

    _append_notes(track, pitches, magnitudes)
    mid.save(output_file)

    output_image = f"{base_name}.png"
    create_piano_roll(output_file, output_image)
    return output_file


def _append_notes(track, pitches, magnitudes):
    """Append note_on/note_off events for the strongest pitch of each frame.

    Bug fixes vs. the original loop:
    - The column into `magnitudes` was computed as int(time * sr / n_frames),
      which maps times onto the wrong frames. `pitches` and `magnitudes`
      come from the same piptrack call and share a (freq_bins, n_frames)
      layout, so the loop index itself is the correct column for both.
    - The trailing note_off referenced the loop variable `time` after the
      loop, a NameError when the audio yields zero frames; it is now
      initialised up front.

    NOTE(review): delta times are written as int(seconds * 1000), i.e.
    milliseconds used directly as MIDI ticks, and note_on reuses the same
    delta as the preceding note_off. This is only approximately correct
    timing; kept as-is to preserve the original output scale.
    """
    last_note = None
    last_time = 0
    time = 0  # safe value for the final note_off when no frames exist
    for i, (time, frame) in enumerate(zip(librosa.times_like(pitches), pitches.T)):
        strongest_bin = np.argmax(magnitudes[:, i])
        pitch_value = frame[strongest_bin]
        if pitch_value <= 0:
            continue  # no pitch detected in this frame
        note = int(round(librosa.hz_to_midi(pitch_value)))
        if note == last_note:
            continue  # sustain the current note across identical frames
        delta = int((time - last_time) * 1000)
        if last_note is not None:
            track.append(Message('note_off', note=last_note, velocity=64, time=delta))
        track.append(Message('note_on', note=note, velocity=64, time=delta))
        last_note = note
        last_time = time
    if last_note is not None:
        track.append(Message('note_off', note=last_note, velocity=64,
                             time=int((time - last_time) * 1000)))
def process_folder(folder_path):
    """Run audio_to_midi on every .wav/.mp3 file directly inside *folder_path*."""
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(('.wav', '.mp3')):
            audio_to_midi(os.path.join(folder_path, entry))
def main(input_path=r"E:\Dubstep_diffusion\tracks"):
    """Convert *input_path* — a folder of audio files or a single file — to MIDI.

    Defaults to the original hard-coded tracks folder; pass another path to
    reuse the script programmatically.

    Args:
        input_path: Directory (processed recursively via process_folder) or
            a single audio file path.
    """
    if os.path.isdir(input_path):
        process_folder(input_path)
    else:
        output_midi = audio_to_midi(input_path)
        print(f"MIDI file saved as: {output_midi}")


# Guarding the entry point stops the conversion from running as a side
# effect of importing this module (the original executed on import).
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment