Commit b457f06f authored by Richard Vogl
Browse files

prepare dataset demo for librosa set

parent 1f6552b1
import os
# Filesystem locations used by the package.
home = os.path.expanduser("~")
PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
OUTPUT_PATH = os.path.join(PROJECT_PATH, 'output')
DATASET_PATH = os.path.join(home, 'datasets', 'piano_trans')

# File names only; the code that reads/writes them is not part of this module.
BEST_MODEL_FILE_NAME = 'best_model.npz'
SETTINGS_FILE_NAME = 'settings.npy'
LOSSES_FILE = 'losses.npy'

# Make sure the output directory exists as soon as the package is imported.
# Creating first and tolerating "already there" avoids the race of a separate
# exists-check followed by makedirs when several processes import the package
# at the same time. Any other failure (e.g. missing permissions) still raises.
try:
    os.makedirs(OUTPUT_PATH)
except OSError:
    if not os.path.exists(OUTPUT_PATH):
        raise
\ No newline at end of file
from madmom.audio.spectrogram import LogarithmicFilteredSpectrogram, SpectrogramDifference, LogarithmicFilterbank
import numpy as np
# Module-level settings for feature extraction and target generation.
# NOTE(review): the meaning of most values follows from how extract_features
# passes them on; units (Hz, frames per second) are presumed -- confirm there.

# --- audio input ---
sample_rate = 44100
num_channels = 1
start_silence = 0

# --- spectrogram / filterbank ---
frame_sizes = [1024, 2048, 4096]  # extract_features loops over these sizes
fps = 100
num_bands = 12
fmin = 30
fmax = 17000
norm_filters = True

# --- spectrogram difference ---
diff = True
diff_ratio = 0.5
positive_diffs = True

# --- target layout ---
number_notes = 88
note_offset = 21  # presumably the MIDI number of the lowest note -- TODO confirm
def extract_features(audiofile):
num_channels = 1
sample_rate = 44100
frame_sizes = [1024, 2048, 4096]
fps = 100
num_bands = 12
fmin = 30
fmax = 17000
norm_filters = True
start_silence = 0
diff = True
diff_ratio = 0.5
positive_diffs = True
def extract_features(audiofile):
spectrograms = []
for frame_size in frame_sizes:
......@@ -33,4 +36,4 @@ def extract_features(audiofile):
spectrograms.append(spectrogram)
return np.hstack(spectrograms)
\ No newline at end of file
return np.hstack(spectrograms)
import os
import numpy as np
from piano_transcription import DATASET_PATH
from piano_transcription.data.features import extract_features
from midi2txt.midi_to_txt import midi_to_txt
from piano_transcription.data.annotations import compute_target_array_from_times, posprocess_annotations
from piano_transcription.data.features import fps, number_notes
def get_feats_and_targs(file_list, audio_path, midi_path, note_offset=21):
    """Build features and targets for every audio file that has a MIDI file.

    For each name in ``file_list`` the file ``<base name>.mid`` is looked up
    in ``midi_path``. Files without such a MIDI file are reported on stdout
    and skipped, so the returned lists may be shorter than ``file_list``.

    Args:
        file_list: audio file names (not full paths) to process.
        audio_path: directory containing the audio files.
        midi_path: directory containing the matching ``.mid`` files.
        note_offset: value forwarded as ``offset`` to
            ``posprocess_annotations``. Defaults to 21, the value that was
            previously hard-coded here.

    Returns:
        A tuple ``(name_list, feat_list, targ_list)`` of equally long lists
        holding, per processed file, the base file name, the array returned
        by ``extract_features`` and the array returned by
        ``compute_target_array_from_times``.
    """
    name_list = []
    feat_list = []
    targ_list = []

    for cur_file in file_list:
        base_file_name, _ = os.path.splitext(cur_file)
        midi_file = os.path.join(midi_path, base_file_name + '.mid')
        if not os.path.exists(midi_file):
            print('XX midi file for %s not found, skipping! ' % base_file_name)
            continue

        features = extract_features(os.path.join(audio_path, cur_file))
        num_frames, feat_len = features.shape
        print("number of frames: %d, featlen: %d" % (num_frames, feat_len))

        # The target array gets the same number of frames as the features.
        times, _ = midi_to_txt(midi_file)
        annotations = posprocess_annotations(times, offset=note_offset,
                                             num_classes=number_notes)
        targets = compute_target_array_from_times(annotations, fps,
                                                  num_frames, number_notes)

        name_list.append(base_file_name)
        feat_list.append(features)
        targ_list.append(targets)

    return name_list, feat_list, targ_list
def _list_wav_files(directory):
    """Return the names of the ``.wav`` files in ``directory``, sorted."""
    # os.listdir gives no ordering guarantee; sorting makes the order of the
    # cached lists reproducible across runs and machines.
    return sorted(name for name in os.listdir(directory) if name.endswith('.wav'))


def prepare_librosa(origin_path):
    """Compute features/targets for the train and test splits and cache them.

    Expects ``origin_path`` to contain the sub-directories ``train`` and
    ``test``; each is used both as audio and as MIDI directory. The result is
    written with ``np.save`` to ``os.path.join(origin_path, 'cached')``.

    NOTE(review): per the NumPy docs, ``np.save`` appends ``.npy`` to the file
    name and stores a dict as a pickled object array, so loading it requires
    ``np.load(..., allow_pickle=True)`` -- only load caches you trust.

    Args:
        origin_path: root directory of the dataset.
    """
    train_path = os.path.join(origin_path, 'train')
    test_path = os.path.join(origin_path, 'test')

    # List both splits before any feature extraction so that a missing
    # directory fails early.
    train_audio_file_list = _list_wav_files(train_path)
    test_audio_file_list = _list_wav_files(test_path)

    train_name_list, train_feat_list, train_targ_list = get_feats_and_targs(
        train_audio_file_list, train_path, train_path)
    test_name_list, test_feat_list, test_targ_list = get_feats_and_targs(
        test_audio_file_list, test_path, test_path)

    cache = {
        'train_name_list': train_name_list,
        'train_feat_list': train_feat_list,
        'train_targ_list': train_targ_list,
        'test_name_list': test_name_list,
        'test_feat_list': test_feat_list,
        'test_targ_list': test_targ_list,
    }
    np.save(os.path.join(origin_path, 'cached'), cache)
if __name__ == '__main__':
    # Run as a script: prepare the 'librosa_disklavier' folder below DATASET_PATH.
    librosa_root = os.path.join(DATASET_PATH, 'librosa_disklavier')
    prepare_librosa(librosa_root)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment