"""Build and cache audio features and note targets for a train/test split."""

import os

import numpy as np
from midi2txt.midi_to_txt import midi_to_txt

from piano_transcription import DATASET_PATH
from piano_transcription.data.annotations import (
    compute_target_array_from_times,
    posprocess_annotations,
)
from piano_transcription.data.features import extract_features, fps, number_notes


def _list_wav_files(directory):
    """Return the names of the '.wav' files in *directory*, sorted by name."""
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the cached lists stable from run to run and across machines.
    return sorted(name for name in os.listdir(directory) if name.endswith('.wav'))


def get_feats_and_targs(file_list, audio_path, midi_path):
    """Extract features and frame-wise targets for each audio file.

    For every entry of ``file_list`` the matching annotation is looked up as
    ``<midi_path>/<base name>.mid``. Audio files without such a MIDI file are
    reported on stdout and skipped, so the returned lists may be shorter than
    ``file_list``.

    Args:
        file_list: Audio file names (relative to ``audio_path``).
        audio_path: Directory containing the audio files.
        midi_path: Directory containing the '.mid' annotation files.

    Returns:
        A tuple ``(name_list, feat_list, targ_list)`` of three parallel lists:
        base file names (without extension), the feature arrays returned by
        ``extract_features``, and the target arrays returned by
        ``compute_target_array_from_times``.
    """
    feat_list = []
    targ_list = []
    name_list = []

    for cur_file in file_list:
        base_file_name, _ = os.path.splitext(cur_file)
        midi_file = os.path.join(midi_path, base_file_name + '.mid')
        if not os.path.exists(midi_file):
            print('XX midi file for %s not found, skipping! ' % base_file_name)
            continue

        features = extract_features(os.path.join(audio_path, cur_file))
        # The features are two-dimensional; the first axis length is used as
        # the number of frames of the target array below.
        num_frames, feat_len = features.shape
        print("number of frames: %d, featlen: %d" % (num_frames, feat_len))

        # Only the first element returned by midi_to_txt is used.
        times, _ = midi_to_txt(midi_file)
        # NOTE(review): offset=21 is presumably the MIDI pitch of the lowest
        # piano key (A0) -- confirm against posprocess_annotations.
        annotations = posprocess_annotations(
            np.asarray(times), offset=21, num_classes=number_notes)
        targets = compute_target_array_from_times(
            annotations, fps, num_frames, number_notes)

        feat_list.append(features)
        targ_list.append(targets)
        name_list.append(base_file_name)

    return name_list, feat_list, targ_list


def prepare_librosa(origin_path):
    """Compute features/targets for the train and test sets and cache them.

    Expects ``origin_path`` to contain a ``train`` and a ``test`` directory,
    each holding '.wav' files together with their '.mid' annotations. The
    results are stored as one dictionary via ``np.save`` under
    ``<origin_path>/cached`` (NumPy appends the '.npy' extension). Because
    the payload is a dict it is stored as a pickled object, so reading it
    back requires ``np.load(..., allow_pickle=True)``.

    Args:
        origin_path: Root directory of the dataset.
    """
    train_path = os.path.join(origin_path, 'train')
    test_path = os.path.join(origin_path, 'test')

    # List both directories up front so that a missing directory fails before
    # any (potentially slow) feature extraction starts.
    train_audio_file_list = _list_wav_files(train_path)
    test_audio_file_list = _list_wav_files(test_path)

    # Audio and MIDI files live side by side, hence the same path twice.
    train_name_list, train_feat_list, train_targ_list = get_feats_and_targs(
        train_audio_file_list, train_path, train_path)
    test_name_list, test_feat_list, test_targ_list = get_feats_and_targs(
        test_audio_file_list, test_path, test_path)

    np.save(
        os.path.join(origin_path, 'cached'),
        {
            'train_name_list': train_name_list,
            'train_feat_list': train_feat_list,
            'train_targ_list': train_targ_list,
            'test_name_list': test_name_list,
            'test_feat_list': test_feat_list,
            'test_targ_list': test_targ_list,
        },
    )


if __name__ == '__main__':
    prepare_librosa(os.path.join(DATASET_PATH, 'librosa_disklavier'))