# prepare_dataset.py
import os
import numpy as np
from piano_transcription import DATASET_PATH
from piano_transcription.data.features import extract_features
from midi2txt.midi_to_txt import midi_to_txt
from piano_transcription.data.annotations import compute_target_array_from_times, posprocess_annotations
from piano_transcription.data.features import fps, number_notes


def get_feats_and_targs(file_list, audio_path, midi_path):
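    """Extract features and frame-level targets for each audio file in file_list.

    Audio is read from audio_path; the matching annotation is expected as a
    .mid file with the same base name in midi_path. Files without a MIDI
    counterpart are skipped.

    Returns lists of base file names, feature arrays, and target arrays.
    """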
    feat_list = []
    targ_list = []
    name_list = []

    for cur_file in file_list:
        base_file_name, _ = os.path.splitext(cur_file)
        midi_file = os.path.join(midi_path, base_file_name + '.mid')

        if not os.path.exists(midi_file):
            print('XX MIDI file for %s not found, skipping!' % base_file_name)
            continue

        features = extract_features(os.path.join(audio_path, cur_file))
        num_frames, feat_len = features.shape
        print("number of frames: %d, featlen: %d" % (num_frames, feat_len))

        times, _ = midi_to_txt(midi_file)
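        # Build a frame-level target array from the note times; offset=21 presumably
        # shifts MIDI pitch numbers so that A0 (MIDI note 21, the lowest piano key)
        # maps to class 0.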
        targets = compute_target_array_from_times(posprocess_annotations(np.asarray(times), offset=21, num_classes=number_notes),
                                                  fps, num_frames, number_notes)

        feat_list.append(features)
        targ_list.append(targets)
        name_list.append(base_file_name)

    return name_list, feat_list, targ_list


def prepare_librosa(origin_path):
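    """Build feature/target lists for the train and test splits and cache them.

    Expects 'train' and 'test' subdirectories under origin_path, each holding
    .wav files with their annotation .mid files alongside. The collected data
    is written to a single cache file ('cached.npy') in origin_path.
    """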
    train_path = os.path.join(origin_path, 'train')
    test_path = os.path.join(origin_path, 'test')

    train_file_list = os.listdir(train_path)
    train_audio_file_list = [cur_file for cur_file in train_file_list if cur_file.endswith('.wav')]

    test_file_list = os.listdir(test_path)
    test_audio_file_list = [cur_file for cur_file in test_file_list if cur_file.endswith('.wav')]

    # The .mid annotations live next to the .wav files, so the same directory is
    # passed as both audio_path and midi_path.
    train_name_list, train_feat_list, train_targ_list = get_feats_and_targs(train_audio_file_list, train_path, train_path)

    test_name_list, test_feat_list, test_targ_list = get_feats_and_targs(test_audio_file_list, test_path, test_path)

    # np.save pickles the dict and appends '.npy'; load it later with
    # np.load(..., allow_pickle=True).item().
    np.save(os.path.join(origin_path, 'cached'), {'train_name_list': train_name_list, 'train_feat_list': train_feat_list,
                                                  'train_targ_list': train_targ_list,
                                                  'test_name_list': test_name_list, 'test_feat_list': test_feat_list,
                                                  'test_targ_list': test_targ_list})


if __name__ == '__main__':
    prepare_librosa(os.path.join(DATASET_PATH, 'librosa_disklavier'))