Commit 1f6552b1 authored by Richard Vogl
Browse files

transcription script for evaluation

parent 0114d182
......@@ -2,7 +2,9 @@ import os
PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
OUTPUT_PATH = os.path.join(PROJECT_PATH, 'output')
BEST_MODEL_FILE_NAME = 'best_model.npz'
SETTINGS_FILE_NAME = 'settings.npy'
LOSSES_FILE = 'losses.npy'
if not os.path.exists(OUTPUT_PATH):
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import sys
import os
import time
import random
import argparse
import numpy as np
......@@ -19,14 +16,14 @@ import lasagne
from piano_transcription.utils import print_net_architecture, select_model, collect_inputs, BColors
from import load_data
from import UniversalRegressionDataPool
from piano_transcription import OUTPUT_PATH
col = BColors()
def run(model, model_name, learn_rate, batch_size, split, k_samples):
def run(model, model_name, model_name_full, learn_rate, batch_size, split, k_samples):
shuffle_data = True
# make output directory
......@@ -69,6 +66,10 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
print("Starting training..."), SETTINGS_FILE_NAME), {'model': model_name_full, 'lr': learn_rate,
'batch_size': batch_size,
'split': split, 'k_samples': k_samples})
# prepare data pools for training and validation
train_data_pool = UniversalRegressionDataPool(feat_train, targ_train, model.SEQ_LENGTH, model.SPEC_CONTEXT,
model.STEP_SIZE, model.CENTRAL_TARGET, do_shuffle=shuffle_data)
......@@ -88,7 +89,7 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
refinements = max_refinements
cur_patience = patience
new_lr = learn_rate
# Finally, launch the training loop.
for epoch in range(MAX_NUMEPOCHS):
# In each epoch, we do a full pass over the training data:
......@@ -118,15 +119,15 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
print("\rEpoch %3d of %d took %1.3f s (valid: %1.3f s) -- patience: %d " %
(epoch + 1, MAX_NUMEPOCHS, time.time() - start_time, time.time() - valid_start_time, cur_patience))
error = train_loss_sum / train_batches
print((" training loss: %1.3f "+col.print_colored("valid loss: %1.3f", BColors.HEADER)+" @ lr %1.6f") %
print((" training loss: %1.3f "+col.print_colored("valid loss: %1.3f", BColors.WARNING)+" @ lr %1.6f") %
(error, valid_loss[epoch], new_lr))
better = valid_loss[epoch] < valid_loss[best_valid_loss_epoch]
if epoch == 0 or better:
best_valid_loss_epoch = epoch
np.savez(os.path.join(out_directory, 'best_model.npz'), *lasagne.layers.get_all_param_values(network))
np.savez(os.path.join(out_directory, BEST_MODEL_FILE_NAME), *lasagne.layers.get_all_param_values(network))
print(' new best validation loss at epoch %3d: %1.3f' % (epoch, valid_loss[epoch])), 'losses.npy'), [train_loss[:epoch], valid_loss[:epoch]]), LOSSES_FILE.npy), [train_loss[:epoch], valid_loss[:epoch]])
if epoch > 0 and not better:
cur_patience -= 1
......@@ -143,9 +144,6 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
cur_patience = patience
# Optionally, you could now dump the network weights to a file like this:
# And load them again later on like this:
# with np.load('model.npz') as f:
# param_values = [f['arr_%d' % i] for i in range(len(f.files))]
......@@ -162,7 +160,8 @@ def main():
parser.add_argument('--ksamples', help='only use k samples for epoach.', type=int, default=None)
args = parser.parse_args()
model_arg, model_name_arg = select_model(args.model)
model_name_full = args.model
model_arg, model_name_arg = select_model(model_name_full)
lr = args.learnrate
if lr == -1:
lr = model_arg.INI_LEARNING_RATE
......@@ -172,7 +171,7 @@ def main():
split = args.split
k_samples = args.ksamples
run(model_arg, model_name_arg, lr, batchsize, split, k_samples)
run(model_arg, model_name_arg, model_name_full, lr, batchsize, split, k_samples)
if __name__ == '__main__':
import argparse
import os
import lasagne
import numpy as np
from madmom.features.notes import NotePeakPickingProcessor
from piano_transcription.utils import select_model
from import FEAT_SIZE, OUT_SIZE
from import write_txt_annotation
from import extract_features
def run(model_path, input_file):
    """Transcribe one input file with a stored model and write the notes to disk.

    Loads the settings file (``SETTINGS_FILE_NAME``) and the network
    parameters (``BEST_MODEL_FILE_NAME``) from ``model_path``, runs the
    prediction on the features extracted from ``input_file``, picks note
    peaks and writes them to ``input_file + 'out.txt'``.

    :param model_path: directory containing the settings and parameter files.
    :param input_file: path of the file to transcribe.
    """
    # NOTE(review): object arrays are unpickled on load, so only point this
    # at model directories you trust.
    settings = np.load(os.path.join(model_path, SETTINGS_FILE_NAME), allow_pickle=True)
    if isinstance(settings, np.ndarray) and settings.shape == () and settings.dtype == object:
        # A dict handed to np.save comes back wrapped in a 0-d object array,
        # which cannot be indexed by key until it is unwrapped.
        settings = settings.item()

    # The training script unpacks select_model() into (model, model_name);
    # accept that pair as well as a bare model object.
    selected = select_model(settings['model'])
    model = selected[0] if isinstance(selected, (tuple, list)) else selected
    model_seq_len = model.MAX_PRED_SIZE

    features = extract_features(input_file)
    network = model.build_eval_model(model_seq_len, FEAT_SIZE, OUT_SIZE)

    # Parameters were stored positionally, so restore them in index order.
    with np.load(os.path.join(model_path, BEST_MODEL_FILE_NAME)) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # transcribe using model
    pred = model.predict(network, features, model_seq_len, OUT_SIZE)
    peak_picker = NotePeakPickingProcessor(pitch_offset=0)
    notes = peak_picker.process(pred)

    # Close the annotation file deterministically instead of leaking the handle.
    with open(input_file + 'out.txt', 'w') as out_file:
        write_txt_annotation(out_file, notes)
def main():
    """Parse the command line and transcribe the requested file.

    Both ``--file`` and ``--model`` are mandatory. If either is missing, or
    the model path does not exist, the parser prints a usage message and
    exits with status 2 instead of relying on an ``assert`` (which is
    stripped when Python runs with ``-O``).
    """
    # add argument parser
    parser = argparse.ArgumentParser(description='Transcribe piano tracks.')
    parser.add_argument('--file', required=True, help='file to be transcribed.')
    parser.add_argument('--model', required=True, help='path to trained model file.')
    args = parser.parse_args()

    model_path = args.model
    input_file = args.file
    if not os.path.exists(model_path):
        parser.error('model path does not exist: %s' % model_path)

    run(model_path, input_file)
if __name__ == '__main__':
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment