Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Richard Vogl
piano_transcription
Commits
1f6552b1
Commit
1f6552b1
authored
Jun 15, 2018
by
Richard Vogl
Browse files
transcription script for evaluation
parent
0114d182
Changes
3
Hide whitespace changes
Inline
Side-by-side
piano_transcription/__init__.py
View file @
1f6552b1
...
import os

# Absolute path of the package directory (the directory containing this file).
PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
# Directory where training runs write their artifacts.
OUTPUT_PATH = os.path.join(PROJECT_PATH, 'output')

# Canonical artifact file names, shared between training (which writes them)
# and transcription (which reads them).
BEST_MODEL_FILE_NAME = 'best_model.npz'
SETTINGS_FILE_NAME = 'settings.npy'
LOSSES_FILE = 'losses.npy'

# Create the output directory on first import.  makedirs() is wrapped in
# try/except rather than relying solely on the exists() pre-check: two
# processes importing the package concurrently could otherwise race between
# the check and the call, and the loser would crash with OSError even though
# the directory now exists.  (exist_ok is avoided for Python 2 compatibility,
# which this project targets -- see the __future__ imports elsewhere.)
if not os.path.exists(OUTPUT_PATH):
    try:
        os.makedirs(OUTPUT_PATH)
    except OSError:
        # Re-raise only if the directory still does not exist, i.e. the
        # failure was not a benign lost race.
        if not os.path.isdir(OUTPUT_PATH):
            raise
piano_transcription/train_model.py
View file @
1f6552b1
#!/usr/bin/env python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from
__future__
import
print_function
from
__future__
import
print_function
import
sys
import
sys
import
os
import
os
import
time
import
time
import
random
import
argparse
import
argparse
import
numpy
as
np
import
numpy
as
np
...
@@ -19,14 +16,14 @@ import lasagne
...
@@ -19,14 +16,14 @@ import lasagne
from
piano_transcription.utils
import
print_net_architecture
,
select_model
,
collect_inputs
,
BColors
from
piano_transcription.utils
import
print_net_architecture
,
select_model
,
collect_inputs
,
BColors
from
piano_transcription.data
import
load_data
from
piano_transcription.data
import
load_data
from
piano_transcription.data.data_pools
import
UniversalRegressionDataPool
from
piano_transcription.data.data_pools
import
UniversalRegressionDataPool
from
piano_transcription
import
OUTPUT_PATH
from
piano_transcription
import
OUTPUT_PATH
,
BEST_MODEL_FILE_NAME
,
SETTINGS_FILE_NAME
,
LOSSES_FILE
# Binarization threshold for network outputs.
# NOTE(review): its use is outside this excerpt -- confirm against the full file.
THRESHOLD = 0.5
# Hard upper bound on training epochs; the patience-based early stopping in
# run() normally terminates training well before this.
MAX_NUMEPOCHS = 10000

# Console-color helper used for highlighted training-log output.
col = BColors()
def
run
(
model
,
model_name
,
learn_rate
,
batch_size
,
split
,
k_samples
):
def
run
(
model
,
model_name
,
model_name_full
,
learn_rate
,
batch_size
,
split
,
k_samples
):
shuffle_data
=
True
shuffle_data
=
True
# make output directory
# make output directory
...
@@ -69,6 +66,10 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
...
@@ -69,6 +66,10 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
[
loss
.
mean
()])
[
loss
.
mean
()])
print
(
"Starting training..."
)
print
(
"Starting training..."
)
np
.
save
(
os
.
path
.
join
(
out_directory
,
SETTINGS_FILE_NAME
),
{
'model'
:
model_name_full
,
'lr'
:
learn_rate
,
'batch_size'
:
batch_size
,
'split'
:
split
,
'k_samples'
:
k_samples
})
# prepare data pools for training and validation
# prepare data pools for training and validation
train_data_pool
=
UniversalRegressionDataPool
(
feat_train
,
targ_train
,
model
.
SEQ_LENGTH
,
model
.
SPEC_CONTEXT
,
train_data_pool
=
UniversalRegressionDataPool
(
feat_train
,
targ_train
,
model
.
SEQ_LENGTH
,
model
.
SPEC_CONTEXT
,
model
.
STEP_SIZE
,
model
.
CENTRAL_TARGET
,
do_shuffle
=
shuffle_data
)
model
.
STEP_SIZE
,
model
.
CENTRAL_TARGET
,
do_shuffle
=
shuffle_data
)
...
@@ -88,7 +89,7 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
...
@@ -88,7 +89,7 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
refinements
=
max_refinements
refinements
=
max_refinements
cur_patience
=
patience
cur_patience
=
patience
new_lr
=
learn_rate
new_lr
=
learn_rate
# Finally, launch the training loop.
# Finally, launch the training loop.
for
epoch
in
range
(
MAX_NUMEPOCHS
):
for
epoch
in
range
(
MAX_NUMEPOCHS
):
# In each epoch, we do a full pass over the training data:
# In each epoch, we do a full pass over the training data:
...
@@ -118,15 +119,15 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
...
@@ -118,15 +119,15 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
print
(
"
\r
Epoch %3d of %d took %1.3f s (valid: %1.3f s) -- patience: %d "
%
print
(
"
\r
Epoch %3d of %d took %1.3f s (valid: %1.3f s) -- patience: %d "
%
(
epoch
+
1
,
MAX_NUMEPOCHS
,
time
.
time
()
-
start_time
,
time
.
time
()
-
valid_start_time
,
cur_patience
))
(
epoch
+
1
,
MAX_NUMEPOCHS
,
time
.
time
()
-
start_time
,
time
.
time
()
-
valid_start_time
,
cur_patience
))
error
=
train_loss_sum
/
train_batches
error
=
train_loss_sum
/
train_batches
print
((
" training loss: %1.3f "
+
col
.
print_colored
(
"valid loss: %1.3f"
,
BColors
.
HEADER
)
+
" @ lr %1.6f"
)
%
print
((
" training loss: %1.3f "
+
col
.
print_colored
(
"valid loss: %1.3f"
,
BColors
.
WARNING
)
+
" @ lr %1.6f"
)
%
(
error
,
valid_loss
[
epoch
],
new_lr
))
(
error
,
valid_loss
[
epoch
],
new_lr
))
better
=
valid_loss
[
epoch
]
<
valid_loss
[
best_valid_loss_epoch
]
better
=
valid_loss
[
epoch
]
<
valid_loss
[
best_valid_loss_epoch
]
if
epoch
==
0
or
better
:
if
epoch
==
0
or
better
:
best_valid_loss_epoch
=
epoch
best_valid_loss_epoch
=
epoch
np
.
savez
(
os
.
path
.
join
(
out_directory
,
'best_model.npz'
),
*
lasagne
.
layers
.
get_all_param_values
(
network
))
np
.
savez
(
os
.
path
.
join
(
out_directory
,
BEST_MODEL_FILE_NAME
),
*
lasagne
.
layers
.
get_all_param_values
(
network
))
print
(
' new best validation loss at epoch %3d: %1.3f'
%
(
epoch
,
valid_loss
[
epoch
]))
print
(
' new best validation loss at epoch %3d: %1.3f'
%
(
epoch
,
valid_loss
[
epoch
]))
np
.
save
(
os
.
path
.
join
(
out_directory
,
'losses
.npy
'
),
[
train_loss
[:
epoch
],
valid_loss
[:
epoch
]])
np
.
save
(
os
.
path
.
join
(
out_directory
,
LOSSES_FILE
.
npy
),
[
train_loss
[:
epoch
],
valid_loss
[:
epoch
]])
if
epoch
>
0
and
not
better
:
if
epoch
>
0
and
not
better
:
cur_patience
-=
1
cur_patience
-=
1
...
@@ -143,9 +144,6 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
...
@@ -143,9 +144,6 @@ def run(model, model_name, learn_rate, batch_size, split, k_samples):
else
:
else
:
cur_patience
=
patience
cur_patience
=
patience
# Optionally, you could now dump the network weights to a file like this:
#
# And load them again later on like this:
# And load them again later on like this:
# with np.load('model.npz') as f:
# with np.load('model.npz') as f:
# param_values = [f['arr_%d' % i] for i in range(len(f.files))]
# param_values = [f['arr_%d' % i] for i in range(len(f.files))]
...
@@ -162,7 +160,8 @@ def main():
...
@@ -162,7 +160,8 @@ def main():
parser
.
add_argument
(
'--ksamples'
,
help
=
'only use k samples for epoach.'
,
type
=
int
,
default
=
None
)
parser
.
add_argument
(
'--ksamples'
,
help
=
'only use k samples for epoach.'
,
type
=
int
,
default
=
None
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
model_arg
,
model_name_arg
=
select_model
(
args
.
model
)
model_name_full
=
args
.
model
model_arg
,
model_name_arg
=
select_model
(
model_name_full
)
lr
=
args
.
learnrate
lr
=
args
.
learnrate
if
lr
==
-
1
:
if
lr
==
-
1
:
lr
=
model_arg
.
INI_LEARNING_RATE
lr
=
model_arg
.
INI_LEARNING_RATE
...
@@ -172,7 +171,7 @@ def main():
...
@@ -172,7 +171,7 @@ def main():
split
=
args
.
split
split
=
args
.
split
k_samples
=
args
.
ksamples
k_samples
=
args
.
ksamples
run
(
model_arg
,
model_name_arg
,
lr
,
batchsize
,
split
,
k_samples
)
run
(
model_arg
,
model_name_arg
,
model_name_full
,
lr
,
batchsize
,
split
,
k_samples
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
piano_transcription/transcribe.py
0 → 100644
View file @
1f6552b1
import
argparse
import
os
import
lasagne
import
numpy
as
np
from
madmom.features.notes
import
NotePeakPickingProcessor
from
piano_transcription
import
BEST_MODEL_FILE_NAME
,
SETTINGS_FILE_NAME
,
LOSSES_FILE
from
piano_transcription.utils
import
select_model
from
piano_transcription.data
import
FEAT_SIZE
,
OUT_SIZE
from
piano_transcription.data.annotations
import
write_txt_annotation
from
piano_transcription.data.features
import
extract_features
def run(model_path, input_file):
    """Transcribe a piano recording with a previously trained model.

    Loads the training settings and the best network weights from
    ``model_path``, runs the network over features extracted from
    ``input_file``, peak-picks the note activations, and writes the result
    to ``<input_file>out.txt``.

    :param model_path: directory of a training run, containing the settings
        file and the best-model weights written by train_model.py.
    :param input_file: path of the audio file to transcribe.
    """
    # Training stores the settings as a plain dict via np.save(); np.load()
    # returns it wrapped in a 0-d object array, so unwrap with .item()
    # before key access.  NOTE(review): if np.load() already yields a dict
    # on the numpy version in use, drop the .item() call.
    settings = np.load(os.path.join(model_path, SETTINGS_FILE_NAME)).item()

    # select_model() returns a (model_module, short_name) pair -- see its
    # usage in train_model.py -- and only the module is needed here.
    # The original `model = select_model(...)` kept the whole tuple, which
    # would make the attribute accesses below fail.
    model, _ = select_model(settings['model'])
    model_seq_len = model.MAX_PRED_SIZE

    features = extract_features(input_file)

    network = model.build_eval_model(model_seq_len, FEAT_SIZE, OUT_SIZE)

    # Restore the best weights found during training.
    with np.load(os.path.join(model_path, BEST_MODEL_FILE_NAME)) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # transcribe using model
    pred = model.predict(network, features, model_seq_len, OUT_SIZE)

    peak_picker = NotePeakPickingProcessor(pitch_offset=0)
    notes = peak_picker.process(pred)

    # Context manager ensures the output file is flushed and closed even if
    # write_txt_annotation() raises (the original leaked the handle).
    with open(input_file + 'out.txt', 'w') as out_file:
        write_txt_annotation(out_file, notes)
def main():
    """Command-line entry point: parse arguments and transcribe one file."""
    # add argument parser
    parser = argparse.ArgumentParser(description='Transcribe piano tracks.')
    parser.add_argument('--file', help='file to be transcribed.')
    parser.add_argument('--model', help='path to trained model file.')
    args = parser.parse_args()

    model_path = args.model
    input_file = args.file

    # Validate inputs explicitly instead of `assert os.path.exists(...)`:
    # asserts are stripped when Python runs with -O, and parser.error()
    # gives the user a usage message instead of a bare AssertionError.
    if model_path is None or not os.path.exists(model_path):
        parser.error('model path does not exist: %s' % model_path)
    if input_file is None or not os.path.exists(input_file):
        parser.error('input file does not exist: %s' % input_file)

    run(model_path, input_file)
# Script entry point: only transcribe when executed directly, not on import.
if __name__ == '__main__':
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment