#!/usr/bin/env python
import numpy as np

import theano

import lasagne
from lasagne.layers.dnn import Conv2DDNNLayer as Conv2DLayer
from lasagne.layers.dnn import batch_norm_dnn as batch_norm
from lasagne.layers.dnn import MaxPool2DDNNLayer as MaxPool2DLayer
from lasagne.layers import DropoutLayer, FlattenLayer, DenseLayer

from piano_transcription.data.data_pools import BatchIterator

INI_LEARNING_RATE = np.float32(0.0005)

# training settings
BATCH_SIZE = 100
MAX_EPOCHS = 1000
PATIENCE = 4    # early-stopping patience (epochs)
L2 = 0.0004     # L2 weight-decay factor

# input/output dimensions
SPEC_BINS = 168   # frequency bins per spectrogram frame
OUT_LEN = 88      # one output unit per piano key

SPEC_CONTEXT = 25      # frames of spectrogram context per input window
STEP_SIZE = 1
SEQ_LENGTH = 1
CENTRAL_TARGET = True  # targets refer to the central frame of the window

init_conv = lasagne.init.HeNormal

MAX_PRED_SIZE = 100         # mini-batch size at prediction time
USE_BATCHED_PREDICT = True

dense_layers = 2
dense_units = 256


def get_valid_batch_iterator():
    def batch_iterator(batch_size, k_samples, shuffle):
        return BatchIterator(batch_size=batch_size, prepare=prepare, k_samples=k_samples, shuffle=shuffle)
    return batch_iterator


def get_train_batch_iterator():
    def batch_iterator(batch_size, k_samples, shuffle):
        return BatchIterator(batch_size=batch_size, prepare=prepare_train, k_samples=k_samples, shuffle=shuffle)
    return batch_iterator
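

# A hypothetical usage sketch of the factories above (assumption: BatchIterator,
# defined in piano_transcription.data.data_pools, yields prepared (x, y)
# mini-batches when iterated over a data pool; the exact interface lives there):
#
#   make_iter = get_train_batch_iterator()
#   train_iter = make_iter(batch_size=BATCH_SIZE, k_samples=None, shuffle=True)
#   # for x_batch, y_batch in train_iter(train_pool): ...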


def predict(net, X, max_seq_len, out_len):
    """Predict frame-wise note activations for a full spectrogram X."""
    seq_len, feat_len = X.shape

    # pad the spectrogram so every frame gets a full context window
    pad_width = (SPEC_CONTEXT - 1) // 2
    x_b_p = np.pad(X, ((pad_width, pad_width), (0, 0)), 'constant')

    # cut one context window of SPEC_CONTEXT frames per original frame
    step_size = 1
    indices = np.arange(pad_width, x_b_p.shape[0] - pad_width, step_size).astype(int)
    n_seq_pred = len(indices)
    shape = [n_seq_pred, 1, SPEC_CONTEXT, feat_len]
    X_pred = np.zeros(shape, dtype=theano.config.floatX)
    for o_idx, x_idx in enumerate(indices):
        X_pred[o_idx, 0, :, :] = x_b_p[x_idx - pad_width:x_idx + pad_width + 1, :]

    if USE_BATCHED_PREDICT:
        # predict in fixed-size chunks of MAX_PRED_SIZE windows
        p_b = np.zeros((seq_len, out_len))
        n_batches = int(np.ceil(X_pred.shape[0] / float(MAX_PRED_SIZE)))
        for batch in range(n_batches):
            i0 = batch * MAX_PRED_SIZE
            i1 = i0 + MAX_PRED_SIZE
            i1o = min(i1, seq_len)
            p_b[i0:i1o] = net.predict_proba(X_pred[i0:i1])[0:(i1o - i0)]
    else:
        # predict all windows in a single batch
        p_b = net.predict_proba(X_pred)[:seq_len]

    return p_b
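

# A minimal usage sketch (hypothetical: assumes `net` is a trained model
# wrapper exposing predict_proba, as used above):
#
#   spec = np.random.rand(1000, SPEC_BINS).astype(np.float32)
#   activations = predict(net, spec, max_seq_len=1000, out_len=OUT_LEN)
#   # -> activations.shape == (1000, OUT_LEN): one sigmoid activation
#   #    per piano key and frame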


def prepare(x, y):
    # drop the singleton sequence dimension from the targets
    y = np.squeeze(y)
    return x, y


def prepare_train(x, y):
    # training batches use the same preparation as validation batches
    x, y = prepare(x, y)
    return x, y


def build_eval_model(max_seq_len, feat_len, out_len):
    if USE_BATCHED_PREDICT:
        return build_model(batch_size=MAX_PRED_SIZE, seq_length=1, feat_len=feat_len, out_len=out_len)
    else:
        return build_model(batch_size=max_seq_len, seq_length=1, feat_len=feat_len, out_len=out_len)


def build_model(batch_size=BATCH_SIZE, seq_length=None, feat_len=SPEC_BINS, out_len=OUT_LEN):
    """Build the CNN architecture and return its output layer."""
    nonlin = lasagne.nonlinearities.rectify

    # --- input layers ---
    l_in = lasagne.layers.InputLayer(shape=(batch_size, 1, SPEC_CONTEXT, feat_len))

    # --- conv layers ---
    net = Conv2DLayer(l_in, num_filters=32, filter_size=3, stride=1, pad=0, W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = Conv2DLayer(net, num_filters=32, filter_size=3, stride=1, pad=0, W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = MaxPool2DLayer(net, pool_size=3)
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=64, filter_size=3, stride=1, pad=0, W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = Conv2DLayer(net, num_filters=64, filter_size=3, stride=1, pad=0, W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = MaxPool2DLayer(net, pool_size=3)
    net = DropoutLayer(net, p=0.3)

    # --- dense layers ---
    net = FlattenLayer(net)
    for _ in range(dense_layers):
        net = DenseLayer(net, num_units=dense_units, nonlinearity=nonlin)
        net = batch_norm(net)
        net = DropoutLayer(net, p=0.5)
    net = DenseLayer(net, num_units=out_len, nonlinearity=lasagne.nonlinearities.sigmoid)

    return net
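

# A minimal sketch of how the returned Lasagne output layer could be compiled
# into a deterministic Theano prediction function (standard Lasagne usage,
# not part of the original training pipeline):
if __name__ == '__main__':
    model = build_model()
    l_in = lasagne.layers.get_all_layers(model)[0]  # the InputLayer
    y_hat = lasagne.layers.get_output(model, deterministic=True)
    predict_fn = theano.function([l_in.input_var], y_hat)
    print('trainable parameters: %d' % lasagne.layers.count_params(model, trainable=True))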