#!/usr/bin/env python

import numpy as np
import theano
import lasagne

from lasagne.layers.dnn import Conv2DDNNLayer as Conv2DLayer
from lasagne.layers.dnn import batch_norm_dnn as batch_norm
from lasagne.layers.dnn import MaxPool2DDNNLayer as MaxPool2DLayer
from lasagne.layers import DropoutLayer, FlattenLayer, DenseLayer

from piano_transcription.data.data_pools import BatchIterator

# --- training hyper-parameters ---
INI_LEARNING_RATE = np.float32(0.0005)
BATCH_SIZE = 100
MAX_EPOCHS = 1000
PATIENCE = 4
L2 = 0.0004

# --- input / output dimensions ---
SPEC_BINS = 168       # frequency bins per spectrogram frame
OUT_LEN = 88          # one output unit per piano key
SPEC_CONTEXT = 25     # context frames per input window (must be odd)
STEP_SIZE = 1
SEQ_LENGTH = 1
CENTRAL_TARGET = True

init_conv = lasagne.init.HeNormal

# --- prediction settings ---
MAX_PRED_SIZE = 100
USE_BATCHED_PREDICT = True

dense_layers = 2
dense_units = 256


def get_valid_batch_iterator():
    """Batch iterator factory for validation data."""
    def batch_iterator(batch_size, k_samples, shuffle):
        return BatchIterator(batch_size=batch_size, prepare=prepare,
                             k_samples=k_samples, shuffle=shuffle)
    return batch_iterator


def get_train_batch_iterator():
    """Batch iterator factory for training data."""
    def batch_iterator(batch_size, k_samples, shuffle):
        return BatchIterator(batch_size=batch_size, prepare=prepare_train,
                             k_samples=k_samples, shuffle=shuffle)
    return batch_iterator


def predict(net, X, max_seq_len, out_len):
    """Predict frame-wise note probabilities for a spectrogram X."""
    seq_len, feat_len = X.shape

    # zero-pad the spectrogram so every frame has a full context window
    pad_width = (SPEC_CONTEXT - 1) // 2
    x_b_p = np.pad(X, ((pad_width, pad_width), (0, 0)), 'constant')

    # slice one context window per frame
    step_size = 1
    indices = np.arange(pad_width, x_b_p.shape[0] - pad_width,
                        step_size).astype(int)
    n_seq_pred = len(indices)
    shape = [n_seq_pred, 1, SPEC_CONTEXT, feat_len]
    X_pred = np.zeros(shape, dtype=theano.config.floatX)
    for o_idx, x_idx in enumerate(indices):
        X_pred[o_idx, 0, :, :] = x_b_p[x_idx - pad_width:x_idx + pad_width + 1, :]

    if USE_BATCHED_PREDICT:
        # predict in chunks of MAX_PRED_SIZE windows
        # (the last chunk may be shorter than MAX_PRED_SIZE)
        p_b = np.zeros((seq_len, out_len))
        n_batches = int(np.ceil(X_pred.shape[0] / float(MAX_PRED_SIZE)))
        for batch in range(n_batches):
            i0 = batch * MAX_PRED_SIZE
            i1 = i0 + MAX_PRED_SIZE
            i1o = min(i1, seq_len)
            p_b[i0:i1o] = net.predict_proba(X_pred[i0:i1])[0:(i1o - i0)]
    else:
        # predict all windows in one pass
        p_b = net.predict_proba(X_pred)[:seq_len]

    return p_b


def prepare(x, y):
    """Prepare a batch: drop singleton target dimensions."""
    y = np.squeeze(y)
    return x, y


def prepare_train(x, y):
    """Prepare a training batch (currently identical to `prepare`)."""
    x, y = prepare(x, y)
    return x, y


def build_eval_model(max_seq_len, feat_len, out_len):
    """Build the network used at prediction time."""
    if USE_BATCHED_PREDICT:
        return build_model(batch_size=MAX_PRED_SIZE, seq_length=1,
                           feat_len=feat_len, out_len=out_len)
    else:
        return build_model(batch_size=max_seq_len, seq_length=1,
                           feat_len=feat_len, out_len=out_len)


def build_model(batch_size=BATCH_SIZE, seq_length=None,
                feat_len=SPEC_BINS, out_len=OUT_LEN):
    """Compile net architecture."""
    nonlin = lasagne.nonlinearities.rectify

    # --- input layers ---
    l_in = lasagne.layers.InputLayer(shape=(batch_size, 1, SPEC_CONTEXT,
                                            feat_len))

    # --- conv layers ---
    net = Conv2DLayer(l_in, num_filters=32, filter_size=3, stride=1, pad=0,
                      W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = Conv2DLayer(net, num_filters=32, filter_size=3, stride=1, pad=0,
                      W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = MaxPool2DLayer(net, pool_size=3)
    net = DropoutLayer(net, p=0.3)

    net = Conv2DLayer(net, num_filters=64, filter_size=3, stride=1, pad=0,
                      W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = Conv2DLayer(net, num_filters=64, filter_size=3, stride=1, pad=0,
                      W=init_conv(), nonlinearity=nonlin)
    net = batch_norm(net)
    net = MaxPool2DLayer(net, pool_size=3)
    net = DropoutLayer(net, p=0.3)

    # --- dense layers ---
    net = FlattenLayer(net)
    for _ in range(dense_layers):
        net = DenseLayer(net, num_units=dense_units, nonlinearity=nonlin)
        net = batch_norm(net)
        net = DropoutLayer(net, p=0.5)

    # --- output layer: sigmoid unit per piano key ---
    net = DenseLayer(net, num_units=out_len,
                     nonlinearity=lasagne.nonlinearities.sigmoid)

    return net
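

# -----------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original module): wiring
# `build_eval_model` and `predict` together for inference. The code above
# assumes a trained-model wrapper exposing `predict_proba`; the `EvalNet`
# class below is a hypothetical stand-in that compiles a deterministic
# (dropout off, batch norm in inference mode) Theano forward pass. Requires
# a cuDNN-enabled Theano setup because of the lasagne.layers.dnn imports.
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    import theano.tensor as T

    # dummy spectrogram: 500 frames x SPEC_BINS frequency bins
    spec = np.random.rand(500, SPEC_BINS).astype(theano.config.floatX)

    layer = build_eval_model(max_seq_len=spec.shape[0],
                             feat_len=SPEC_BINS, out_len=OUT_LEN)

    x_t = T.tensor4('x')
    y_hat = lasagne.layers.get_output(layer, x_t, deterministic=True)
    f_predict = theano.function([x_t], y_hat)

    class EvalNet(object):
        """Hypothetical wrapper matching the interface `predict` expects."""
        def predict_proba(self, X):
            return f_predict(X)

    p = predict(EvalNet(), spec, max_seq_len=spec.shape[0], out_len=OUT_LEN)
    print(p.shape)  # -> (500, 88): frame-wise probabilities per piano key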