Commit 9a6cf459 authored by Shreyan Chowdhury

extend crnn file to have two variants of the model

parent 1c07d497
########################################################
# Has 2 models: CRNN and CRNN2
########################################################
from test_tube import HyperOptArgumentParser
from utils import *
from models.shared_stuff import BasePtlModel
@@ -144,6 +148,7 @@ class CRNN(BasePtlModel):
# output, hidden = self.gru(x_pack)
# x = self.gru1(x)[0][-1] # TODO: Check if this is correct
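# gru1 returns (output, h_n); [1][1] below takes h_n for the last GRU layer
# (index 1 with the default gru_num_layers=2), i.e. the final hidden state
# of shape (batch, gru_hidden_size)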
x = self.gru1(x)[1][1] # TODO: Check if this is correct
x = self.dropout(x)
@@ -154,14 +159,20 @@ class CRNN(BasePtlModel):
def training_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
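# y may arrive as a list of per-example label tensors (stack them) or as an
# already-batched tensor (just cast to float)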
try:
y = torch.stack(y).float()
except:
y = y.float()
y_hat = y_hat.float()
return {'loss': self.loss(y_hat, y)}
def validation_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
try:
y = torch.stack(y).float()
except:
y = y.float()
y_hat = y_hat.float()
return {
'val_loss': self.loss(y_hat, y),
@@ -172,8 +183,10 @@ class CRNN(BasePtlModel):
def test_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
y_hat = y_hat.float()
try:
y = torch.stack(y).float()
except:
y = y.float()
return {
'test_loss': self.loss(y_hat, y),
'y': y.cpu().numpy(),
@@ -190,18 +203,203 @@ class CRNN(BasePtlModel):
parser.add_argument('--gru_hidden_size', default=320, type=int)
parser.add_argument('--gru_num_layers', default=2, type=int)
parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
parser.opt_list('--learning_rate', default=0.0005, type=float,
options=[0.001, 0.0005], tunable=False)
parser.opt_list('--slicing_mode', default='slice', options=['full', 'slice'], type=str, tunable=False)
parser.opt_list('--input_size', default=2048, options=[1024, 512], type=int, tunable=False)
parser.add_argument('--processor', default='_ap_mtgjamendo44k', type=str)
# training params (opt)
parser.opt_list('--optimizer_name', default='adam', type=str,
options=['adam', 'radam'], tunable=False)
# the batch size set here is divided over all gpus across all nodes,
# e.g. a batch size of 256 on 2 nodes with 4 gpus each gives 256 / 8 = 32 per gpu
parser.opt_list('--batch_size', default=4, type=int,
options=[16, 8, 4], tunable=False,
help='batch size will be divided over all gpus being used across all nodes')
return parser
class CRNN2(BasePtlModel):
def __init__(self, config, num_class, hparams):
super(CRNN2, self).__init__(config, hparams)
# init bn
self.bn_init = nn.BatchNorm2d(1)
self.conv1 = nn.Sequential(
nn.Conv2d(1, 64, 5, 2, 2), # (in_channels, out_channels, kernel_size, stride, padding)
nn.BatchNorm2d(64),
nn.ReLU()
)
self.conv2 = nn.Sequential(
nn.Conv2d(64, 64, 3, 1, 1),
nn.BatchNorm2d(64),
nn.ReLU()
)
self.mp2x2_dropout = nn.Sequential(
nn.MaxPool2d(2),
nn.Dropout2d(0.3)
)
self.ap2x2_dropout = nn.Sequential(
nn.AvgPool2d(2),
nn.Dropout2d(0.3)
)
self.conv3 = nn.Sequential(
nn.Conv2d(64, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv4 = nn.Sequential(
nn.Conv2d(128, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv5 = nn.Sequential(
nn.Conv2d(128, 256, 3, 1, 1),
nn.BatchNorm2d(256),
nn.ReLU()
)
self.conv6 = nn.Sequential(
nn.Conv2d(256, 256, 3, 1, 1),
nn.BatchNorm2d(256),
nn.ReLU()
)
self.conv7 = nn.Sequential(
nn.Conv2d(256, 384, 3, 1, 1),
nn.BatchNorm2d(384),
nn.ReLU()
)
self.conv7b = nn.Sequential(
nn.Conv2d(384, 512, 3, 1, 1),
nn.BatchNorm2d(512),
nn.ReLU()
)
self.conv11 = nn.Sequential(
nn.Conv2d(512, 256, 1, 1, 0),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.AdaptiveAvgPool2d((1, 1))
)
self.fc_ml = nn.Linear(256, 7)
# recurrent layer
self.gru1 = nn.GRU(input_size=hparams.gru_hidden_size,
hidden_size=hparams.gru_hidden_size,
num_layers=hparams.gru_num_layers
#,
#batch_first=True # TODO: check if this is needed
)
# classifier
self.dense = nn.Linear(hparams.gru_hidden_size, num_class)
self.dropout = nn.Dropout(self.hparams.drop_prob)
def forward(self, batch):
#print("batch", batch)
#print("x", x)
#print("xlen", x_lengths)
x, _, _ = batch # xs, xlens, labels
# init bn
x = self.bn_init(x)
x = self.conv1(x) # 157 * 75 * 64
x = self.conv2(x) # 157 * 75 * 64
x = self.ap2x2_dropout(x) # 78 * 37 * 64
x = self.conv3(x) # 78 * 37 * 128
x = self.conv4(x) # 78 * 37 * 128
x = self.ap2x2_dropout(x) # 39 * 18 * 128
x = self.conv5(x) # 39 * 18 * 256
x = self.conv6(x) # 39 * 18 * 256
x = self.conv7(x) # 39 * 18 * 384
x = self.conv7b(x)
x = self.conv11(x)
# x = self.ap2x2_dropout(x)
# classifier
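# conv11 ends in AdaptiveAvgPool2d((1, 1)), so x is (batch, 256, 1, 1) here;
# the view below reinterprets those 256 features as a seq-first sequence of
# length 256 // gru_hidden_size (8 with the default of 32) for the GRU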
x = x.view(-1, x.size(0), self.hparams.gru_hidden_size)
# output, hidden = self.gru(x_pack)
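# gru1 returns (output, h_n); [0][-1] keeps only the last time step of the
# output sequence, shape (batch, gru_hidden_size)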
x = self.gru1(x)[0][-1] # TODO: Check if this is correct
x = self.dropout(x)
logit = nn.Sigmoid()(self.dense(x))
return logit
def training_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
try:
y = torch.stack(y).float()
except:
y = y.float()
y_hat = y_hat.float()
return {'loss': self.loss(y_hat, y)}
def validation_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
try:
y = torch.stack(y).float()
except:
y = y.float()
y_hat = y_hat.float()
return {
'val_loss': self.loss(y_hat, y),
'y': y.cpu().numpy(),
'y_hat': y_hat.cpu().numpy()
}
def test_step(self, data_batch, batch_i):
y = data_batch[-1]
y_hat = self.forward(data_batch)
try:
y = torch.stack(y).float()
except:
y = y.float()
return {
'test_loss': self.loss(y_hat, y),
'y': y.cpu().numpy(),
'y_hat': y_hat.cpu().numpy()
}
@staticmethod
def add_model_specific_args(parent_parser):
"""Parameters defined here will be available to your model through self.hparams
"""
parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser])
# network params
parser.add_argument('--gru_hidden_size', default=32, type=int)
parser.add_argument('--gru_num_layers', default=2, type=int)
parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False)
parser.opt_list('--learning_rate', default=0.0005, type=float,
options=[0.001, 0.0005], tunable=False)
parser.opt_list('--slicing_mode', default='slice', options=['full', 'slice'], type=str, tunable=False)
parser.opt_list('--input_size', default=2048, options=[1024, 512], type=int, tunable=False)
parser.add_argument('--processor', default='_ap_mtgjamendo44k', type=str)
# training params (opt)
parser.opt_list('--optimizer_name', default='adam', type=str,
options=['adam'], tunable=False)
# the batch size set here is divided over all gpus across all nodes,
# e.g. a batch size of 256 on 2 nodes with 4 gpus each gives 256 / 8 = 32 per gpu
parser.opt_list('--batch_size', default=8, type=int,
options=[16, 8, 4], tunable=True,
help='batch size will be divided over all gpus being used across all nodes')
return parser
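# ----------------------------------------------------------------------
# Hypothetical usage sketch (assumptions, not part of this commit): one way
# CRNN2 could be wired to a PyTorch Lightning Trainer. The config dict,
# num_class value and Trainer options are placeholders, not repository values.
from pytorch_lightning import Trainer

root_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)
parser = CRNN2.add_model_specific_args(root_parser)
hparams = parser.parse_args()

config = {}        # placeholder: project-specific config expected by BasePtlModel
num_class = 56     # placeholder: number of output tags for the final dense layer
model = CRNN2(config, num_class, hparams)

trainer = Trainer()  # gpus / max epochs etc. depend on the pytorch_lightning version
trainer.fit(model)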