Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Shreyan Chowdhury
moodwalk
Commits
9a6cf459
Commit
9a6cf459
authored
Sep 24, 2019
by
Shreyan Chowdhury
Browse files
extend crnn file to have two variants of the model
parent
1c07d497
Changes
1
Show whitespace changes
Inline
Side-by-side
models/crnn.py
View file @
9a6cf459
########################################################
# Has 2 models: CRNN and CRNN2
########################################################
from
test_tube
import
HyperOptArgumentParser
from
utils
import
*
from
models.shared_stuff
import
BasePtlModel
...
...
@@ -144,6 +148,7 @@ class CRNN(BasePtlModel):
# output, hidden = self.gru(x_pack)
# x = self.gru1(x)[0][-1] # TODO: Check if this is correct
x
=
self
.
gru1
(
x
)[
1
][
1
]
# TODO: Check if this is correct
x
=
self
.
dropout
(
x
)
...
...
@@ -154,14 +159,20 @@ class CRNN(BasePtlModel):
def training_step(self, data_batch, batch_i):
    """Run one training step and return the loss for the trainer.

    Args:
        data_batch: batch tuple whose LAST element is the label tensor
            (or a list of label tensors, depending on the collate fn).
        batch_i: batch index within the epoch (unused here).

    Returns:
        dict with key 'loss' holding self.loss(y_hat, y).
    """
    y = data_batch[-1]
    y_hat = self.forward(data_batch)
    try:
        # Labels may arrive as a list of per-sample tensors — stack them.
        y = torch.stack(y).float()
    except Exception:
        # Was a bare `except:` (also swallowed KeyboardInterrupt/SystemExit);
        # narrowed to Exception. Falls back to the already-batched tensor.
        y = y.float()
    y_hat = y_hat.float()
    return {'loss': self.loss(y_hat, y)}
def
validation_step
(
self
,
data_batch
,
batch_i
):
y
=
data_batch
[
-
1
]
y_hat
=
self
.
forward
(
data_batch
)
try
:
y
=
torch
.
stack
(
y
).
float
()
except
:
y
=
y
.
float
()
y_hat
=
y_hat
.
float
()
return
{
'val_loss'
:
self
.
loss
(
y_hat
,
y
),
...
...
@@ -172,8 +183,10 @@ class CRNN(BasePtlModel):
def
test_step
(
self
,
data_batch
,
batch_i
):
y
=
data_batch
[
-
1
]
y_hat
=
self
.
forward
(
data_batch
)
try
:
y
=
torch
.
stack
(
y
).
float
()
y_hat
=
y_hat
.
float
()
except
:
y
=
y
.
float
()
return
{
'test_loss'
:
self
.
loss
(
y_hat
,
y
),
'y'
:
y
.
cpu
().
numpy
(),
...
...
@@ -190,18 +203,203 @@ class CRNN(BasePtlModel):
parser
.
add_argument
(
'--gru_hidden_size'
,
default
=
320
,
type
=
int
)
parser
.
add_argument
(
'--gru_num_layers'
,
default
=
2
,
type
=
int
)
parser
.
opt_list
(
'--drop_prob'
,
default
=
0.2
,
options
=
[
0.2
,
0.5
],
type
=
float
,
tunable
=
False
)
parser
.
opt_list
(
'--learning_rate'
,
default
=
0.0001
,
type
=
float
,
options
=
[
0.00001
,
0.0005
,
0.001
],
tunable
=
False
)
parser
.
opt_list
(
'--slicing_mode'
,
default
=
'full'
,
options
=
[
'full'
,
'slice'
],
type
=
str
,
tunable
=
True
)
parser
.
opt_list
(
'--input_size'
,
default
=
512
,
options
=
[
512
,
1024
],
type
=
int
,
tunable
=
False
)
parser
.
opt_list
(
'--learning_rate'
,
default
=
0.0005
,
type
=
float
,
options
=
[
0.001
,
0.0005
],
tunable
=
False
)
parser
.
opt_list
(
'--slicing_mode'
,
default
=
'slice'
,
options
=
[
'full'
,
'slice'
],
type
=
str
,
tunable
=
False
)
parser
.
opt_list
(
'--input_size'
,
default
=
2048
,
options
=
[
1024
,
512
],
type
=
int
,
tunable
=
False
)
parser
.
add_argument
(
'--processor'
,
default
=
'_ap_mtgjamendo44k'
,
type
=
str
)
# training params (opt)
parser
.
opt_list
(
'--optimizer_name'
,
default
=
'adam'
,
type
=
str
,
options
=
[
'adam'
,
'radam'
],
tunable
=
False
)
# if using 2 nodes with 4 gpus each the batch size here
# (256) will be 256 / (2*8) = 16 per gpu
parser
.
opt_list
(
'--batch_size'
,
default
=
4
,
type
=
int
,
options
=
[
16
,
8
,
4
],
tunable
=
False
,
help
=
'batch size will be divided over all gpus being used across all nodes'
)
return
parser
class CRNN2(BasePtlModel):
    """Convolutional-recurrent model, variant 2.

    A stack of Conv2d/BatchNorm/ReLU blocks with average-pool dropout,
    followed by a GRU over the flattened conv features and a
    sigmoid-activated dense classifier.

    NOTE(review): reconstructed from a token-mangled diff; the layer-shape
    comments are copied from the original source and not re-verified.
    """

    def __init__(self, config, num_class, hparams):
        """Build the network.

        Args:
            config: experiment configuration, forwarded to BasePtlModel.
            num_class: number of output classes (final dense layer width).
            hparams: hyper-parameter namespace; reads gru_hidden_size,
                gru_num_layers and drop_prob.
        """
        super(CRNN2, self).__init__(config, hparams)

        # init bn: normalize the single input (spectrogram) channel
        self.bn_init = nn.BatchNorm2d(1)

        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 64, 5, 2, 2),  # (in_channels, out_channels, kernel_size, stride, padding)
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU())

        # pooling + dropout blocks (mp2x2_dropout is defined but forward()
        # only uses ap2x2_dropout)
        self.mp2x2_dropout = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Dropout2d(0.3))
        self.ap2x2_dropout = nn.Sequential(
            nn.AvgPool2d(2),
            nn.Dropout2d(0.3))

        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.conv5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.conv6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.conv7 = nn.Sequential(
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.conv7b = nn.Sequential(
            nn.Conv2d(384, 512, 3, 1, 1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        # 1x1 conv + global average pool down to (256, 1, 1) per sample
        self.conv11 = nn.Sequential(
            nn.Conv2d(512, 256, 1, 1, 0),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)))

        # NOTE(review): fc_ml is defined but never used in forward() —
        # possibly a leftover from a multi-label head; confirm before removal.
        self.fc_ml = nn.Linear(256, 7)

        # recurrent layer
        self.gru1 = nn.GRU(
            input_size=hparams.gru_hidden_size,
            hidden_size=hparams.gru_hidden_size,
            num_layers=hparams.gru_num_layers
            #, batch_first=True  # TODO: check if this is needed
        )

        # classifier
        self.dense = nn.Linear(hparams.gru_hidden_size, num_class)
        self.dropout = nn.Dropout(self.hparams.drop_prob)

    def forward(self, batch):
        """Run the network on a batch; returns per-class sigmoid activations."""
        x, _, _ = batch  # xs, xlens, labels

        # init bn
        x = self.bn_init(x)

        x = self.conv1(x)  # 157 * 75 * 64
        x = self.conv2(x)  # 157 * 75 * 64
        x = self.ap2x2_dropout(x)  # 78 * 37 * 64
        x = self.conv3(x)  # 78 * 37 * 128
        x = self.conv4(x)  # 78 * 37 * 128
        x = self.ap2x2_dropout(x)  # 39 * 18 * 128
        x = self.conv5(x)  # 39 * 18 * 256
        x = self.conv6(x)  # 39 * 18 * 256
        x = self.conv7(x)  # 39 * 18 * 384
        x = self.conv7b(x)
        x = self.conv11(x)
        # x = self.ap2x2_dropout(x)

        # classifier
        # NOTE(review): assumes the pooled conv features reshape cleanly into
        # (seq, batch, gru_hidden_size) — TODO confirm against actual shapes.
        x = x.view(-1, x.size(0), self.hparams.gru_hidden_size)
        # output, hidden = self.gru(x_pack)
        x = self.gru1(x)[0][-1]  # last time step of the GRU output  # TODO: Check if this is correct
        x = self.dropout(x)
        logit = nn.Sigmoid()(self.dense(x))
        return logit

    def training_step(self, data_batch, batch_i):
        """One training step; returns {'loss': ...} for the trainer."""
        y = data_batch[-1]
        y_hat = self.forward(data_batch)
        try:
            # Labels may arrive as a list of per-sample tensors — stack them.
            y = torch.stack(y).float()
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # propagate. Falls back to the already-batched tensor.
            y = y.float()
        y_hat = y_hat.float()
        return {'loss': self.loss(y_hat, y)}

    def validation_step(self, data_batch, batch_i):
        """One validation step; returns loss plus raw y / y_hat for metrics."""
        y = data_batch[-1]
        y_hat = self.forward(data_batch)
        try:
            y = torch.stack(y).float()
        except Exception:  # was a bare except; see training_step
            y = y.float()
        y_hat = y_hat.float()
        return {'val_loss': self.loss(y_hat, y),
                'y': y.cpu().numpy(),
                'y_hat': y_hat.cpu().numpy()}

    def test_step(self, data_batch, batch_i):
        """One test step; returns loss plus raw y / y_hat for metrics."""
        y = data_batch[-1]
        y_hat = self.forward(data_batch)
        try:
            y = torch.stack(y).float()
        except Exception:  # was a bare except; see training_step
            y = y.float()
        # FIX(consistency): the original never floated y_hat here, unlike
        # training_step/validation_step; made consistent.
        y_hat = y_hat.float()
        return {'test_loss': self.loss(y_hat, y),
                'y': y.cpu().numpy(),
                'y_hat': y_hat.cpu().numpy()}

    @staticmethod
    def add_model_specific_args(parent_parser):
        """Parameters defined here will be available to your model through self.hparams
        """
        parser = HyperOptArgumentParser(strategy=parent_parser.strategy,
                                        parents=[parent_parser])

        # network params
        parser.add_argument('--gru_hidden_size', default=32, type=int)
        parser.add_argument('--gru_num_layers', default=2, type=int)
        parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5],
                        type=float, tunable=False)
        parser.opt_list('--learning_rate', default=0.0005, type=float,
                        options=[0.001, 0.0005], tunable=False)
        parser.opt_list('--slicing_mode', default='slice',
                        options=['full', 'slice'], type=str, tunable=False)
        parser.opt_list('--input_size', default=2048, options=[1024, 512],
                        type=int, tunable=False)
        parser.add_argument('--processor', default='_ap_mtgjamendo44k', type=str)

        # training params (opt)
        parser.opt_list('--optimizer_name', default='adam', type=str,
                        options=['adam'], tunable=False)
        # if using 2 nodes with 4 gpus each the batch size here
        # (256) will be 256 / (2*8) = 16 per gpu
        # FIX: the original contained an unterminated opt_list('--batch_size', ...)
        # call with a second duplicate registration nested inside it (a syntax
        # error, and argparse rejects duplicate option strings anyway);
        # collapsed into the single registration below.
        parser.opt_list('--batch_size', default=8, type=int,
                        options=[16, 8, 4], tunable=True,
                        help='batch size will be divided over all gpus being used across all nodes')
        return parser
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment