Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Paul Primus
dcase2020_task2
Commits
cdb0061b
Commit
cdb0061b
authored
Jun 12, 2020
by
Paul Primus
Browse files
add audio set, change normalization
parent
9ca5631a
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
dcase2020_task2/data_sets/audio_set.py
View file @
cdb0061b
...
...
@@ -6,7 +6,6 @@ from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP,
import
librosa
import
numpy
as
np
from
dcase2020_task2.data_sets
import
MCMDataSet
import
pickle
class
AudioSet
(
BaseDataSet
):
...
...
@@ -31,6 +30,7 @@ class AudioSet(BaseDataSet):
self
.
power
=
power
self
.
fmin
=
fmin
self
.
hop_all
=
hop_all
self
.
normalize_raw
=
normalize_raw
kwargs
=
{
'data_root'
:
self
.
data_root
,
...
...
@@ -38,13 +38,13 @@ class AudioSet(BaseDataSet):
'num_mel'
:
self
.
num_mel
,
'n_fft'
:
self
.
n_fft
,
'hop_size'
:
self
.
hop_size
,
'power'
:
power
,
'normalize'
:
normalize_raw
,
'fmin'
:
fmin
,
'hop_all'
:
hop_all
'power'
:
self
.
power
,
'normalize'
:
self
.
normalize_raw
,
'fmin'
:
self
.
fmin
,
'hop_all'
:
self
.
hop_all
}
class_names
=
sorted
([
class_name
for
class_name
in
os
.
listdir
(
data_root
)
if
os
.
path
.
isdir
(
os
.
path
.
join
(
data_root
,
class_name
))])
[:
30
]
class_names
=
sorted
([
class_name
for
class_name
in
os
.
listdir
(
data_root
)
if
os
.
path
.
isdir
(
os
.
path
.
join
(
data_root
,
class_name
))])
training_sets
=
[]
data_arrays
=
[]
...
...
@@ -88,6 +88,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
normalize
=
True
,
fmin
=
0
,
hop_all
=
False
,
max_file_per_class
=
2
,
max_file_length
=
350
):
...
...
@@ -101,13 +102,14 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
self
.
fmin
=
fmin
self
.
hop_all
=
hop_all
self
.
class_name
=
class_name
self
.
max_file_per_class
=
max_file_per_class
self
.
max_file_length
=
max_file_length
files
=
glob
.
glob
(
os
.
path
.
join
(
data_root
,
class_name
,
'*.wav'
))
assert
len
(
files
)
>
0
files
=
sorted
(
files
)
files
=
sorted
(
files
)
[:
max_file_per_class
]
self
.
files
=
files
self
.
meta_data
=
self
.
__load_meta_data__
(
files
)
...
...
@@ -171,6 +173,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
x
,
sr
=
librosa
.
load
(
file
,
sr
=
16000
,
mono
=
True
)
if
len
(
x
)
>
(
self
.
max_file_length
+
1
*
self
.
hop_size
)
+
self
.
n_fft
:
x
=
x
[:(
self
.
max_file_length
+
1
)
*
self
.
hop_size
+
self
.
n_fft
]
if
self
.
normalize
:
x
=
(
x
-
x
.
mean
())
/
x
.
std
()
...
...
@@ -201,6 +204,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
'file_ids'
:
os
.
sep
.
join
(
os
.
path
.
normpath
(
file_path
).
split
(
os
.
sep
)[
-
4
:])
}
if
__name__
==
'__main__'
:
mcmc
=
MCMDataSet
(
0
,
0
)
a
=
audio_set
=
AudioSet
(
...
...
dcase2020_task2/data_sets/complement_dataset.py
View file @
cdb0061b
...
...
@@ -4,6 +4,16 @@ from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP,
from
dcase2020_task2.data_sets
import
MachineDataSet
import
numpy
as
np
valid_types
=
{
0
:
[
1
,
2
,
5
],
1
:
[
0
,
2
,
5
],
2
:
[
0
,
1
,
5
],
5
:
[
0
,
1
,
2
],
3
:
[
4
],
4
:
[
3
],
}
class
ComplementMCMDataSet
(
BaseDataSet
):
def
__init__
(
...
...
@@ -18,9 +28,11 @@ class ComplementMCMDataSet(BaseDataSet):
power
=
1.0
,
fmin
=
0
,
normalize_raw
=
False
,
normalize
=
None
,
hop_all
=
False
):
assert
type
(
machine_type
)
==
int
and
type
(
machine_id
)
==
int
self
.
data_root
=
data_root
self
.
context
=
context
self
.
num_mel
=
num_mel
...
...
@@ -28,10 +40,8 @@ class ComplementMCMDataSet(BaseDataSet):
self
.
hop_size
=
hop_size
self
.
power
=
power
self
.
fmin
=
fmin
self
.
normalize
=
normalize
self
.
hop_all
=
hop_all
assert
type
(
machine_type
)
==
int
and
type
(
machine_id
)
==
int
self
.
normalize_raw
=
normalize_raw
kwargs
=
{
'data_root'
:
self
.
data_root
,
...
...
@@ -39,77 +49,28 @@ class ComplementMCMDataSet(BaseDataSet):
'num_mel'
:
self
.
num_mel
,
'n_fft'
:
self
.
n_fft
,
'hop_size'
:
self
.
hop_size
,
'power'
:
power
,
'normalize'
:
normalize_raw
,
'fmin'
:
fmin
,
'hop_all'
:
hop_all
'power'
:
self
.
power
,
'normalize'
:
self
.
normalize_raw
,
'fmin'
:
self
.
fmin
,
'hop_all'
:
self
.
hop_all
}
if
machine_id
==
-
1
:
training_sets
=
[]
validation_sets
=
[]
data
=
[]
for
id_
in
ALL_ID_MAP
[
machine_type
]:
training_sets
.
append
(
MachineDataSet
(
machine_type
,
id_
,
mode
=
'training'
,
**
kwargs
))
validation_sets
.
append
(
MachineDataSet
(
machine_type
,
id_
,
mode
=
'validation'
,
**
kwargs
))
data
.
append
(
training_sets
[
-
1
].
data
)
if
normalize
is
None
:
data
=
np
.
concatenate
(
data
,
axis
=-
1
)
mean
=
data
.
mean
(
axis
=
1
,
keepdims
=
True
)
std
=
data
.
std
(
axis
=
1
,
keepdims
=
True
)
else
:
assert
type
(
normalize
)
==
tuple
assert
len
(
normalize
)
==
2
mean
,
std
=
normalize
for
training_set
,
validation_set
in
zip
(
training_sets
,
validation_sets
):
training_set
.
data
=
(
training_set
.
data
-
mean
)
/
std
validation_set
.
data
=
(
validation_set
.
data
-
mean
)
/
std
del
data
else
:
training_set
=
MachineDataSet
(
machine_type
,
machine_id
,
mode
=
'training'
,
**
kwargs
)
validation_set
=
MachineDataSet
(
machine_type
,
machine_id
,
mode
=
'validation'
,
**
kwargs
)
if
normalize
is
None
:
mean
=
training_set
.
data
.
mean
(
axis
=
1
,
keepdims
=
True
)
std
=
training_set
.
data
.
std
(
axis
=
1
,
keepdims
=
True
)
training_set
.
data
=
(
training_set
.
data
-
mean
)
/
std
validation_set
.
data
=
(
validation_set
.
data
-
mean
)
/
std
else
:
assert
type
(
normalize
)
==
tuple
assert
len
(
normalize
)
==
2
mean
,
std
=
normalize
training_set
.
data
=
(
training_set
.
data
-
mean
)
/
std
validation_set
.
data
=
(
validation_set
.
data
-
mean
)
/
std
training_sets
=
[]
# validation_sets = []
valid_types
=
{
0
:
[
1
,
2
,
5
],
1
:
[
0
,
2
,
5
],
2
:
[
0
,
1
,
5
],
5
:
[
0
,
1
,
2
],
3
:
[
4
],
4
:
[
3
],
}
data
=
[]
for
type_
in
ALL_ID_MAP
:
if
type_
in
valid_types
[
machine_type
]:
for
id_
in
ALL_ID_MAP
[
type_
]:
#if type_ != machine_type: #or (id_ != machine_id and machine_id != -1):
for
id_
in
ALL_ID_MAP
[
type_
]:
if
type_
!=
machine_type
or
(
id_
!=
machine_id
and
machine_id
!=
-
1
):
t
=
MachineDataSet
(
type_
,
id_
,
mode
=
'training'
,
**
kwargs
)
t
.
data
=
(
t
.
data
-
mean
)
/
std
data
.
append
(
t
.
data
)
training_sets
.
append
(
t
)
data
=
np
.
concatenate
(
data
,
axis
=-
1
)
# don't load validation set ...
# v = MachineDataSet(type_, id_, mode='validation', **kwargs)
# v.data = (v.data - mean) / std
# validation_sets.append(v)
self
.
mean
=
data
.
mean
(
axis
=
1
,
keepdims
=
True
)
self
.
std
=
data
.
std
(
axis
=
1
,
keepdims
=
True
)
del
data
self
.
training_set
=
torch
.
utils
.
data
.
ConcatDataset
(
training_sets
)
# self.validation_set = torch.utils.data.ConcatDataset(validation_sets)
self
.
mean
=
mean
self
.
std
=
std
@
property
def
observation_shape
(
self
)
->
tuple
:
...
...
dcase2020_task2/data_sets/mcm_dataset.py
View file @
cdb0061b
...
...
@@ -31,6 +31,7 @@ class MCMDataSet(BaseDataSet):
self
.
power
=
power
self
.
fmin
=
fmin
self
.
hop_all
=
hop_all
self
.
normalize_raw
=
normalize_raw
assert
type
(
machine_type
)
==
int
and
type
(
machine_id
)
==
int
...
...
@@ -40,10 +41,10 @@ class MCMDataSet(BaseDataSet):
'num_mel'
:
self
.
num_mel
,
'n_fft'
:
self
.
n_fft
,
'hop_size'
:
self
.
hop_size
,
'power'
:
power
,
'normalize'
:
normalize_raw
,
'fmin'
:
fmin
,
'hop_all'
:
hop_all
'power'
:
self
.
power
,
'normalize'
:
self
.
normalize_raw
,
'fmin'
:
self
.
fmin
,
'hop_all'
:
self
.
hop_all
}
if
machine_id
==
-
1
:
...
...
dcase2020_task2/experiments/classification_experiment.py
View file @
cdb0061b
...
...
@@ -11,7 +11,8 @@ from sacred import SETTINGS
SETTINGS
[
'CAPTURE_MODE'
]
=
'sys'
from
datetime
import
datetime
from
dcase2020_task2.data_sets
import
AudioSet
from
dcase2020_task2.data_sets
import
AudioSet
,
ComplementMCMDataSet
class
ClassificationExperiment
(
BaseExperiment
,
pl
.
LightningModule
):
...
...
@@ -31,12 +32,25 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
# will be set before each epoch
self
.
normal_data_set
=
self
.
objects
[
'data_set'
]
self
.
abnormal_data_set
=
AudioSet
(
self
.
abnormal_data_set
=
ComplementMCMDataSet
(
self
.
objects
[
'machine_type'
],
self
.
objects
[
'machine_id'
],
**
self
.
objects
[
'fetaure_settings'
]
)
self
.
mean
=
torch
.
from_numpy
((
self
.
normal_data_set
.
mean
+
self
.
abnormal_data_set
.
mean
)
/
2
)
self
.
std
=
torch
.
from_numpy
((
self
.
normal_data_set
.
std
+
self
.
abnormal_data_set
.
std
)
/
2
)
if
self
.
objects
.
get
(
'normalize'
)
==
'normal'
:
self
.
mean
=
torch
.
from_numpy
(
self
.
normal_data_set
.
mean
)
self
.
std
=
torch
.
from_numpy
(
self
.
normal_data_set
.
std
)
elif
self
.
objects
.
get
(
'normalize'
)
==
'abnormal'
:
self
.
mean
=
torch
.
from_numpy
(
self
.
abnormal_data_set
.
mean
)
self
.
std
=
torch
.
from_numpy
(
self
.
abnormal_data_set
.
std
)
elif
self
.
objects
.
get
(
'normalize'
)
==
'average'
:
self
.
mean
=
torch
.
from_numpy
((
self
.
normal_data_set
.
mean
+
self
.
abnormal_data_set
.
mean
)
/
2
)
self
.
std
=
torch
.
from_numpy
((
self
.
normal_data_set
.
std
+
self
.
abnormal_data_set
.
std
)
/
2
)
else
:
print
(
'No normalization.'
)
self
.
mean
=
torch
.
zeros
(
self
.
normal_data_set
.
mean
.
shape
)
self
.
std
=
torch
.
ones
(
self
.
normal_data_set
.
std
.
shape
)
self
.
inf_data_loader
=
self
.
get_inf_data_loader
(
torch
.
utils
.
data
.
DataLoader
(
...
...
@@ -165,8 +179,8 @@ def configuration():
context
=
32
model_class
=
'dcase2020_task2.models.CNN'
hidden_size
=
6
4
num_hidden
=
3
hidden_size
=
25
6
num_hidden
=
4
dropout_probability
=
0.0
epochs
=
100
...
...
@@ -176,7 +190,6 @@ def configuration():
if
debug
:
num_workers
=
0
epochs
=
100
hop_all
=
True
else
:
num_workers
=
4
...
...
@@ -186,6 +199,7 @@ def configuration():
weight_decay
=
0
normalize_raw
=
True
normalize
=
None
# TODO: change default descriptor
descriptor
=
"ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}"
.
format
(
...
...
@@ -247,6 +261,7 @@ def configuration():
'kwargs'
:
{
'hidden_size'
:
hidden_size
,
'num_hidden'
:
num_hidden
,
'base_channels'
:
hidden_size
,
'dropout_probability'
:
dropout_probability
,
'batch_norm'
:
False
}
...
...
dcase2020_task2/models/classifier.py
View file @
cdb0061b
...
...
@@ -17,7 +17,7 @@ class FCNN(torch.nn.Module):
num_outputs
=
1
,
activation
=
'relu'
,
batch_norm
=
False
,
dropout_probability
=
0.1
**
kwargs
):
super
().
__init__
()
...
...
@@ -30,11 +30,9 @@ class FCNN(torch.nn.Module):
layers
.
append
(
torch
.
nn
.
Linear
(
i
,
o
))
if
batch_norm
:
layers
.
append
(
torch
.
nn
.
BatchNorm1d
(
o
))
layers
.
append
(
torch
.
nn
.
Dropout
(
p
=
dropout_probability
))
layers
.
append
(
activation_fn
())
_
=
layers
.
pop
()
_
=
layers
.
pop
()
if
batch_norm
:
_
=
layers
.
pop
()
...
...
@@ -59,7 +57,7 @@ class CNN(torch.nn.Module):
num_outputs
=
1
,
activation
=
'relu'
,
batch_norm
=
False
,
dropout_probability
=
0.1
**
kwargs
):
super
().
__init__
()
...
...
@@ -72,8 +70,6 @@ class CNN(torch.nn.Module):
layers
.
append
(
torch
.
nn
.
Conv2d
(
i
,
o
,
kernel_size
=
(
3
,
3
),
stride
=
2
,
padding
=
(
1
,
1
)))
if
batch_norm
:
layers
.
append
(
torch
.
nn
.
BatchNorm2d
(
o
))
if
dropout_probability
>
0
:
layers
.
append
(
torch
.
nn
.
Dropout2d
(
dropout_probability
))
layers
.
append
(
activation_fn
())
_
=
layers
.
pop
()
...
...
dcase2020_task2/models/cp_resnet.py
0 → 100644
View file @
cdb0061b
# coding: utf-8
import
math
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
torch.utils.checkpoint
import
checkpoint_sequential
from
librosa.filters
import
mel
as
librosa_mel_fn
def initialize_weights(module):
    """Initialise the parameters of a single layer in place.

    Presumably used as a per-module callback (for example through
    ``nn.Module.apply``) -- the call site is not visible here.

    * ``nn.Conv2d``: the weight is drawn with Kaiming-normal initialisation
      (``fan_in`` mode, ReLU gain). A convolution bias, if any, is untouched.
    * ``nn.BatchNorm2d``: the weight is set to 1 and the bias to 0.
    * ``nn.Linear``: the bias is set to 0; the weight is untouched.

    Any other module type is ignored.

    Args:
        module: the layer to initialise.
    """
    if isinstance(module, nn.Conv2d):
        nn.init.kaiming_normal_(module.weight.data, mode='fan_in', nonlinearity="relu")
    elif isinstance(module, nn.BatchNorm2d):
        # With affine=False the layer has no learnable scale/shift and both
        # attributes are None, so there is nothing to reset.
        if module.weight is not None:
            module.weight.data.fill_(1)
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.Linear):
        # A layer built with bias=False stores None instead of a tensor.
        if module.bias is not None:
            module.bias.data.zero_()
# Running count of BasicBlock instances created so far. BasicBlock.__init__
# stores the current value as its ``layer_index`` and then increments it;
# initialize_weights_fixup reads the final count to scale conv1's initial
# standard deviation.
layer_index_total = 0


def initialize_weights_fixup(module):
    """Initialise one module in place using the Fixup-style scheme.

    * ``AttentionAvg``: the forward 1x1 convolution weight and the attention
      bias are zeroed; the attention weight gets Kaiming-normal initialisation
      (``fan_in`` mode, sigmoid gain).
    * ``BasicBlock``: ``conv1`` is drawn from a normal distribution whose
      He standard deviation is multiplied by ``layer_index_total ** -0.5``,
      ``conv2`` is zeroed, and a shortcut convolution (when present) gets the
      unscaled He standard deviation.
    * ``nn.Conv2d``: left untouched.
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 0.
    * ``nn.Linear``: bias set to 0.

    Progress information is printed for ``AttentionAvg`` and ``BasicBlock``.

    Args:
        module: the module to initialise.
    """
    if isinstance(module, AttentionAvg):
        print("AttentionAvg init..")
        module.forw_conv[0].weight.data.zero_()
        module.atten[0].bias.data.zero_()
        nn.init.kaiming_normal_(module.atten[0].weight.data, mode='fan_in', nonlinearity="sigmoid")

    if isinstance(module, BasicBlock):
        # He init, rescaled by Fixup multiplier
        conv1 = module.conv1
        fan = conv1.kernel_size[0] * conv1.kernel_size[1] * conv1.out_channels
        he_std = math.sqrt(2. / fan)
        depth_scale = layer_index_total ** (-0.5)
        print(module.layer_index, he_std, depth_scale)
        conv1.weight.data.normal_(0, depth_scale * he_std)
        module.conv2.weight.data.zero_()

        # The shortcut only holds a 'conv' entry when the block changes the
        # number of channels; otherwise it is an empty Sequential.
        shortcut_conv = module.shortcut._modules.get('conv')
        if shortcut_conv is not None:
            fan = shortcut_conv.kernel_size[0] * shortcut_conv.kernel_size[1] * shortcut_conv.out_channels
            shortcut_conv.weight.data.normal_(0, math.sqrt(2. / fan))

    if isinstance(module, nn.Conv2d):
        # Bare convolutions are deliberately not re-initialised here.
        pass
    elif isinstance(module, nn.BatchNorm2d):
        # With affine=False both attributes are None; nothing to reset.
        if module.weight is not None:
            module.weight.data.fill_(1)
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.Linear):
        # A layer built with bias=False stores None instead of a tensor.
        if module.bias is not None:
            module.bias.data.zero_()
# Module-level flag; nothing in the visible part of this file reads it.
first_RUN = True


def calc_padding(kernal):
    """Return the padding for a kernel size: each size floor-divided by 3.

    Args:
        kernal: either a single kernel size or an iterable of kernel sizes.

    Returns:
        ``kernal // 3`` when the value supports floor division directly,
        otherwise a list with ``k // 3`` for every element ``k``.
    """
    try:
        padding = kernal // 3
    except TypeError:
        # Not a scalar (e.g. a tuple of per-axis sizes): pad each axis.
        padding = [side // 3 for side in kernal]
    return padding
class AttentionAvg(nn.Module):
    """Attention-weighted 1x1 convolution.

    Two parallel 1x1 convolutions are applied to the input: ``forw_conv``
    (no bias) produces the features and ``atten`` (with bias, followed by a
    sigmoid) produces the attention map. The features are multiplied by the
    attention map, scaled by the number of spatial positions and divided by
    the attention summed over ``sum_dims``.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels of both convolutions.
        sum_all: when true the attention is summed over dims ``[1, 2, 3]``,
            otherwise only over dims ``[2, 3]``.
    """

    def __init__(self, in_channels, out_channels, sum_all=True):
        super().__init__()
        self.sum_dims = [1, 2, 3] if sum_all else [2, 3]

        feature_conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.forw_conv = nn.Sequential(feature_conv)

        attention_conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.atten = nn.Sequential(attention_conv, nn.Sigmoid())

    def forward(self, x):
        """Return the attention-weighted features for ``x``."""
        features = self.forw_conv(x)
        weights = self.atten(x)
        positions = weights.size(2) * weights.size(3)
        # The small constant keeps the division well defined when the
        # attention sums to zero.
        total = weights.sum(dim=self.sum_dims, keepdim=True) + 1e-8
        return features * weights * positions / total
class BasicBlock(nn.Module):
    """Two-convolution residual block without normalisation layers.

    Every instance records the current value of the module-level counter
    ``layer_index_total`` as ``layer_index`` and then increments the counter.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of the first convolution and of the shortcut
            convolution (when one is created).
        k1: kernel size of the first convolution.
        k2: kernel size of the second convolution.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride, k1=3, k2=3):
        super().__init__()

        global layer_index_total
        self.layer_index = layer_index_total
        layer_index_total += 1

        # The first convolution carries the stride, i.e. does the downsampling.
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=k1,
            stride=stride,
            padding=calc_padding(k1),
            bias=False,
        )
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=k2,
            stride=1,
            padding=calc_padding(k2),
            bias=False,
        )

        # Identity shortcut unless the channel count changes, in which case a
        # strided 1x1 convolution is registered under the name 'conv'.
        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            projection = nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                padding=0,
                bias=False,
            )
            self.shortcut.add_module('conv', projection)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + shortcut(x))``."""
        out = self.conv2(F.relu(self.conv1(x), inplace=True))
        out += self.shortcut(x)
        # ReLU is applied after the residual addition.
        return F.relu(out, inplace=True)
class BottleneckBlock(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) without normalisation.

    The inner width is ``out_channels // expansion``.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of the 3x3 convolution and of the shortcut
            convolution (when one is created).
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()

        mid_channels = out_channels // self.expansion

        self.conv1 = nn.Conv2d(
            in_channels,
            mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        # The 3x3 convolution carries the stride, i.e. does the downsampling.
        self.conv2 = nn.Conv2d(
            mid_channels,
            mid_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.conv3 = nn.Conv2d(
            mid_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )

        # Identity shortcut unless the channel count changes, in which case a
        # strided 1x1 convolution is registered under the name 'conv'.
        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            projection = nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                padding=0,
                bias=False,
            )
            self.shortcut.add_module('conv', projection)

    def forward(self, x):
        """Return ``relu(conv3(relu(conv2(relu(conv1(x))))) + shortcut(x))``."""
        out = F.relu(self.conv1(x), inplace=True)
        out = F.relu(self.conv2(out), inplace=True)
        # No ReLU directly after the last convolution ...
        out = self.conv3(out)
        out += self.shortcut(x)
        # ... it is applied after the residual addition instead.
        return F.relu(out, inplace=True)
class
Network
(
nn
.
Module
):
def
__init__
(
self
,
config
):
super
(
Network
,
self
).
__init__
()
input_shape
=
config
[
'input_shape'
]
n_classes
=
config
[
'n_classes'
]
base_channels
=
config
[
'base_channels'
]
block_type
=
config
[
'block_type'
]
depth
=
config
[
'depth'
]
self
.
pooling_padding
=
config
.
get
(
"pooling_padding"
,
0
)
or
0
self
.
use_raw_spectograms
=
config
.
get
(
"use_raw_spectograms"
)
or
False
self
.
apply_softmax
=
config
.
get
(
"apply_softmax"
)
or
False
assert
block_type
in
[
'basic'
,
'bottleneck'
]
if
self
.
use_raw_spectograms
:
mel_basis
=
librosa_mel_fn
(
22050
,
2048
,
256
)
mel_basis
=
torch
.
from_numpy
(
mel_basis
).
float
()
self
.
register_buffer
(
'mel_basis'
,
mel_basis
)
if
block_type
==
'basic'
:
block
=
BasicBlock
n_blocks_per_stage
=
(
depth
-
2
)
//
6
assert
n_blocks_per_stage
*
6
+
2
==
depth
else
:
block
=
BottleneckBlock
n_blocks_per_stage
=
(
depth
-
2
)
//
9
assert
n_blocks_per_stage
*
9
+
2
==
depth
n_blocks_per_stage
=
[
n_blocks_per_stage
,
n_blocks_per_stage
,
n_blocks_per_stage
]
if
config
.
get
(
"n_blocks_per_stage"
)
is
not
None
:
print
(
"n_blocks_per_stage is specified ignoring the depth param, nc="
+
str
(
config
.
get
(
"n_channels"
)))
n_blocks_per_stage
=
config
.
get
(
"n_blocks_per_stage"
)
n_channels
=
config
.
get
(
"n_channels"
)
if
n_channels
is
None
:
n_channels
=
[
base_channels
,
base_channels
*
2
*
block
.
expansion
,
base_channels
*
4
*
block
.
expansion
]
if
config
.
get
(
"grow_a_lot"
):
n_channels
[
2
]
=
base_channels
*
8
*
block
.
expansion
self
.
in_c
=
nn
.
Sequential
(
nn
.
Conv2d
(
input_shape
[
1
],
n_channels
[
0
],
kernel_size
=
5
,
stride
=
2
,
padding
=
1
,
bias
=
False
),
nn
.
ReLU
(
True
)
)
self
.
stage1
=
self
.
_make_stage
(
n_channels
[
0
],
n_channels
[
0
],
n_blocks_per_stage
[
0
],
block
,
stride
=
1
,
maxpool
=
config
[
'stage1'
][
'maxpool'
],
k1s
=
config
[
'stage1'
][
'k1s'
],
k2s
=
config
[
'stage1'
][
'k2s'
])
self
.
stage2
=
self
.
_make_stage
(
n_channels
[
0
],
n_channels
[
1
],
n_blocks_per_stage
[
1
],
block
,
stride
=
1
,
maxpool
=
config
[
'stage2'
][
'maxpool'
],
k1s
=
config
[
'stage2'
][
'k1s'
],
k2s
=
config
[
'stage2'
][
'k2s'
])
self
.
stage3
=
self
.
_make_stage
(