dcase2020_task2 · Commit 31821481
authored Jun 09, 2020 by Paul Primus

add submission files

parent 52ca3258 · 24 changed files
dcase2020_task2/data_sets/__init__.py
```diff
+CLASS_MAP = {
+    'fan': 0,
+    'pump': 1,
+    'slider': 2,
+    'ToyCar': 3,
+    'ToyConveyor': 4,
+    'valve': 5
+}
+
+INVERSE_CLASS_MAP = {
+    0: 'fan',
+    1: 'pump',
+    2: 'slider',
+    3: 'ToyCar',
+    4: 'ToyConveyor',
+    5: 'valve'
+}
+
+TRAINING_ID_MAP = {
+    0: [0, 2, 4, 6],
+    1: [0, 2, 4, 6],
+    2: [0, 2, 4, 6],
+    3: [1, 2, 3, 4],
+    4: [1, 2, 3],
+    5: [0, 2, 4, 6]
+}
+
+EVALUATION_ID_MAP = {
+    0: [1, 3, 5],
+    1: [1, 3, 5],
+    2: [1, 3, 5],
+    3: [5, 6, 7],
+    4: [4, 5, 6],
+    5: [1, 3, 5]
+}
+
+ALL_ID_MAP = {
+    0: [0, 1, 2, 3, 4, 5, 6],
+    1: [0, 1, 2, 3, 4, 5, 6],
+    2: [0, 1, 2, 3, 4, 5, 6],
+    3: [1, 2, 3, 4, 5, 6, 7],
+    4: [1, 2, 3, 4, 5, 6],
+    5: [0, 1, 2, 3, 4, 5, 6]
+}
+
+
+def enumerate_development_datasets():
+    typ_id = []
+    for i in range(6):
+        for j in TRAINING_ID_MAP[i]:
+            typ_id.append((i, j))
+    return typ_id
+
+
+def enumerate_evaluation_datasets():
+    typ_id = []
+    for i in range(6):
+        for j in EVALUATION_ID_MAP[i]:
+            typ_id.append((i, j))
+    return typ_id
+
+
 from dcase2020_task2.data_sets.base_data_set import BaseDataSet
-from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet, INVERSE_CLASS_MAP, CLASS_MAP, TRAINING_ID_MAP
-from dcase2020_task2.data_sets.abnormal_datasets import ComplementMCMDataSet
+from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet
+from dcase2020_task2.data_sets.complement_dataset import ComplementMCMDataSet
```
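Note: the class maps and split enumerators are now importable from the package root. A quick usage sketch (illustrative, not part of the commit):

```python
from dcase2020_task2.data_sets import (
    CLASS_MAP, INVERSE_CLASS_MAP, enumerate_development_datasets
)

# CLASS_MAP and INVERSE_CLASS_MAP are exact inverses of each other.
assert INVERSE_CLASS_MAP[CLASS_MAP['ToyCar']] == 'ToyCar'

# enumerate_development_datasets() yields (machine_type, machine_id) pairs,
# e.g. (0, 0), (0, 2), (0, 4), (0, 6), (1, 0), ...
for machine_type, machine_id in enumerate_development_datasets():
    print(INVERSE_CLASS_MAP[machine_type], machine_id)
```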
dcase2020_task2/data_sets/abnormal_datasets.py → dcase2020_task2/data_sets/complement_dataset.py
```diff
 import os
 import torch.utils.data
 import glob
-from dcase2020_task2.data_sets import BaseDataSet
 import librosa
 import numpy as np
-from dcase2020_task2.data_sets import MachineDataSet, CLASS_MAP, TRAINING_ID_MAP, INVERSE_CLASS_MAP
+from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, ALL_ID_MAP
+from dcase2020_task2.data_sets import MachineDataSet


 class ComplementMCMDataSet(BaseDataSet):
```
```diff
@@ -53,7 +49,6 @@ class ComplementMCMDataSet(BaseDataSet):
         training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
-        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)

         if normalize is None:
             mean = training_set.data.mean(axis=1, keepdims=True)
             std = training_set.data.std(axis=1, keepdims=True)
```
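Note on the statistics above: the loader stores features as (num_mel, num_frames), so `axis=1, keepdims=True` produces per-mel-bin statistics that broadcast over time. A minimal sketch, assuming that layout:

```python
import numpy as np

data = np.random.randn(40, 1000).astype(np.float32)  # stand-in for training_set.data
mean = data.mean(axis=1, keepdims=True)              # shape (40, 1): one mean per mel bin
std = data.std(axis=1, keepdims=True)                # shape (40, 1): one std per mel bin
normalized = (data - mean) / std                     # broadcasts across time frames
```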
```diff
@@ -69,16 +64,16 @@ class ComplementMCMDataSet(BaseDataSet):
         training_sets = []
-        # validation_sets = []

-        for type_ in TRAINING_ID_MAP:
-            for id_ in TRAINING_ID_MAP[type_]:
+        for type_ in ALL_ID_MAP:
+            for id_ in ALL_ID_MAP[type_]:
                 if type_ != machine_type or (id_ != machine_id and same_type):
                     t = MachineDataSet(type_, id_, mode='training', **kwargs)
                     t.data = (t.data - mean) / std
-                    #v = MachineDataSet(type_, id_, mode='validation', **kwargs)
-                    #v.data = (v.data - mean) / std
                     training_sets.append(t)
+                    # don't load validation set ...
+                    # v = MachineDataSet(type_, id_, mode='validation', **kwargs)
+                    # v.data = (v.data - mean) / std
+                    # validation_sets.append(v)

         self.training_set = torch.utils.data.ConcatDataset(training_sets)
```
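The loop now draws complement machines from ALL_ID_MAP (training plus evaluation IDs) instead of TRAINING_ID_MAP only. An illustration of the selection predicate (not from the commit):

```python
def in_complement(type_, id_, machine_type=0, machine_id=0, same_type=True):
    # Mirrors the condition in the loop above.
    return type_ != machine_type or (id_ != machine_id and same_type)

# same_type=True : everything except (0, 0) is included, even other IDs of type 0.
assert in_complement(0, 2) and in_complement(1, 0) and not in_complement(0, 0)
# same_type=False: only machines of a *different* type are included.
assert not in_complement(0, 2, same_type=False) and in_complement(1, 0, same_type=False)
```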
dcase2020_task2/data_sets/mcm_dataset.py
```diff
 import os
 import torch.utils.data
 import glob
-from dcase2020_task2.data_sets import BaseDataSet
+from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, \
+    enumerate_development_datasets, enumerate_evaluation_datasets
 import librosa
 import numpy as np

-CLASS_MAP = {'fan': 0, 'pump': 1, 'slider': 2, 'ToyCar': 3, 'ToyConveyor': 4, 'valve': 5}
-
-INVERSE_CLASS_MAP = {0: 'fan', 1: 'pump', 2: 'slider', 3: 'ToyCar', 4: 'ToyConveyor', 5: 'valve'}
-
-TRAINING_ID_MAP = {0: [0, 2, 4, 6], 1: [0, 2, 4, 6], 2: [0, 2, 4, 6], 3: [1, 2, 3, 4], 4: [1, 2, 3], 5: [0, 2, 4, 6]}
-
-EVALUATION_ID_MAP = {0: [1, 3, 5], 1: [1, 3, 5], 2: [1, 3, 5], 3: [5, 6, 7], 4: [4, 5, 6], 5: [1, 3, 5]}
-
-
-def enumerate_development_datasets():
-    typ_id = []
-    for i in range(6):
-        for j in TRAINING_ID_MAP[i]:
-            typ_id.append((i, j))
-    return typ_id
-
-
-def enumerate_evaluation_datasets():
-    typ_id = []
-    for i in range(6):
-        for j in EVALUATION_ID_MAP[i]:
-            typ_id.append((i, j))
-    return typ_id
-

 class MCMDataSet(BaseDataSet):
```
```diff
@@ -152,6 +101,8 @@ class MachineDataSet(torch.utils.data.Dataset):
     ):
         assert mode in ['training', 'validation']

+        if mode == 'validation':
+            hop_all = False

         self.num_mel = num_mel
         self.n_fft = n_fft
```
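The new guard forces `hop_all` off for validation data. `hop_all` itself is not defined in this diff; presumably it controls whether context windows are cut at every frame or only at disjoint offsets:

```python
# Assumed semantics only; hop_all's definition lies outside this diff.
file_length, context = 431, 11
windows_hop_all = file_length - context + 1   # one window per frame (overlapping)
windows_disjoint = file_length // context     # non-overlapping windows
```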
```diff
@@ -245,8 +196,20 @@ class MachineDataSet(torch.utils.data.Dataset):
         data = np.empty((self.num_mel, self.file_length * len(files)), dtype=np.float32)
         for i, f in enumerate(files):
             file = self.__load_preprocess_file__(f)
-            assert file.shape[1] == self.file_length
-            data[:, i * self.file_length:(i + 1) * self.file_length] = self.__load_preprocess_file__(f)
+            if file.shape[1] != self.file_length:
+                if file.shape[1] < self.file_length:
+                    print(f'Too short: {f}')
+                    file = np.concatenate([file, file[:, :self.file_length - file.shape[1]]], -1)
+                elif file.shape[1] > self.file_length:
+                    print(f'Too long: {f}')
+                    file = file[:, :self.file_length]
+            data[:, i * self.file_length:(i + 1) * self.file_length] = file

         np.save(file_path, data)
         return data
```
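Instead of asserting a fixed length, the loader now repairs mismatches: short clips are padded by wrapping their own opening frames onto the end, long clips are truncated. A self-contained sketch of the same logic:

```python
import numpy as np

file_length = 5
file = np.arange(8, dtype=np.float32).reshape(1, 8)  # one mel bin, 8 frames

if file.shape[1] < file_length:
    # too short: wrap the first frames onto the end
    # (covers shortfalls up to the clip's own length)
    file = np.concatenate([file, file[:, :file_length - file.shape[1]]], -1)
elif file.shape[1] > file_length:
    # too long: drop the surplus frames
    file = file[:, :file_length]

assert file.shape == (1, file_length)
```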
```diff
@@ -304,11 +267,11 @@ class MachineDataSet(torch.utils.data.Dataset):
 if __name__ == '__main__':

     for type_, id_ in enumerate_development_datasets():
-        _ = MachineDataSet(type_, id_, mode='training')
-        _ = MachineDataSet(type_, id_, mode='validation')
+        _ = MachineDataSet(type_, id_, mode='training', n_fft=256)
+        _ = MachineDataSet(type_, id_, mode='validation', n_fft=256)

     for type_, id_ in enumerate_evaluation_datasets():
-        _ = MachineDataSet(type_, id_, mode='training')
-        _ = MachineDataSet(type_, id_, mode='validation')
+        _ = MachineDataSet(type_, id_, mode='training', n_fft=256)
+        _ = MachineDataSet(type_, id_, mode='validation', n_fft=256)
```
dcase2020_task2/experiments/anomaly_model_vae_experiment.py (deleted)
```python
from dcase2020_task2.experiments import BaseExperiment
from dcase2020_task2.utils.logger import Logger
from datetime import datetime
import os
import pytorch_lightning as pl
import torch
from sacred import Experiment
import torch.utils.data

# workaround...
from sacred import SETTINGS
SETTINGS['CAPTURE_MODE'] = 'sys'

import numpy as np


class VAEExperiment(BaseExperiment, pl.LightningModule):
    '''
    Reproduction of the DCASE Baseline. It is basically an Auto Encoder, the anomaly score is the reconstruction error.
    '''

    def __init__(self, configuration_dict, _run):
        super().__init__(configuration_dict)

        self.network = self.objects['auto_encoder_model']
        self.prior = self.objects['prior']
        self.reconstruction = self.objects['reconstruction']
        self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)

        # experiment state variables
        self.epoch = -1
        self.step = 0
        self.result = None

    def forward(self, batch):
        batch['epoch'] = self.epoch
        batch = self.network(batch)
        return batch

    def training_step(self, batch, batch_num, optimizer_idx=0):

        if batch_num == 0 and optimizer_idx == 0:
            self.epoch += 1

        if optimizer_idx == 0:
            batch = self(batch)

            reconstruction_loss = self.reconstruction.loss(batch)
            prior_loss = self.prior.loss(batch)

            batch['reconstruction_loss'] = reconstruction_loss / (
                self.objects['batch_size'] * self.objects['num_mel'] * self.objects['context'])
            batch['prior_loss'] = prior_loss / self.objects['batch_size']
            batch['loss'] = reconstruction_loss + prior_loss

            if batch_num == 0:
                self.logger_.log_image_reconstruction(batch, self.epoch)

            self.logger_.log_training_step(batch, self.step)
            self.step += 1
        else:
            raise AttributeError

        return {
            'loss': batch['loss'],
            'tqdm': {'loss': batch['loss']},
        }

    def validation_step(self, batch, batch_num):
        self(batch)
        return {
            'targets': batch['targets'],
            'scores': batch['scores'],
            'machine_types': batch['machine_types'],
            'machine_ids': batch['machine_ids'],
            'part_numbers': batch['part_numbers'],
            'file_ids': batch['file_ids']
        }

    def validation_end(self, outputs):
        self.logger_.log_vae_validation(outputs, self.step, self.epoch)
        return {
            'val_loss': np.concatenate([o['scores'].detach().cpu().numpy() for o in outputs]).mean()
        }

    def test_step(self, batch, batch_num):
        return self.validation_step(batch, batch_num)

    def test_end(self, outputs):
        # TODO: add new logging method
        # self.result = self.logger_.log_testing(outputs)
        self.logger_.close()
        return {}

    def train_dataloader(self):
        if self.objects['debug']:
            ds = torch.utils.data.Subset(
                self.objects['data_set'].get_whole_training_data_set(),
                np.arange(1024)
            )
        else:
            ds = self.objects['data_set'].get_whole_training_data_set()

        dl = torch.utils.data.DataLoader(
            ds,
            batch_size=self.objects['batch_size'],
            shuffle=True,
            num_workers=self.objects['num_workers'],
            drop_last=False
        )
        return dl


def configuration():
    seed = 1220
    deterministic = False
    id = datetime.now().strftime("%Y-%m-%d_%H:%M:%S:%f")
    log_path = os.path.join('..', 'experiment_logs', id)

    #####################
    # quick configuration, uses default parameters of more detailed configuration
    #####################

    machine_type = 0
    machine_id = 0

    latent_size = 40
    batch_size = 512

    debug = False
    if debug:
        epochs = 1
        num_workers = 0
    else:
        epochs = 50
        num_workers = 4

    learning_rate = 1e-4
    weight_decay = 0

    normalize = 'all'
    normalize_raw = True

    prior_class = 'priors.StandardNormalPrior'

    context = 11

    descriptor = "vae_training_{}_{}_{}_{}_{}_{}_{}_{}".format(
        prior_class,
        latent_size,
        batch_size,
        learning_rate,
        weight_decay,
        normalize,
        normalize_raw,
        context
    )

    ########################
    # detailed configuration
    ########################

    num_mel = 40
    n_fft = 512
    hop_size = 256

    prior = {
        'class': prior_class,
        'kwargs': {
            'latent_size': latent_size,
            'weight': 1
        }
    }

    data_set = {
        'class': 'data_sets.MCMDataSet',
        'kwargs': {
            'context': context,
            'num_mel': num_mel,
            'n_fft': n_fft,
            'hop_size': hop_size,
            'normalize': normalize,
            'normalize_raw': normalize_raw
        }
    }

    reconstruction = {
        'class': 'losses.MSE',
        'kwargs': {
            'weight': 1,
            'input_shape': '@data_set.observation_shape'
        }
    }

    auto_encoder_model = {
        'class': 'models.SamplingFCAE',
        'args': [
            '@data_set.observation_shape',
            '@reconstruction',
            '@prior'
        ]
    }

    lr_scheduler = {
        'class': 'torch.optim.lr_scheduler.StepLR',
        'args': [
            '@optimizer',
        ],
        'kwargs': {
            'step_size': epochs
        }
    }

    optimizer = {
        'class': 'torch.optim.Adam',
        'args': [
            '@auto_encoder_model.parameters()'
        ],
        'kwargs': {
            'lr': learning_rate,
            'betas': (0.9, 0.999),
            'amsgrad': False,
            'weight_decay': weight_decay,
        }
    }

    trainer = {
        'class': 'trainers.PTLTrainer',
        'kwargs': {
            'max_epochs': epochs,
            'checkpoint_callback': False,
            'logger': False,
            'early_stop_callback': False,
            'gpus': [0],
            'show_progress_bar': True,
            'progress_bar_refresh_rate': 1000
        }
    }


ex = Experiment('dcase2020_task2_vae_training')
cfg = ex.config(configuration)


@ex.automain
def run(_config, _run):
    experiment = VAEExperiment(_config, _run)
    return experiment.run()
```
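For reference, the deleted experiment's `training_step` optimized the raw sum of reconstruction and prior losses while logging normalized values. A worked example with illustrative numbers (not from the repo):

```python
batch_size, num_mel, context = 512, 40, 11

reconstruction_loss = 2.3e5  # summed squared error over the batch
prior_loss = 1.1e4           # summed prior (KL) term over the batch

per_element = reconstruction_loss / (batch_size * num_mel * context)  # ~1.02, logged
per_sample_prior = prior_loss / batch_size                            # ~21.5, logged
total = reconstruction_loss + prior_loss                              # what was optimized
```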
dcase2020_task2/experiments/classification_experiment.py
```diff
@@ -140,7 +140,7 @@ def configuration():
     # quick configuration, uses default parameters of more detailed configuration
     #####################

-    machine_type = 0
+    machine_type = 1
     machine_id = 0

     num_mel = 128
```
```diff
@@ -148,16 +148,13 @@ def configuration():
     hop_size = 512
     power = 2.0
     fmin = 0
-    context = 32
+    context = 128

     model_class = 'dcase2020_task2.models.CNN'
-    hidden_size = 512
-    num_hidden = 4
+    hidden_size = 128
+    num_hidden = 3
     dropout_probability = 0.0

-    # complement set
-    same_type = True
-
     debug = False
     if debug:
         num_workers = 0
```
```diff
@@ -165,12 +162,14 @@ def configuration():
         num_workers = 4
     epochs = 100

-    loss_class = 'dcase2020_task2.losses.BCE'
+    loss_class = 'dcase2020_task2.losses.AUC'
     batch_size = 512
     learning_rate = 1e-4
     weight_decay = 0

+    same_type = True
     normalize_raw = True
+    hop_all = False

     # TODO: change default descriptor
     descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_Complement:[{}]{}".format(
```
```diff
@@ -197,7 +196,6 @@ def configuration():
     # detailed configuration
    ########################

-
     data_set = {
         'class': 'dcase2020_task2.data_sets.MCMDataSet',
         'args': [
```
```diff
@@ -212,7 +210,7 @@ def configuration():
             'normalize_raw': normalize_raw,
             'power': power,
             'fmin': fmin,
-            'hop_all': False
+            'hop_all': hop_all
         }
     }
```
```diff
@@ -230,7 +228,7 @@ def configuration():
             'normalize_raw': normalize_raw,
             'power': power,
             'fmin': fmin,
-            'hop_all': False,
+            'hop_all': hop_all,
             'same_type': same_type
         }
     }
```
```diff
@@ -251,7 +249,8 @@ def configuration():
         'kwargs': {
             'hidden_size': hidden_size,
             'num_hidden': num_hidden,
-            'dropout_probability': dropout_probability
+            'dropout_probability': dropout_probability,
+            'batch_norm': False
         }
     }
```
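The training objective switches from BCE to `dcase2020_task2.losses.AUC`. That loss class is not part of this diff; for intuition only, a common differentiable pairwise surrogate for 1 - AUC looks like this (illustrative sketch, not the repository's implementation):

```python
import torch

def auc_hinge_loss(scores_pos, scores_neg, margin=1.0):
    """Pairwise hinge surrogate of 1 - AUC: every negative example should
    score at least `margin` below every positive example."""
    diff = scores_pos.unsqueeze(1) - scores_neg.unsqueeze(0)  # all pos/neg pairs
    return torch.relu(margin - diff).mean()
```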
dcase2020_task2/models/__init__.py
```diff
 from dcase2020_task2.models.base_model import ClassifierBase, VAEBase
-from dcase2020_task2.models.made import MADE
+from dcase2020_task2.models.other.made import MADE
 from dcase2020_task2.models.ae import AE, ConvAE
-from dcase2020_task2.models.classifier import FCNN, CNN
\ No newline at end of file
+from dcase2020_task2.models.classifier import FCNN, CNN
+from dcase2020_task2.models.resnet import ResNet
\ No newline at end of file
```
dcase2020_task2/models/ae.py
```diff
@@ -4,7 +4,7 @@ from dcase2020_task2.priors import NoPrior
 import numpy as np
 import torch

-from dcase2020_task2.models.custom import activation_dict, init_weights
+from dcase2020_task2.models.custom import ACTIVATION_DICT, init_weights


 class AE(torch.nn.Module, VAEBase):
@@ -21,7 +21,7 @@ class AE(torch.nn.Module, VAEBase):
     ):
         super().__init__()

-        activation_fn = activation_dict[activation]
+        activation_fn = ACTIVATION_DICT[activation]

         if prior is None:
             prior = NoPrior(latent_size=hidden_size)

         self.input_shape = input_shape
@@ -101,7 +101,7 @@ class ConvlBlock(torch.nn.Module):
                 )
             )
             modules.append(
-                activation_dict[activation]()
+                ACTIVATION_DICT[activation]()
             )

         self.last_activation = modules.pop()
@@ -129,7 +129,7 @@ class ConvAE(torch.nn.Module, VAEBase):
     ):
         super().__init__()

-        activation_fn = activation_dict[activation]
+        activation_fn = ACTIVATION_DICT[activation]

         if prior is None:
             prior = NoPrior(latent_size=hidden_size)

         self.input_shape = input_shape
```
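`activation_dict` is renamed to the constant-style `ACTIVATION_DICT` throughout. Its definition lives in `dcase2020_task2.models.custom`, which is not shown in this diff; a plausible shape, for orientation only:

```python
import torch

# Hypothetical sketch of the renamed mapping: activation name -> module class.
ACTIVATION_DICT = {
    'relu': torch.nn.ReLU,
    'elu': torch.nn.ELU,
    'tanh': torch.nn.Tanh,
}

activation_fn = ACTIVATION_DICT['relu']  # a class, instantiated later as activation_fn()
```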
dcase2020_task2/models/classifier.py
```diff
@@ -4,7 +4,7 @@ from dcase2020_task2.priors import NoPrior
 import numpy as np
 import torch

-from dcase2020_task2.models.custom import activation_dict, init_weights
+from dcase2020_task2.models.custom import ACTIVATION_DICT, init_weights


 class FCNN(torch.nn.Module):
@@ -21,7 +21,7 @@ class FCNN(torch.nn.Module):
```