dcase2020_task2 · Commit fd3d3b7f
authored Jun 07, 2020 by Paul Primus

classification experiments

parent 711eaf79
Changes: 20 files
dcase2020_task2/data_sets/__init__.py

 from dcase2020_task2.data_sets.base_data_set import BaseDataSet
-from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet
\ No newline at end of file
+from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet, INVERSE_CLASS_MAP, CLASS_MAP, TRAINING_ID_MAP
+from dcase2020_task2.data_sets.abnormal_datasets import ComplementMCMDataSet
dcase2020_task2/data_sets/abnormal_datasets.py (new file, 0 → 100644)

import os
import torch.utils.data
import glob
from dcase2020_task2.data_sets import BaseDataSet
import librosa
import numpy as np

from dcase2020_task2.data_sets import MachineDataSet, CLASS_MAP, TRAINING_ID_MAP, INVERSE_CLASS_MAP


class ComplementMCMDataSet(BaseDataSet):

    def __init__(
            self,
            machine_type,
            machine_id,
            data_root=os.path.join(os.path.expanduser('~'), 'shared', 'dcase2020_task2'),
            context=5,
            num_mel=128,
            n_fft=1024,
            hop_size=512,
            power=1.0,
            fmin=0,
            normalize_raw=False,
            normalize=None,
            hop_all=False
    ):
        self.data_root = data_root
        self.context = context
        self.num_mel = num_mel
        self.n_fft = n_fft
        self.hop_size = hop_size
        self.power = power
        self.fmin = fmin
        self.normalize = normalize
        self.hop_all = hop_all

        assert type(machine_type) == int and type(machine_id) == int

        kwargs = {
            'data_root': self.data_root,
            'context': self.context,
            'num_mel': self.num_mel,
            'n_fft': self.n_fft,
            'hop_size': self.hop_size,
            'power': power,
            'normalize': normalize_raw,  # MachineDataSet's 'normalize' kwarg receives normalize_raw
            'fmin': fmin,
            'hop_all': hop_all
        }

        training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)

        # normalization statistics come from the target machine's training data,
        # unless a (mean, std) tuple is passed in explicitly
        if normalize is None:
            mean = training_set.data.mean(axis=1, keepdims=True)
            std = training_set.data.std(axis=1, keepdims=True)
            training_set.data = (training_set.data - mean) / std
            validation_set.data = (validation_set.data - mean) / std
        else:
            assert type(normalize) == tuple
            assert len(normalize) == 2
            mean, std = normalize
            training_set.data = (training_set.data - mean) / std
            validation_set.data = (validation_set.data - mean) / std

        # every other machine type / machine id combination forms the complement
        training_sets = []
        validation_sets = []

        for type_ in TRAINING_ID_MAP:
            for id_ in TRAINING_ID_MAP[type_]:
                if type_ != machine_type or id_ != machine_id:
                    t = MachineDataSet(type_, id_, mode='training', **kwargs)
                    t.data = (t.data - mean) / std
                    v = MachineDataSet(type_, id_, mode='validation', **kwargs)
                    v.data = (v.data - mean) / std
                    training_sets.append(t)
                    validation_sets.append(v)

        self.training_set = torch.utils.data.ConcatDataset(training_sets)
        self.validation_set = torch.utils.data.ConcatDataset(validation_sets)
        self.mean = mean
        self.std = std

    @property
    def observation_shape(self) -> tuple:
        return 1, self.num_mel, self.context

    def training_data_set(self):
        return self.training_set

    def validation_data_set(self):
        return self.validation_set

    def mean_std(self):
        return self.mean, self.std
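A minimal usage sketch of the new ComplementMCMDataSet (not part of the commit; it assumes the package is importable, the features live under the default data_root, and machine type 0 / id 0 is the target):

import torch.utils.data
from dcase2020_task2.data_sets import ComplementMCMDataSet

complement = ComplementMCMDataSet(0, 0)  # defaults: context=5, num_mel=128, n_fft=1024, hop_size=512
mean, std = complement.mean_std()        # per-mel-bin statistics of the target machine's training data

loader = torch.utils.data.DataLoader(
    complement.training_data_set(),      # ConcatDataset over all other machine/id combinations
    batch_size=512,                      # batch size is an arbitrary choice here
    shuffle=True,
)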
dcase2020_task2/experiments/baseline_dcase_experiment.py (deleted, 100644 → 0)
from dcase2020_task2.experiments import BaseExperiment
from datetime import datetime
import os
import pytorch_lightning as pl
import torch
from sacred import Experiment
from dcase2020_task2.utils.logger import Logger

import torch.utils.data

# workaround...
from sacred import SETTINGS
SETTINGS['CAPTURE_MODE'] = 'sys'


class BaselineDCASEExperiment(BaseExperiment, pl.LightningModule):
    '''
    Reproduction of the DCASE baseline: an auto-encoder whose anomaly score
    is the reconstruction error.
    '''

    def __init__(self, configuration_dict, _run):
        super().__init__(configuration_dict)

        self.network = self.objects['auto_encoder_model']
        self.prior = self.objects['prior']
        self.reconstruction = self.objects['reconstruction']
        self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)

        # experiment state variables
        self.epoch = -1
        self.step = 0
        self.result = None

    def forward(self, batch):
        batch['epoch'] = self.epoch
        batch = self.network(batch)
        return batch

    def training_step(self, batch_normal, batch_num, optimizer_idx=0):
        if batch_num == 0 and optimizer_idx == 0:
            self.epoch += 1

        if optimizer_idx == 0:
            batch_normal = self(batch_normal)
            batch_normal['loss'] = batch_normal['reconstruction_loss'] + batch_normal['prior_loss']
            self.logger_.log_training_step(batch_normal, self.step)
            self.step += 1
        else:
            raise AttributeError

        return {
            'loss': batch_normal['loss'],
            'tqdm': {'loss': batch_normal['loss']},
        }

    def validation_step(self, batch, batch_num):
        self(batch)
        return {
            'targets': batch['targets'],
            'scores': batch['scores'],
            'machine_types': batch['machine_types'],
            'machine_ids': batch['machine_ids'],
            'file_ids': batch['file_ids']
        }

    def validation_end(self, outputs):
        self.logger_.log_validation(outputs, self.step, self.epoch)
        return {}

    def test_step(self, batch, batch_num):
        return self.validation_step(batch, batch_num)

    def test_end(self, outputs):
        self.result = self.logger_.log_test(outputs)
        self.logger_.close()
        return self.result

    def train_dataloader(self):
        assert False, 'Need to merge training sets first!'
        dl = torch.utils.data.DataLoader(
            self.objects['data_set'].get_machine_training_data_set(self.machine_type),
            batch_size=self.objects['batch_size'],
            shuffle=True,
            num_workers=self.objects['num_workers'],
            drop_last=False
        )
        return dl

    def val_dataloader(self):
        assert False, 'Need to merge training sets first!'
        dl = torch.utils.data.DataLoader(
            self.objects['data_set'].get_machine_validation_data_set(self.machine_type),
            batch_size=self.objects['batch_size'],
            shuffle=False,
            num_workers=self.objects['num_workers'],
            drop_last=False
        )
        return dl


def configuration():
    seed = 1220
    deterministic = False
    id = datetime.now().strftime("%Y-%m-%d_%H:%M:%S:%f")
    log_path = os.path.join('experiment_logs', id)

    #####################
    # quick configuration, uses default parameters of more detailed configuration
    #####################

    machine_type = 0
    machine_id = 0

    latent_size = 8
    batch_size = 512

    debug = False
    if debug:
        epochs = 1
        num_workers = 0
    else:
        epochs = 100
        num_workers = 2

    learning_rate = 1e-3
    weight_decay = 0
    normalize = 'none'
    normalize_raw = False
    context = 5

    descriptor = "BaselineDCASEExperiment_{}_{}_{}_{}_{}_{}_{}".format(
        latent_size,
        batch_size,
        learning_rate,
        weight_decay,
        normalize,
        normalize_raw,
        context
    )

    ########################
    # detailed configuration
    ########################

    num_mel = 128
    n_fft = 1024
    hop_size = 512

    prior = {
        'class': 'dcase2020_task2.priors.NoPrior',
        'kwargs': {
            'latent_size': latent_size,
            'weight': 1.0
        }
    }

    data_set = {
        'class': 'dcase2020_task2.data_sets.MCMDataSet',
        'args': [machine_type, machine_id],
        'kwargs': {
            'context': context,
            'num_mel': num_mel,
            'n_fft': n_fft,
            'hop_size': hop_size,
            'normalize': normalize,
            'normalize_raw': normalize_raw
        }
    }

    reconstruction = {
        'class': 'dcase2020_task2.losses.MSEReconstruction',
        'kwargs': {
            'weight': 1.0,
            'input_shape': '@data_set.observation_shape'
        }
    }

    auto_encoder_model = {
        'class': 'dcase2020_task2.models.BaselineFCAE',
        'args': [
            '@data_set.observation_shape',
            '@reconstruction',
            '@prior'
        ]
    }

    lr_scheduler = {
        'class': 'torch.optim.lr_scheduler.StepLR',
        'args': [
            '@optimizer',
        ],
        'kwargs': {
            'step_size': epochs
        }
    }

    optimizer = {
        'class': 'torch.optim.Adam',
        'args': [
            '@auto_encoder_model.parameters()'
        ],
        'kwargs': {
            'lr': learning_rate,
            'betas': (0.9, 0.999),
            'amsgrad': False,
            'weight_decay': weight_decay,
        }
    }

    trainer = {
        'class': 'dcase2020_task2.trainers.PTLTrainer',
        'kwargs': {
            'max_epochs': epochs,
            'checkpoint_callback': False,
            'logger': False,
            'early_stop_callback': False,
            'gpus': [0],
            'show_progress_bar': True,
            'progress_bar_refresh_rate': 1000
        }
    }


ex = Experiment('dcase2020_task2_BaselineDCASEExperiment')
cfg = ex.config(configuration)


@ex.automain
def run(_config, _run):
    experiment = BaselineDCASEExperiment(_config, _run)
    return experiment.run()
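For reference, the scoring principle this deleted baseline implemented — anomaly score equals per-sample reconstruction error — in a self-contained, illustrative form (the real code operates on batch dictionaries and lives in the losses/models packages; shapes and the function name here are assumptions):

import torch

def anomaly_scores(auto_encoder: torch.nn.Module, observations: torch.Tensor) -> torch.Tensor:
    # observations: (batch, 1, num_mel, context); the auto-encoder returns the same shape
    reconstructions = auto_encoder(observations)
    # mean squared error per sample; higher error = more anomalous
    return ((observations - reconstructions) ** 2).flatten(1).mean(dim=1)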
dcase2020_task2/experiments/baseline_experiment.py

@@ -98,12 +98,12 @@ def configuration():
     hop_size = 512
     power = 2.0
     fmin = 0
-    context = 8
+    context = 5

-    model_class = 'dcase2020_task2.models.ConvAE'  # 'dcase2020_task2.models.MADE'
-    hidden_size = 256
-    num_hidden = 1
-    latent_size = 8  # only used for AEs
+    model_class = 'dcase2020_task2.models.AE'  # 'dcase2020_task2.models.MADE'
+    hidden_size = 512
+    num_hidden = 3
+    latent_size = 64  # only used for AEs

     debug = False
     if debug:
@@ -113,9 +113,9 @@ def configuration():
     epochs = 100

     reconstruction_class = 'dcase2020_task2.losses.MSEReconstruction'  # 'dcase2020_task2.losses.NLLReconstruction'
-    batch_size = 256
+    batch_size = 512
     learning_rate = 1e-3
-    weight_decay = 0
+    weight_decay = 1e-5
     normalize_raw = True
@@ -156,7 +156,7 @@ def configuration():
             'normalize_raw': normalize_raw,
             'power': power,
             'fmin': fmin,
-            'hop_all': True
+            'hop_all': False
         }
     }
dcase2020_task2/experiments/classification_experiment.py

@@ -17,15 +17,23 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
     def __init__(self, configuration_dict, _run):
         super().__init__(configuration_dict)

         # default stuff
         self.network = self.objects['model']
         self.data_set = self.objects['data_set']
         self.loss = self.objects['loss']
         self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)

         # experiment state variables
         self.epoch = -1
         self.step = 0
         self.result = None

         # will be set before each epoch
         self.normal_data_set = self.objects['data_set']
+        self.abnormal_data_set = self.objects['abnormal_data_set']

         self.inf_data_loader = self.get_inf_data_loader(torch.utils.data.DataLoader(
-            self.data_set.complement_data_set(self.machine_type, self.machine_id),
+            self.abnormal_data_set.training_data_set(),
             batch_size=self.objects['batch_size'],
             shuffle=True,
             num_workers=self.objects['num_workers'],
@@ -58,12 +66,26 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
             self.epoch += 1

         if optimizer_idx == 0:
-            batch_normal = self(batch_normal)
-            batch_abnormal = self(next(self.inf_data_loader))
-            loss = self.loss.loss(batch_normal, batch_abnormal)
-            batch_normal['loss'] = loss
+            abnormal_batch = next(self.inf_data_loader)
+
+            normal_batch_size = len(batch_normal['observations'])
+            abnormal_batch_size = len(abnormal_batch['observations'])
+
+            device = batch_normal['observations'].device
+
+            batch_normal['abnormal'] = torch.cat([
+                torch.zeros(normal_batch_size, 1).to(device),
+                torch.ones(abnormal_batch_size, 1).to(device)
+            ])
+
+            batch_normal['observations'] = torch.cat([
+                batch_normal['observations'],
+                abnormal_batch['observations']
+            ])
+
+            batch_normal = self(batch_normal)
+            batch_normal = self.loss(batch_normal)

             self.logger_.log_training_step(batch_normal, self.step)
             self.step += 1
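The heart of the new classification setup: one batch now mixes target-machine ("normal") observations with complement observations, labeled 0 and 1 respectively, and the loss module consumes the combined batch. An illustrative, self-contained sketch (shapes and the logits stand-in are assumptions, not repo code):

import torch

normal = torch.randn(4, 1, 128, 5)    # observations of the target machine
abnormal = torch.randn(2, 1, 128, 5)  # observations drawn from the complement data set

observations = torch.cat([normal, abnormal])         # combined batch, shape (6, 1, 128, 5)
targets = torch.cat([torch.zeros(len(normal), 1),    # 0 = normal
                     torch.ones(len(abnormal), 1)])  # 1 = abnormal proxy

logits = torch.randn(len(observations), 1)  # stand-in for the classifier's output
loss = torch.nn.functional.binary_cross_entropy_with_logits(logits, targets)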
@@ -82,7 +104,6 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
             'scores': batch['scores'],
             'machine_types': batch['machine_types'],
             'machine_ids': batch['machine_ids'],
-            'part_numbers': batch['part_numbers'],
             'file_ids': batch['file_ids']
         }
@@ -94,10 +115,20 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         return self.validation_step(batch, batch_num)

     def test_end(self, outputs):
-        self.result = self.logger_.log_testing(outputs)
+        self.result = self.logger_.log_test(outputs)
         self.logger_.close()
         return self.result

+    def train_dataloader(self):
+        dl = torch.utils.data.DataLoader(
+            self.objects['data_set'].training_data_set(),
+            batch_size=self.objects['batch_size'],
+            shuffle=True,
+            num_workers=self.objects['num_workers'],
+            drop_last=False
+        )
+        return dl
+

 def configuration():
     seed = 1220
@@ -112,73 +143,89 @@ def configuration():
     machine_type = 0
     machine_id = 0

-    batch_size = 512
     num_mel = 128
     n_fft = 1024
     hop_size = 512
     power = 2.0
     fmin = 0
     context = 5

     model_class = 'models.FCNN'
     hidden_size = 512
     num_hidden = 3
     latent_size = 64

     debug = False
     if debug:
         epochs = 1
         num_workers = 0
     else:
         epochs = 100
         num_workers = 4

-    learning_rate = 1e-4
-    weight_decay = 1e-4
-    rho = 0.1
-    feature_context = 'short'
-    loss_class = 'losses.BCE'
-    mse_weight = 0.0
-    model_class = 'models.BaselineFCNN'
-    epochs = 100
+    loss_class = 'dcase2020_task2.losses.BCE'
+    batch_size = 512
+    learning_rate = 1e-3
+    weight_decay = 1e-5
     normalize = 'all'
     normalize_raw = True
-    complement = 'all'

     # TODO: change default descriptor
-    descriptor = "ClassificationExperiment_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}".format(
+    descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}".format(
         model_class,
-        loss_class,
         hidden_size,
         num_hidden,
         latent_size,
         batch_size,
         learning_rate,
         weight_decay,
         normalize,
         normalize_raw,
-        rho,
-        feature_context,
-        complement
+        num_mel,
+        context,
+        n_fft,
+        hop_size,
+        power,
+        fmin,
+        seed
     )

     ########################
-    # detailed configuration SamplingFCAE
+    # detailed configuration
     ########################

-    if feature_context == 'short':
-        context = 5
-        num_mel = 128
-        n_fft = 1024
-        hop_size = 512
-    elif feature_context == 'long':
-        context = 11
-        num_mel = 40
-        n_fft = 512
-        hop_size = 256
-
-    if model_class == 'models.SamplingCRNNAE':
-        context = 1
-
-    data_set = {
-        'class': 'data_sets.MCMDataSet',
-        'args': [machine_type, machine_id],
-        'kwargs': {
-            'context': context,
-            'num_mel': num_mel,
-            'n_fft': n_fft,
-            'hop_size': hop_size,
-            'normalize': normalize,
-            'normalize_raw': normalize_raw,
-            'complement': complement
-        }
-    }
+    data_set = {
+        'class': 'dcase2020_task2.data_sets.MCMDataSet',
+        'args': [machine_type, machine_id],
+        'kwargs': {
+            'context': context,
+            'num_mel': num_mel,
+            'n_fft': n_fft,
+            'hop_size': hop_size,
+            'normalize_raw': normalize_raw,
+            'power': power,
+            'fmin': fmin,
+            'hop_all': False
+        }
+    }
+
+    abnormal_data_set = {
+        'class': 'dcase2020_task2.data_sets.ComplementMCMDataSet',
+        'args': [machine_type, machine_id],
+        'kwargs': {
+            'context': context,
+            'num_mel': num_mel,
+            'n_fft': n_fft,
+            'hop_size': hop_size,
+            'normalize': normalize,
+            'normalize_raw': normalize_raw,
+            'power': power,
+            'fmin': fmin,
+            'hop_all': False
+        }
+    }
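These 'class'/'args'/'kwargs' dictionaries are instantiated reflectively by the experiment framework; '@'-references such as '@data_set.observation_shape' are resolved against already-built objects. A hedged sketch of such a factory (the repo's actual helper is not shown in this diff, and reference resolution is omitted):

import importlib

def build(spec, **extra):
    # 'dcase2020_task2.data_sets.MCMDataSet' -> module path + class name
    module_name, class_name = spec['class'].rsplit('.', 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(*spec.get('args', []), **{**spec.get('kwargs', {}), **extra})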
@@ -186,17 +233,14 @@ def configuration():
         'class': loss_class,
         'kwargs': {
             'weight': 1.0,
             'input_shape'