Paul Primus / dcase2020_task2 / Commits / 2fa436dd

Commit 2fa436dd, authored Jun 03, 2020 by Paul Primus

update dataset

parent 5baf14c5
Changes: 9 files
dcase2020_task2/data_sets/base_data_set.py
@@ -11,15 +11,16 @@ class BaseDataSet(ABC):

     @property
     @abstractmethod
-    def training_data_set(self, type, id):
+    def training_data_set(self):
         raise NotImplementedError

     @property
     @abstractmethod
-    def validation_data_set(self, type, id):
+    def validation_data_set(self):
         raise NotImplementedError

     @property
     @abstractmethod
-    def complement_data_set(self, type, id):
+    def mean_std(self):
         raise NotImplementedError
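
The interface drops the per-call (type, id) arguments: a data set object is now bound to a single machine and exposes its normalization statistics instead of a complement set. As a quick illustration (not part of the commit), a concrete subclass only has to provide the three accessors, mirroring how MCMDataSet implements them further down; the ToyDataSet name and its constructor are hypothetical, and any abstract members of BaseDataSet outside this hunk would still have to be supplied:

class ToyDataSet(BaseDataSet):
    # Hypothetical minimal implementation of the new BaseDataSet contract.

    def __init__(self, training_set, validation_set, mean, std):
        self._training_set = training_set
        self._validation_set = validation_set
        self._mean = mean
        self._std = std

    def training_data_set(self):
        return self._training_set

    def validation_data_set(self):
        return self._validation_set

    def mean_std(self):
        return self._mean, self._std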
dcase2020_task2/data_sets/mcm_dataset.py
@@ -3,7 +3,6 @@ import torch.utils.data
 import glob
 from dcase2020_task2.data_sets import BaseDataSet
 import librosa
-import sys
 import numpy as np

 CLASS_MAP = {
@@ -34,11 +33,20 @@ TRAINING_ID_MAP = {
 }


+def enumerate_development_datasets():
+    typ_id = []
+    for i in range(6):
+        for j in TRAINING_ID_MAP[i]:
+            typ_id.append((i, j))
+    return typ_id
+
+
 class MCMDataSet(BaseDataSet):

     def __init__(
             self,
+            machine_type,
+            machine_id,
             data_root=os.path.join(os.path.expanduser('~'), 'shared', 'dcase2020_task2'),
             context=5,
             num_mel=128,
@@ -46,9 +54,8 @@ class MCMDataSet(BaseDataSet):
             hop_size=512,
             power=1.0,
             fmin=40,
-            normalize='all',
             normalize_raw=False,
-            complement='all'
+            normalize=None
     ):
         self.data_root = data_root
         self.context = context
@@ -56,147 +63,54 @@ class MCMDataSet(BaseDataSet):
         self.n_fft = n_fft
         self.hop_size = hop_size
         self.power = power
-        self.complement = complement
         self.fmin = fmin
         self.normalize = normalize

+        assert type(machine_type) == int and type(machine_id) == int
+
+        kwargs = {
+            'data_root': self.data_root,
+            'context': self.context,
+            'num_mel': self.num_mel,
+            'n_fft': self.n_fft,
+            'hop_size': self.hop_size,
+            'power': power,
+            'normalize': normalize_raw,
+            'fmin': fmin
+        }
+
-        self.data_sets = dict()
-        for machine_type in range(6):
-            self.data_sets[machine_type] = dict()
-            for machine_id in TRAINING_ID_MAP[machine_type]:
-                self.data_sets[machine_type][machine_id] = (
-                    MachineDataSet(machine_type, machine_id, data_root=self.data_root, mode='training', context=self.context, num_mel=self.num_mel, n_fft=self.n_fft, hop_size=self.hop_size, power=power, normalize=normalize_raw, fmin=fmin),
-                    MachineDataSet(machine_type, machine_id, data_root=self.data_root, mode='validation', context=self.context, num_mel=self.num_mel, n_fft=self.n_fft, hop_size=self.hop_size, power=power, normalize=normalize_raw)
-                )
+        training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
+        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)

-        if normalize == 'all':
-            data = []
-            for machine_type in range(6):
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    train, _ = self.data_sets[machine_type][machine_id]
-                    data.append(train.data)
-            data = np.concatenate(data, axis=1)
-            mean = data.mean(axis=1, keepdims=True)
-            std = data.std(axis=1, keepdims=True)
-            for machine_type in range(6):
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    train, val = self.data_sets[machine_type][machine_id]
-                    train.data = (train.data - mean) / std
-                    val.data = (val.data - mean) / std
-        elif normalize == 'per_machine_id':
-            for machine_type in range(6):
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    train, val = self.data_sets[machine_type][machine_id]
-                    data = train.data
-                    mean = data.mean(axis=1, keepdims=True)
-                    std = data.std(axis=1, keepdims=True)
-                    train.data = (train.data - mean) / std
-                    val.data = (val.data - mean) / std
-        elif normalize == 'none':
-            pass
+        if normalize is None:
+            mean = training_set.data.mean(axis=1, keepdims=True)
+            std = training_set.data.std(axis=1, keepdims=True)
+            training_set.data = (training_set.data - mean) / std
+            validation_set.data = (validation_set.data - mean) / std
         else:
-            raise AttributeError
+            assert type(normalize) == tuple
+            assert len(normalize) == 2
+            mean, std = normalize
+            training_set.data = (training_set.data - mean) / std
+            validation_set.data = (validation_set.data - mean) / std
+
+        self.training_set = training_set
+        self.validation_set = validation_set
+        self.mean = mean
+        self.std = std

     @property
     def observation_shape(self) -> tuple:
         return 1, self.num_mel, self.context

-    def training_data_set(self, type, id):
-        return self.data_sets[type][id][0]
-
-    def validation_data_set(self, type, id):
-        return self.data_sets[type][id][1]
-
-    def complement_data_set(self, type, id):
-        complement_sets = []
-        if self.complement == 'all':
-            for machine_type in range(6):
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    if machine_type != type or machine_id != id:
-                        complement_sets.append(self.data_sets[machine_type][machine_id][0])
-        elif self.complement == 'same_mic_diff_type':
-            if type in [3, 4]:
-                types = [3, 4]
-            else:
-                types = [0, 1, 2, 5]
-            for machine_type in types:
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    if machine_type != type:
-                        complement_sets.append(self.data_sets[machine_type][machine_id][0])
-        elif self.complement == 'same_mic':
-            if type in [3, 4]:
-                types = [3, 4]
-            else:
-                types = [0, 1, 2, 5]
-            for machine_type in types:
-                for machine_id in TRAINING_ID_MAP[machine_type]:
-                    if machine_type != type or machine_id != id:
-                        complement_sets.append(self.data_sets[machine_type][machine_id][0])
-        elif self.complement == 'same_type':
-            for machine_id in TRAINING_ID_MAP[type]:
-                if machine_id != id:
-                    complement_sets.append(self.data_sets[type][machine_id][0])
-        elif self.complement == 'different_type':
-            for machine_type in range(6):
-                if machine_type != type:
-                    for machine_id in TRAINING_ID_MAP[type]:
-                        complement_sets.append(self.data_sets[type][machine_id][0])
-        return torch.utils.data.ConcatDataset(complement_sets)
-
-    def get_whole_training_data_set(self):
-        complement_sets = []
-        for machine_type in range(6):
-            for machine_id in TRAINING_ID_MAP[machine_type]:
-                complement_sets.append(self.data_sets[machine_type][machine_id][0])
-        return torch.utils.data.ConcatDataset(complement_sets)
-
-    def get_whole_validation_data_set(self):
-        complement_sets = []
-        for machine_type in range(6):
-            for machine_id in TRAINING_ID_MAP[machine_type]:
-                complement_sets.append(self.data_sets[machine_type][machine_id][1])
-        return torch.utils.data.ConcatDataset(complement_sets)
-
-    def get_machine_training_data_set(self, machine_type):
-        complement_sets = []
-        for machine_id in TRAINING_ID_MAP[machine_type]:
-            complement_sets.append(self.data_sets[machine_type][machine_id][0])
-        return torch.utils.data.ConcatDataset(complement_sets)
-
-    def get_machine_validation_data_set(self, machine_type):
-        complement_sets = []
-        for machine_id in TRAINING_ID_MAP[machine_type]:
-            complement_sets.append(self.data_sets[machine_type][machine_id][1])
-        return torch.utils.data.ConcatDataset(complement_sets)
+    def training_data_set(self):
+        return self.training_set
+
+    def validation_data_set(self):
+        return self.validation_set
+
+    def mean_std(self):
+        return self.mean, self.std


 class MachineDataSet(torch.utils.data.Dataset):
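
With this rewrite an MCMDataSet instance covers exactly one machine and normalizes it either from its own training statistics (normalize=None) or from an externally supplied (mean, std) tuple. A short usage sketch, not part of the commit, assuming the audio lives under the default data_root:

from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, enumerate_development_datasets

pairs = enumerate_development_datasets()   # all (machine_type, machine_id) pairs of the development set

# Normalize the first machine with its own training statistics ...
reference = MCMDataSet(*pairs[0])
mean, std = reference.mean_std()

# ... and reuse those statistics for a second machine via the new tuple form of `normalize`.
matched = MCMDataSet(*pairs[1], normalize=(mean, std))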
@@ -222,7 +136,7 @@ class MachineDataSet(torch.utils.data.Dataset):
         self.n_fft = n_fft
         self.hop_size = hop_size
         self.power = power
-        self.normalize = normalize
+        self.normalize = normalize
         self.mode = mode
         self.data_root = data_root
         self.context = context
@@ -294,10 +208,12 @@ class MachineDataSet(torch.utils.data.Dataset):
         file_path = os.path.join(self.data_root, file_name)

         if os.path.exists(file_path):
-            print('Loading {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
+            print('Loading {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
             data = np.load(file_path)
         else:
-            print('Loading & saving {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
+            print('Loading & saving {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
             data = np.empty((self.num_mel, self.file_length * len(files)), dtype=np.float32)
             for i, f in enumerate(files):
                 data[:, i * self.file_length:(i + 1) * self.file_length] = self.__load_preprocess_file__(f)
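
The hunk above keeps the existing load-or-build behaviour: the pre-processed mel spectrograms of all files are packed into one contiguous array per machine and, judging from the 'Loading & saving' message, cached on disk. A condensed sketch of that pattern (the np.save call and the build_fn helper are assumptions, not lines visible in this diff):

import os
import numpy as np

def load_or_build(file_path, build_fn):
    # Return the cached array if present, otherwise build it once and cache it.
    if os.path.exists(file_path):
        return np.load(file_path)
    data = build_fn()
    np.save(file_path, data)
    return data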
dcase2020_task2/experiments/baseline_dcase_experiment.py
@@ -76,6 +76,7 @@ class BaselineDCASEExperiment(BaseExperiment, pl.LightningModule):
         return self.result

     def train_dataloader(self):
+        assert False, 'Need to merge training sets frst!'
         dl = torch.utils.data.DataLoader(
             self.objects['data_set'].get_machine_training_data_set(self.machine_type),
             batch_size=self.objects['batch_size'],
@@ -86,6 +87,7 @@ class BaselineDCASEExperiment(BaseExperiment, pl.LightningModule):
         return dl

     def val_dataloader(self):
+        assert False, 'Need to merge training sets frst!'
         dl = torch.utils.data.DataLoader(
             self.objects['data_set'].get_machine_validation_data_set(self.machine_type),
             batch_size=self.objects['batch_size'],
@@ -154,6 +156,10 @@ def configuration():
     data_set = {
         'class': 'dcase2020_task2.data_sets.MCMDataSet',
+        'args': [machine_type, machine_id],
         'kwargs': {
             'context': context,
             'num_mel': num_mel,
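
The two assert False guards flag that this experiment still calls the get_machine_*_data_set accessors, which the reworked MCMDataSet no longer provides, so the per-id training sets have to be merged again before these loaders can be re-enabled. One possible merge, sketched here as an assumption rather than taken from the commit, concatenates one MCMDataSet per id of the selected machine type:

import torch.utils.data
from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, TRAINING_ID_MAP

def merged_training_data_set(machine_type, **kwargs):
    # Hypothetical helper: build one MCMDataSet per machine id and merge their training sets.
    return torch.utils.data.ConcatDataset([
        MCMDataSet(machine_type, machine_id, **kwargs).training_data_set()
        for machine_id in TRAINING_ID_MAP[machine_type]
    ])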
dcase2020_task2/experiments/baseline_experiment.py
@@ -12,7 +12,7 @@ SETTINGS['CAPTURE_MODE'] = 'sys'
 class BaselineExperiment(BaseExperiment, pl.LightningModule):
     '''
-    DCASE Baseline with AE per machine ID.
+    DCASE Baseline with AE, MADMOG & MAF per machine ID.
     '''

     def __init__(self, configuration_dict, _run):
@@ -83,12 +83,14 @@ def configuration():
     seed = 1220
     deterministic = False
     id = datetime.now().strftime("%Y-%m-%d_%H:%M:%S:%f")
-    log_path = os.path.join('..', 'experiment_logs', id)
+    log_path = os.path.join('experiment_logs', id)

     #####################
     # quick configuration, uses default parameters of more detailed configuration
     #####################

+    architecture = 'dcase2020_task2.models.MADE'
     machine_type = 0
     machine_id = 2
@@ -96,46 +98,49 @@ def configuration():
     debug = False

     if debug:
-        epochs = 50
+        epochs = 1
         num_workers = 0
     else:
-        epochs = 100
+        epochs = 50
         num_workers = 4

     learning_rate = 1e-3
     weight_decay = 1e-5
     normalize = 'per_machine_id'  #
     normalize_raw = True
     context = 5

-    descriptor = "BaselineExperiment_{}_{}_{}_{}_{}_{}".format(
+    descriptor = "BaselineExperiment_{}_{}_{}_{}_{}_{}_{}".format(
+        architecture,
         batch_size,
         learning_rate,
         weight_decay,
         normalize,
         normalize_raw,
-        context
+        context,
+        seed
     )

     ########################
     # detailed configuration
     ########################

-    num_mel = 256
+    num_mel = 128
     n_fft = 1024
     hop_size = 512
-    power = 1.0
+    power = 2.0
     fmin = 0

     data_set = {
         'class': 'dcase2020_task2.data_sets.MCMDataSet',
+        'args': [machine_type, machine_id],
         'kwargs': {
             'context': context,
             'num_mel': num_mel,
             'n_fft': n_fft,
             'hop_size': hop_size,
             'normalize': normalize,
             'normalize_raw': normalize_raw,
             'power': power,
             'fmin': fmin
@@ -153,13 +158,13 @@ def configuration():
     }

     model = {
-        'class': 'dcase2020_task2.models.MADE',
+        'class': architecture,
         'args': ['@data_set.observation_shape', '@reconstruction'],
         'kwargs': {
-            'hidden_size': 4096,
+            'hidden_size': 1024,
             'num_hidden': 4
         }
     }
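
The configuration describes each object with 'class', 'args' and 'kwargs' entries; the 'args' list now carries the machine_type and machine_id that MCMDataSet requires. How the framework turns such an entry into an object is not part of this diff; the idea is roughly the following sketch (build_object is a hypothetical name, and the '@...' cross-references such as '@data_set.observation_shape' are left out):

import importlib

def build_object(spec):
    # Import the class by its dotted path and instantiate it with the configured args/kwargs.
    module_name, class_name = spec['class'].rsplit('.', 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(*spec.get('args', []), **spec.get('kwargs', {}))

With the entry above, build_object(data_set) would boil down to MCMDataSet(machine_type, machine_id, context=context, num_mel=num_mel, ...).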
dcase2020_task2/experiments/experiment_base.py
@@ -64,7 +64,7 @@ class BaseExperiment(ABC, torch.nn.Module):
     def train_dataloader(self):
         dl = torch.utils.data.DataLoader(
-            self.objects['data_set'].training_data_set(self.machine_type, self.machine_id),
+            self.objects['data_set'].training_data_set(),
             batch_size=self.objects['batch_size'],
             shuffle=True,
             num_workers=self.objects['num_workers'],
@@ -74,7 +74,7 @@ class BaseExperiment(ABC, torch.nn.Module):
     def val_dataloader(self):
         dl = torch.utils.data.DataLoader(
-            self.objects['data_set'].validation_data_set(self.machine_type, self.machine_id),
+            self.objects['data_set'].validation_data_set(),
             batch_size=self.objects['batch_size'],
             shuffle=False,
             num_workers=self.objects['num_workers']
@@ -83,7 +83,7 @@ class BaseExperiment(ABC, torch.nn.Module):
     def test_dataloader(self):
         dl = torch.utils.data.DataLoader(
-            self.objects['data_set'].get_whole_validation_data_set(),
+            self.objects['data_set'].validation_data_set(),
             batch_size=self.objects['batch_size'],
             shuffle=False,
             num_workers=self.objects['num_workers']
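
All three loaders of the base experiment now draw from the single per-machine data set, so the (machine_type, machine_id) arguments disappear from the accessor calls. A standalone smoke test of that wiring might look as follows (not part of the commit); machine type 0 with id 2 matches the quick configuration above, and the batch size is arbitrary:

import torch.utils.data
from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet

data_set = MCMDataSet(0, 2)   # one development machine, normalized with its own statistics
train_loader = torch.utils.data.DataLoader(data_set.training_data_set(), batch_size=512, shuffle=True)
val_loader = torch.utils.data.DataLoader(data_set.validation_data_set(), batch_size=512, shuffle=False)

batch = next(iter(train_loader))   # one training batch, ready for the model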
dcase2020_task2/losses/mse_loss.py
@@ -4,8 +4,9 @@ import torch.nn.functional as F
 class MSEReconstruction(BaseReconstruction):

-    def __init__(self, weight=1.0, size_average=True, **kwargs):
+    def __init__(self, input_shape, weight=1.0, size_average=True, **kwargs):
         super().__init__()
+        self.input_shape = input_shape
         self.weight = weight
         self.size_average = size_average
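
MSEReconstruction now receives the input_shape as its first argument, matching the '@data_set.observation_shape' entry in the model configuration. The forward pass is outside the visible hunk; purely to illustrate why a reconstruction loss might want the shape, here is a sketch that averages the squared error per observation (all of it an assumption, not the repository's implementation):

import numpy as np
import torch.nn.functional as F

def mse_per_observation(batch, reconstruction, input_shape, weight=1.0):
    # Illustrative only: normalize the summed squared error by batch size and observation size.
    se = F.mse_loss(reconstruction, batch, reduction='sum')
    return weight * se / (batch.shape[0] * int(np.prod(input_shape)))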
dcase2020_task2/models/__init__.py
 from dcase2020_task2.models.base_model import ClassifierBase, VAEBase
-from dcase2020_task2.models.made import MADE
\ No newline at end of file
+from dcase2020_task2.models.made import MADE
+from dcase2020_task2.models.ae import AE
\ No newline at end of file
dcase2020_task2/models/ae.py
 import torch.nn
-from dcase2020_task2.models import VAEBase
+from dcase2020_task2.models import VAEBase  #
+from dcase2020_task2.priors import NoPrior
 import numpy as np
 import torch

 from dcase2020_task2.models.custom import activation_dict, init_weights


-class BaselineFCAE(torch.nn.Module, VAEBase):
+class AE(torch.nn.Module, VAEBase):

     def __init__(
             self,
             input_shape,
-            prior,
             reconstruction_loss,
+            prior=NoPrior(latent_size=8),
             hidden_size=128,
             num_hidden=3,
             activation='relu',
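
Together with the new export in models/__init__.py, the renamed AE can presumably be constructed from just an observation shape and a reconstruction loss, since the prior now defaults to NoPrior(latent_size=8). A small sketch under that assumption (the remaining keyword values simply repeat the defaults shown above):

from dcase2020_task2.models import AE
from dcase2020_task2.losses.mse_loss import MSEReconstruction

input_shape = (1, 128, 5)   # (channels, num_mel, context), cf. MCMDataSet.observation_shape
reconstruction = MSEReconstruction(input_shape, weight=1.0)
model = AE(input_shape, reconstruction, hidden_size=128, num_hidden=3, activation='relu')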
dcase2020_task2/utils/logger.py
@@ -201,7 +201,7 @@ class Logger:
         for i, machine_type in enumerate(unique_machine_types):
             machine_type = INVERSE_CLASS_MAP[machine_type]
             for machine_id in unique_machine_ids[i]:
-                result.setdefault(machine_type, dict())[machine_id] = self.__rauc_from_score__(
+                result.setdefault(machine_type, dict())[int(machine_id)] = self.__rauc_from_score__(
                     scores_mean,
                     scores_max,
                     ground_truth,
@@ -220,10 +220,10 @@ class Logger:
         scores_mean_ = scores_mean[np.logical_and(machine_types == machine_type, machine_ids == id)]
         scores_max_ = scores_max[np.logical_and(machine_types == machine_type, machine_ids == id)]

-        return metrics.roc_auc_score(ground_truth_, scores_mean_), \
-               metrics.roc_auc_score(ground_truth_, scores_mean_, max_fpr=max_fpr), \
-               metrics.roc_auc_score(ground_truth_, scores_max_), \
-               metrics.roc_auc_score(ground_truth_, scores_max_, max_fpr=max_fpr)
+        return float(metrics.roc_auc_score(ground_truth_, scores_mean_)), \
+               float(metrics.roc_auc_score(ground_truth_, scores_mean_, max_fpr=max_fpr)), \
+               float(metrics.roc_auc_score(ground_truth_, scores_max_)), \
+               float(metrics.roc_auc_score(ground_truth_, scores_max_, max_fpr=max_fpr))

     @staticmethod
     def __batches_to_per_file_scores__(outputs, aggregation_fun=None):
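
Both logger changes cast numpy scalars to plain Python int and float before they are stored in the result dictionary. A plausible reason, stated here as an assumption rather than in the commit, is JSON serialization of the logged results; numpy scalar types are rejected by the standard json module:

import json
from sklearn import metrics

auc = metrics.roc_auc_score([0, 1, 1, 0], [0.1, 0.9, 0.8, 0.3])   # typically returned as a numpy float64
json.dumps({'auc': float(auc)})          # works
# json.dumps({'auc': auc}) would raise TypeError: Object of type float64 is not JSON serializable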