Paul Primus / dcase2020_task2 / Commits / 9ca5631a

Commit 9ca5631a, authored Jun 11, 2020 by Paul Primus
update run scripts
Parent: 6469ea38
Changes: 9 files
dcase2020_task2/data_sets/__init__.py
@@ -60,3 +60,4 @@ def enumerate_evaluation_datasets():
 from dcase2020_task2.data_sets.base_data_set import BaseDataSet
 from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet
 from dcase2020_task2.data_sets.complement_dataset import ComplementMCMDataSet
+from dcase2020_task2.data_sets.audio_set import AudioSet
dcase2020_task2/data_sets/audio_set.py (new file, mode 100644)
import os
import torch.utils.data
import glob
from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, \
    EVALUATION_ID_MAP, ALL_ID_MAP, enumerate_development_datasets, enumerate_evaluation_datasets
import librosa
import numpy as np
from dcase2020_task2.data_sets import MCMDataSet
import pickle


class AudioSet(BaseDataSet):

    def __init__(
            self,
            data_root=os.path.join(os.path.expanduser('~'), 'shared', 'audioset', 'audiosetdata'),
            context=5,
            num_mel=128,
            n_fft=1024,
            hop_size=512,
            power=2.0,
            fmin=0,
            normalize_raw=True,
            hop_all=False
    ):
        self.data_root = data_root
        self.context = context
        self.num_mel = num_mel
        self.n_fft = n_fft
        self.hop_size = hop_size
        self.power = power
        self.fmin = fmin
        self.hop_all = hop_all

        kwargs = {
            'data_root': self.data_root,
            'context': self.context,
            'num_mel': self.num_mel,
            'n_fft': self.n_fft,
            'hop_size': self.hop_size,
            'power': power,
            'normalize': normalize_raw,
            'fmin': fmin,
            'hop_all': hop_all
        }

        # Use the first 30 class folders found below data_root.
        class_names = sorted([
            class_name for class_name in os.listdir(data_root)
            if os.path.isdir(os.path.join(data_root, class_name))
        ])[:30]

        training_sets = []
        data_arrays = []
        for class_name in class_names:
            training_sets.append(AudioSetClassSubset(class_name, **kwargs))
            data = training_sets[-1].data
            for i, file in enumerate(data):
                data_arrays.append(file)
        data_arrays = np.concatenate(data_arrays, axis=-1)

        self.training_set = torch.utils.data.ConcatDataset(training_sets)
        self.validation_set = None
        # Per-mel-bin statistics over all selected classes.
        self.mean = data_arrays.mean(axis=1, keepdims=True)
        self.std = data_arrays.std(axis=1, keepdims=True)

    @property
    def observation_shape(self) -> tuple:
        return 1, self.num_mel, self.context

    def training_data_set(self):
        return self.training_set

    def validation_data_set(self):
        return self.validation_set

    def mean_std(self):
        return self.mean, self.std


class AudioSetClassSubset(torch.utils.data.Dataset):

    def __init__(
            self,
            class_name,
            data_root=os.path.join(os.path.expanduser('~'), 'shared', 'audioset', 'audiosetdata'),
            context=5,
            num_mel=128,
            n_fft=1024,
            hop_size=512,
            power=2.0,
            normalize=True,
            fmin=0,
            hop_all=False,
            max_file_length=350
    ):
        self.num_mel = num_mel
        self.n_fft = n_fft
        self.hop_size = hop_size
        self.power = power
        self.normalize = normalize
        self.data_root = data_root
        self.context = context
        self.fmin = fmin
        self.hop_all = hop_all
        self.class_name = class_name
        self.max_file_length = max_file_length

        files = glob.glob(os.path.join(data_root, class_name, '*.wav'))
        assert len(files) > 0
        files = sorted(files)
        self.files = files

        self.meta_data = self.__load_meta_data__(files)
        self.data = self.__load_data__(files)

        # Map a flat sample index to (file index, frame offset).
        self.index_map = {}
        ctr = 0
        for i, file in enumerate(self.data):
            for j in range(file.shape[-1] + 1 - context):
                self.index_map[ctr] = (i, j)
                ctr += 1
        self.length = ctr

    def __getitem__(self, item):
        file_idx, offset = self.index_map[item]
        observation = self.data[file_idx][:, offset:offset + self.context]
        meta_data = self.meta_data[file_idx].copy()
        meta_data['observations'] = observation[None]
        return meta_data

    def __len__(self):
        return self.length

    def __load_meta_data__(self, files):
        data = []
        for f in files:
            md = self.__get_meta_data__(f)
            data.append(md)
        return data

    def __load_data__(self, files):
        # Cache the preprocessed spectrograms per class, keyed by the feature settings.
        file_name = "{}_{}_{}_{}_{}_{}_{}.npz".format(
            self.num_mel,
            self.n_fft,
            self.hop_size,
            self.power,
            self.normalize,
            self.fmin,
            self.class_name
        )
        file_path = os.path.join(self.data_root, file_name)
        data = []
        if os.path.exists(file_path):
            print('Loading audio set class {}'.format(self.class_name))
            container = np.load(file_path)
            data = [container[key] for key in container]
        else:
            print('Loading & saving audio set class {}'.format(self.class_name))
            for i, f in enumerate(files):
                file = self.__load_preprocess_file__(f)
                if file.shape[1] > self.max_file_length:
                    print(f'File too long: {f} - {file.shape[1]}')
                    file = file[:, :self.max_file_length]
                data.append(file)
            np.savez(file_path, *data)
        return data

    def __load_preprocess_file__(self, file):
        x, sr = librosa.load(file, sr=16000, mono=True)
        # Truncate overly long recordings before computing the spectrogram
        # (condition matches the slice; the original parenthesized it inconsistently).
        if len(x) > (self.max_file_length + 1) * self.hop_size + self.n_fft:
            x = x[:(self.max_file_length + 1) * self.hop_size + self.n_fft]
        if self.normalize:
            x = (x - x.mean()) / x.std()
        x = librosa.feature.melspectrogram(
            y=x,
            sr=sr,
            n_fft=self.n_fft,
            hop_length=self.hop_size,
            n_mels=self.num_mel,
            power=self.power,
            fmin=self.fmin
        )
        if self.power == 1:
            x = librosa.core.amplitude_to_db(x)
        elif self.power == 2:
            x = librosa.core.power_to_db(x)
        else:
            # only magnitude (power=1) and power (power=2) spectrograms are supported
            raise AttributeError
        return x

    def __get_meta_data__(self, file_path):
        return {
            'targets': 1,
            'machine_types': -1,
            'machine_ids': -1,
            'file_ids': os.sep.join(os.path.normpath(file_path).split(os.sep)[-4:])
        }


if __name__ == '__main__':
    # Smoke test: build both data sets and fetch one AudioSet observation.
    # AudioSet takes no 'normalize' argument; its statistics are read via mean_std().
    mcmc = MCMDataSet(0, 0)
    a = audio_set = AudioSet().training_data_set()[0]
    print(a)
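
AudioSet exposes the same interface as the repository's other data sets (training_data_set(), mean_std(), observation_shape), so it can be consumed by the usual PyTorch machinery. A minimal usage sketch, assuming the AudioSet WAV folders exist under the default data_root; the batch size and prints are illustrative only:

import torch.utils.data
from dcase2020_task2.data_sets import AudioSet

audio_set = AudioSet(context=5, num_mel=128)
loader = torch.utils.data.DataLoader(audio_set.training_data_set(), batch_size=32, shuffle=True)

batch = next(iter(loader))
print(batch['observations'].shape)  # torch.Size([32, 1, 128, 5]): batch x channel x mel bin x context frame
mean, std = audio_set.mean_std()    # per-mel-bin statistics over the selected class folders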
dcase2020_task2/data_sets/complement_dataset.py
@@ -85,10 +85,18 @@ class ComplementMCMDataSet(BaseDataSet):
         training_sets = []
         # validation_sets = []
 
+        valid_types = {
+            0: [1, 2, 5],
+            1: [0, 2, 5],
+            2: [0, 1, 5],
+            5: [0, 1, 2],
+            3: [4],
+            4: [3],
+        }
+
         for type_ in ALL_ID_MAP:
-            for id_ in ALL_ID_MAP[type_]:
-                if type_ != machine_type or (id_ != machine_id and machine_id != -1):
+            if type_ in valid_types[machine_type]:
+                for id_ in ALL_ID_MAP[type_]:
+                    # if type_ != machine_type: #or (id_ != machine_id and machine_id != -1):
                     t = MachineDataSet(type_, id_, mode='training', **kwargs)
                     t.data = (t.data - mean) / std
                     training_sets.append(t)
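
The valid_types table introduced above replaces the old "every machine except the target" rule: the complement of a machine type is now drawn only from a fixed list of partner types. A self-contained sketch of the selection logic (ALL_ID_MAP here is a toy stand-in, not the repository's real mapping):

# Toy stand-in for dcase2020_task2.data_sets.ALL_ID_MAP: two machine IDs per type.
ALL_ID_MAP = {t: [0, 1] for t in range(6)}
valid_types = {0: [1, 2, 5], 1: [0, 2, 5], 2: [0, 1, 5],
               5: [0, 1, 2], 3: [4], 4: [3]}

machine_type = 3
selected = [(t, i) for t in ALL_ID_MAP if t in valid_types[machine_type] for i in ALL_ID_MAP[t]]
print(selected)  # [(4, 0), (4, 1)]: type 3 draws its complement only from type 4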
dcase2020_task2/data_sets/mcm_dataset.py
@@ -21,7 +21,6 @@ class MCMDataSet(BaseDataSet):
             power=1.0,
             fmin=0,
             normalize_raw=False,
-            normalize=None,
             hop_all=False
     ):
         self.data_root = data_root
@@ -31,7 +30,6 @@ class MCMDataSet(BaseDataSet):
         self.hop_size = hop_size
         self.power = power
         self.fmin = fmin
-        self.normalize = normalize
         self.hop_all = hop_all
 
         assert type(machine_type) == int and type(machine_id) == int
@@ -57,36 +55,20 @@ class MCMDataSet(BaseDataSet):
             validation_sets.append(MachineDataSet(machine_type, id_, mode='validation', **kwargs))
             data.append(training_sets[-1].data)
 
-            if normalize is None:
-                data = np.concatenate(data, axis=-1)
-                mean = data.mean(axis=1, keepdims=True)
-                std = data.std(axis=1, keepdims=True)
-            else:
-                assert type(normalize) == tuple
-                assert len(normalize) == 2
-                mean, std = normalize
-
-            for training_set, validation_set in zip(training_sets, validation_sets):
-                training_set.data = (training_set.data - mean) / std
-                validation_set.data = (validation_set.data - mean) / std
+            data = np.concatenate(data, axis=-1)
+            mean = data.mean(axis=1, keepdims=True)
+            std = data.std(axis=1, keepdims=True)
+            del data
 
             training_set = torch.utils.data.ConcatDataset(training_sets)
             validation_set = torch.utils.data.ConcatDataset(validation_sets)
         else:
             training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
             validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
 
-            if normalize is None:
-                mean = training_set.data.mean(axis=1, keepdims=True)
-                std = training_set.data.std(axis=1, keepdims=True)
-                training_set.data = (training_set.data - mean) / std
-                validation_set.data = (validation_set.data - mean) / std
-            else:
-                assert type(normalize) == tuple
-                assert len(normalize) == 2
-                mean, std = normalize
-                training_set.data = (training_set.data - mean) / std
-                validation_set.data = (validation_set.data - mean) / std
+            mean = training_set.data.mean(axis=1, keepdims=True)
+            std = training_set.data.std(axis=1, keepdims=True)
 
         self.training_set = training_set
         self.validation_set = validation_set
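
With the normalize argument removed, MCMDataSet always computes mean/std from its own training data and leaves the spectrograms unnormalized; applying the statistics is now the consumer's job (see the normalize_batch method added to the experiment below). A sketch of the intended call pattern, assuming the DCASE data is available and the statistics are exposed as in AudioSet.mean_std():

from dcase2020_task2.data_sets import MCMDataSet

data_set = MCMDataSet(0, 0, context=32, num_mel=128)
mean, std = data_set.mean_std()  # per-mel-bin statistics of the raw training data

observation = data_set.training_data_set()[0]['observations']
normalized = (observation - mean) / std  # normalization happens at the caller, not in the data set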
dcase2020_task2/experiments/classification_experiment.py
@@ -11,6 +11,7 @@ from sacred import SETTINGS
 SETTINGS['CAPTURE_MODE'] = 'sys'
 from datetime import datetime
+from dcase2020_task2.data_sets import AudioSet
 
 
 class ClassificationExperiment(BaseExperiment, pl.LightningModule):
@@ -29,7 +30,13 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         # will be set before each epoch
         self.normal_data_set = self.objects['data_set']
-        self.abnormal_data_set = self.objects['abnormal_data_set']
+        self.abnormal_data_set = AudioSet(**self.objects['fetaure_settings'])
+
+        self.mean = torch.from_numpy((self.normal_data_set.mean + self.abnormal_data_set.mean) / 2)
+        self.std = torch.from_numpy((self.normal_data_set.std + self.abnormal_data_set.std) / 2)
 
         self.inf_data_loader = self.get_inf_data_loader(torch.utils.data.DataLoader(
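
Averaging the two data sets' statistics yields one normalization shared by the normal and abnormal batch streams; both data sets must expose numpy arrays of matching shape for torch.from_numpy to accept them. A toy illustration with stand-in arrays:

import numpy as np
import torch

normal_mean, abnormal_mean = np.zeros((128, 1)), np.ones((128, 1))  # stand-ins, shape (num_mel, 1)
pooled_mean = torch.from_numpy((normal_mean + abnormal_mean) / 2)   # same arithmetic as the experiment
print(pooled_mean.shape)  # torch.Size([128, 1])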
@@ -60,6 +67,10 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         batch = self.network(batch)
         return batch
 
+    def normalize_batch(self, batch):
+        device = batch['observations'].device
+        batch['observations'] = (batch['observations'] - self.mean.to(device)) / self.std.to(device)
+
     def training_step(self, batch_normal, batch_num, optimizer_idx=0):
         if batch_num == 0 and optimizer_idx == 0:
@@ -68,6 +79,9 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         if optimizer_idx == 0:
             abnormal_batch = next(self.inf_data_loader)
 
+            self.normalize_batch(batch_normal)
+            self.normalize_batch(abnormal_batch)
+
             normal_batch_size = len(batch_normal['observations'])
             abnormal_batch_size = len(abnormal_batch['observations'])
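
A quick self-contained check of the arithmetic normalize_batch now applies to each batch dict before the loss is computed (shapes and values are stand-ins):

import torch

mean = torch.zeros(128, 1)  # stand-in for the experiment's pooled mean
std = torch.ones(128, 1)    # stand-in for the experiment's pooled std
batch = {'observations': torch.randn(32, 1, 128, 5)}

device = batch['observations'].device
batch['observations'] = (batch['observations'] - mean.to(device)) / std.to(device)  # as in normalize_batch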
@@ -141,7 +155,7 @@ def configuration():
     #####################
 
     machine_type = 0
-    machine_id = -1
+    machine_id = 0
 
     num_mel = 128
     n_fft = 1024
@@ -151,15 +165,18 @@ def configuration():
     context = 32
 
     model_class = 'dcase2020_task2.models.CNN'
-    hidden_size = 128
+    hidden_size = 64
     num_hidden = 3
     dropout_probability = 0.0
 
     epochs = 100
+    hop_all = False
 
     debug = False
     if debug:
         num_workers = 0
         epochs = 1
+        epochs = 100
+        hop_all = True
     else:
         num_workers = 4
@@ -169,7 +186,6 @@ def configuration():
     weight_decay = 0
 
     normalize_raw = True
-    hop_all = False
 
     # TODO: change default descriptor
     descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}".format(
@@ -195,40 +211,24 @@ def configuration():
     # detailed configuration
     ########################
 
-    data_set = {
-        'class': 'dcase2020_task2.data_sets.MCMDataSet',
-        'args': [machine_type, machine_id],
-        'kwargs': {
-            'context': context,
-            'num_mel': num_mel,
-            'n_fft': n_fft,
-            'hop_size': hop_size,
-            'normalize_raw': normalize_raw,
-            'power': power,
-            'fmin': fmin,
-            'hop_all': hop_all
-        }
-    }
-
-    abnormal_data_set = {
-        'class': 'dcase2020_task2.data_sets.ComplementMCMDataSet',
-        'args': [machine_type, machine_id],
-        'kwargs': {
-            'context': context,
-            'num_mel': num_mel,
-            'n_fft': n_fft,
-            'hop_size': hop_size,
-            'normalize_raw': normalize_raw,
-            'power': power,
-            'fmin': fmin,
-            'hop_all': hop_all
-        }
-    }
+    fetaure_settings = {
+        'context': context,
+        'num_mel': num_mel,
+        'n_fft': n_fft,
+        'hop_size': hop_size,
+        'normalize_raw': normalize_raw,
+        'power': power,
+        'fmin': fmin,
+        'hop_all': hop_all
+    }
+
+    data_set = {
+        'class': 'dcase2020_task2.data_sets.MCMDataSet',
+        'args': [machine_type, machine_id],
+        'kwargs': fetaure_settings
+    }
 
     loss = {
notebooks/bar_plot.ipynb
(diff collapsed)
scripts/submission/per_id_cnn_classification_validation.sh → scripts/submission/per_id_cnn_classification.sh (renamed)

 conda activate dcase2020_task2
 ./scripts/per_id_run_parallel.sh classification_experiment "debug=False num_hidden=3 hidden_size=256 batch_size=512 learning_rate=1e-4 weight_decay=0 model_class=dcase2020_task2.models.CNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
+./scripts/per_id_create_submission.sh classification_experiment "id=cnn_classification_machine_data_set debug=False num_hidden=3 hidden_size=256 batch_size=512 learning_rate=1e-4 weight_decay=0 model_class=dcase2020_task2.models.CNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
scripts/submission/per_id_cnn_classification_submission.sh (deleted, mode 100755)

-conda activate dcase2020_task2
-./scripts/per_id_create_submission.sh classification_experiment "id=cnn_classification_machine_data_set debug=False same_type=True num_hidden=3 hidden_size=256 batch_size=512 learning_rate=1e-4 weight_decay=0 model_class=dcase2020_task2.models.CNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
scripts/submission/per_id_flat_classification.sh

 conda activate dcase2020_task2
-./scripts/per_id_create_submission.sh classification_experiment "id=flat_classification_machine_data_set same_type=True debug=False num_hidden=3 hidden_size=128 batch_size=4096 weight_decay=1e-5 model_class=dcase2020_task2.models.FCNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
-./scripts/per_id_run_parallel.sh classification_experiment "same_type=True debug=False num_hidden=3 hidden_size=128 batch_size=4096 weight_decay=1e-5 model_class=dcase2020_task2.models.FCNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
+./scripts/per_id_create_submission.sh classification_experiment "id=flat_classification_machine_data_set debug=False num_hidden=3 hidden_size=128 batch_size=4096 weight_decay=1e-5 model_class=dcase2020_task2.models.FCNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
+./scripts/per_id_run_parallel.sh classification_experiment "debug=False num_hidden=3 hidden_size=128 batch_size=4096 weight_decay=1e-5 model_class=dcase2020_task2.models.FCNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"