Paul Primus / dcase2020_task2 / Commits

Commit bea8173f
Authored Jun 14, 2020 by Paul Primus

fix issue with different file lengths

Parent: e71e703c
Changes: 12 files
dcase2020_task2/data_sets/audio_set.py
@@ -50,19 +50,12 @@ class AudioSet(BaseDataSet):
         class_names = sorted([class_name for class_name in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, class_name))])

         training_sets = []
-        data_arrays = []
         for class_name in class_names:
             training_sets.append(AudioSetClassSubset(class_name, **kwargs))
-            data = training_sets[-1].data
-            for i, file in enumerate(data):
-                data_arrays.append(file)
-        data_arrays = np.concatenate(data_arrays, axis=-1)

         self.training_set = torch.utils.data.ConcatDataset(training_sets)
         self.validation_set = None
-        self.mean = data_arrays.mean(axis=1, keepdims=True)
-        self.std = data_arrays.std(axis=1, keepdims=True)
-        del data_arrays

     @property
     def observation_shape(self) -> tuple:
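The deleted lines show the statistics pattern used throughout these data sets: per-mel-bin mean and std are taken over the time axis (axis=1, keepdims=True), so spectrograms of any length can be pooled by concatenating along the last axis. A minimal sketch of that pattern, with assumed array shapes:

import numpy as np

# two hypothetical log-mel spectrograms of different lengths: (num_mel, num_frames)
a = np.random.randn(128, 300).astype(np.float32)
b = np.random.randn(128, 431).astype(np.float32)

# pooling along the time axis works regardless of per-file length
pooled = np.concatenate([a, b], axis=-1)      # shape (128, 731)
mean = pooled.mean(axis=1, keepdims=True)     # shape (128, 1)
std = pooled.std(axis=1, keepdims=True)       # shape (128, 1)

# keepdims=True lets the statistics broadcast against any (128, T) file
normalized = (a - mean) / std
print(pooled.shape, mean.shape, normalized.shape)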
@@ -74,9 +67,6 @@ class AudioSet(BaseDataSet):
     def validation_data_set(self):
         return self.validation_set

-    def mean_std(self):
-        return self.mean, self.std


 class AudioSetClassSubset(torch.utils.data.Dataset):
@@ -93,7 +83,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
             normalize_spec=False,
             fmin=0,
             hop_all=False,
-            max_file_per_class=2,
+            max_file_per_class=10,
             max_file_length=350,
     ):
dcase2020_task2/data_sets/base_data_set.py
@@ -18,9 +18,3 @@ class BaseDataSet(ABC):
     @abstractmethod
     def validation_data_set(self):
         raise NotImplementedError

-    @property
-    @abstractmethod
-    def mean_std(self):
-        raise NotImplementedError
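Dropping the abstract mean_std property also loosens the subclass contract: under Python's abc machinery a subclass becomes instantiable once every remaining abstractmethod is overridden, so implementations no longer have to provide mean_std. A minimal sketch of that mechanism, with toy class names:

from abc import ABC, abstractmethod

class Base(ABC):
    @abstractmethod
    def validation_data_set(self):
        raise NotImplementedError

class Toy(Base):
    def validation_data_set(self):
        return None

Toy()  # ok: every remaining abstractmethod is overridden
# if Base still declared an abstract mean_std, Toy() would raise TypeError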
dcase2020_task2/data_sets/complement_dataset.py
@@ -2,7 +2,6 @@ import os
 import torch.utils.data

 from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, ALL_ID_MAP
 from dcase2020_task2.data_sets import MachineDataSet
-import numpy as np

 VALID_TYPES = {
@@ -84,18 +83,11 @@ class ComplementMCMDataSet(BaseDataSet):
         training_sets = []
-        data = []
         for type_ in VALID_TYPES[self.valid_types][machine_type]:
             for id_ in ALL_ID_MAP[type_]:
                 if type_ != machine_type or id_ != machine_id:
                     t = MachineDataSet(type_, id_, mode='training', **kwargs)
-                    data.append(t.data)
                     training_sets.append(t)

-        data = np.concatenate(data, axis=-1)
-        self.mean = data.mean(axis=1, keepdims=True)
-        self.std = data.std(axis=1, keepdims=True)
-        del data

         self.training_set = torch.utils.data.ConcatDataset(training_sets)
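The loop above builds the complement of one machine: every (type, id) combination permitted by the valid_types table except the target (machine_type, machine_id) itself. A toy version of the same selection, with a made-up stand-in for ALL_ID_MAP:

# hypothetical stand-in for ALL_ID_MAP; the real map lives in dcase2020_task2.data_sets
ALL_ID_MAP = {'fan': [0, 2, 4, 6], 'pump': [0, 2, 4, 6]}

def complement_pairs(valid_types, machine_type, machine_id):
    """All (type, id) pairs except the target machine itself."""
    return [
        (type_, id_)
        for type_ in valid_types
        for id_ in ALL_ID_MAP[type_]
        if type_ != machine_type or id_ != machine_id
    ]

print(complement_pairs(['fan', 'pump'], 'fan', 2))
# ('fan', 2) is excluded; ('fan', 0/4/6) and all pump IDs remain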
@@ -109,6 +101,3 @@ class ComplementMCMDataSet(BaseDataSet):
     def validation_data_set(self):
         raise NotImplementedError

-    def mean_std(self):
-        return self.mean, self.std
dcase2020_task2/data_sets/mcm_dataset.py
@@ -51,17 +51,10 @@ class MCMDataSet(BaseDataSet):
         if machine_id == -1:
             training_sets = []
             validation_sets = []
-            data = []
             for id_ in ALL_ID_MAP[machine_type]:
                 training_sets.append(MachineDataSet(machine_type, id_, mode='training', **kwargs))
                 validation_sets.append(MachineDataSet(machine_type, id_, mode='validation', **kwargs))
-                data.append(training_sets[-1].data)
-            data = np.concatenate(data, axis=-1)
-            mean = data.mean(axis=1, keepdims=True)
-            std = data.std(axis=1, keepdims=True)
-            del data
             training_set = torch.utils.data.ConcatDataset(training_sets)
             validation_set = torch.utils.data.ConcatDataset(validation_sets)
@@ -69,13 +62,8 @@ class MCMDataSet(BaseDataSet):
             training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
             validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
-            mean = training_set.data.mean(axis=1, keepdims=True)
-            std = training_set.data.std(axis=1, keepdims=True)

         self.training_set = training_set
         self.validation_set = validation_set
-        self.mean = mean
-        self.std = std

     @property
     def observation_shape(self) -> tuple:
@@ -87,9 +75,6 @@ class MCMDataSet(BaseDataSet):
     def validation_data_set(self):
         return self.validation_set

-    def mean_std(self):
-        return self.mean, self.std


 class MachineDataSet(torch.utils.data.Dataset):
@@ -154,30 +139,35 @@ class MachineDataSet(torch.utils.data.Dataset):
-        files = sorted(files)
-        self.files = files
-        self.file_length = self.__load_preprocess_file__(files[0]).shape[-1]
-        self.num_samples_per_file = (self.file_length // self.context) if hop_all else (self.file_length - self.context + 1)
+        files = sorted(files)
+        self.files = files
         self.meta_data = self.__load_meta_data__(files)
         self.data = self.__load_data__(files)
+        self.index_map = {}
+        ctr = 0
+        for i, file in enumerate(self.data):
+            if hop_all:
+                residual = file.shape[-1] - context
+                self.index_map[ctr] = (i, residual)
+                ctr += 1
+            else:
+                for j in range(file.shape[-1] + 1 - context):
+                    self.index_map[ctr] = (i, j)
+                    ctr += 1
+        self.length = ctr

     def __getitem__(self, item):
-        # get offset in audio file
-        offset = item % self.num_samples_per_file
-        # get audio file index
-        item = item // self.num_samples_per_file
-        # load audio file and extract audio junk
-        residual = (self.file_length % self.context) + 1
-        offset = item * self.file_length + ((offset * self.context + np.random.randint(0, residual)) if self.hop_all else offset)
-        observation = self.data[:, offset: offset + self.context]
-        # create data object
-        meta_data = self.meta_data[item].copy()
+        file_idx, offset = self.index_map[item]
+        if self.hop_all:
+            offset = np.random.randint(0, offset)
+        observation = self.data[file_idx][:, offset: offset + self.context]
+        meta_data = self.meta_data[file_idx].copy()
         meta_data['observations'] = observation[None]
         return meta_data

     def __len__(self):
-        return len(self.files) * self.num_samples_per_file
+        return self.length

     def __load_meta_data__(self, files):
         data = []
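The new index_map is the core of the fix: instead of assuming one global file_length, it enumerates every valid context window across files of possibly different lengths and maps a flat dataset index to a (file_index, frame_offset) pair. A standalone sketch of the same bookkeeping with toy shapes (hop_all=False case):

import numpy as np

context = 5
# two toy spectrograms with different frame counts
data = [np.zeros((128, 8)), np.zeros((128, 11))]

index_map = {}
ctr = 0
for i, file in enumerate(data):
    # one entry per valid window start: file.shape[-1] + 1 - context of them
    for j in range(file.shape[-1] + 1 - context):
        index_map[ctr] = (i, j)
        ctr += 1

print(ctr)                               # 4 + 7 = 11 samples in total
file_idx, offset = index_map[6]
window = data[file_idx][:, offset: offset + context]
print(file_idx, offset, window.shape)    # second file, offset 2, (128, 5)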
@@ -187,7 +177,7 @@ class MachineDataSet(torch.utils.data.Dataset):
         return data

     def __load_data__(self, files):
-        file_name = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}.npy".format(
+        file_name = "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}.npz".format(
             self.num_mel,
             self.n_fft,
             self.hop_size,
@@ -201,32 +191,19 @@ class MachineDataSet(torch.utils.data.Dataset):
         )

         file_path = os.path.join(self.data_root, file_name)

+        data = []
         if os.path.exists(file_path):
             print('Loading {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
-            data = np.load(file_path)
+            container = np.load(file_path)
+            data = [container[key] for key in container]
         else:
-            data = np.empty((self.num_mel, self.file_length * len(files)), dtype=np.float32)
-            print('Loading & Saving {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
+            print('Loading & saving {} data set for machine type {} id {}...'.format(self.mode, self.machine_type, self.machine_id))
             for i, f in enumerate(files):
                 file = self.__load_preprocess_file__(f)
-                if file.shape[1] != self.file_length:
-                    if file.shape[1] < self.file_length:
-                        print(f'Too short: {f}')
-                        file = np.concatenate([file, file[:, :self.file_length - file.shape[1]]], -1)
-                    elif file.shape[1] > self.file_length:
-                        print(f'Too long: {f}')
-                        file = file[:, :self.file_length]
-                data[:, i * self.file_length:(i + 1) * self.file_length] = file
-            np.save(file_path, data)
+                data.append(file)
+            np.savez(file_path, *data)

         return data

     def __load_preprocess_file__(self, file):
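Switching the cache from np.save on one pre-allocated (num_mel, file_length * n_files) matrix to np.savez with one entry per file is what allows differing lengths to survive a round trip. A sketch of that round trip (the path and shapes are illustrative; container.files preserves the arr_0, arr_1, ... insertion order):

import numpy as np

# three per-file arrays with different frame counts
arrays = [np.random.randn(128, n).astype(np.float32) for n in (300, 431, 215)]

# np.savez stores positional arguments as arr_0, arr_1, ... inside the archive
np.savez('/tmp/cache_demo.npz', *arrays)

container = np.load('/tmp/cache_demo.npz')
restored = [container[key] for key in container.files]  # archive order is kept

assert all(np.array_equal(a, b) for a, b in zip(arrays, restored))
print([a.shape for a in restored])  # [(128, 300), (128, 431), (128, 215)]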
@@ -283,7 +260,6 @@ class MachineDataSet(torch.utils.data.Dataset):
     }

 if __name__ == '__main__':
     for type_, id_ in enumerate_development_datasets():
dcase2020_task2/experiments/classification_experiment.py
@@ -4,6 +4,7 @@ import torch
 from sacred import Experiment
 from dcase2020_task2.utils.logger import Logger
 import os
 import numpy as np
+import torch.utils.data

 # workaround...
 from sacred import SETTINGS
@@ -42,20 +43,16 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         #     **self.objects['fetaure_settings']
         # )

-        if self.objects.get('normalize_dataset') == 'normal':
-            self.mean = torch.from_numpy(self.normal_data_set.mean)
-            self.std = torch.from_numpy(self.normal_data_set.std)
-        elif self.objects.get('normalize_dataset') == 'abnormal':
-            self.mean = torch.from_numpy(self.abnormal_data_set.mean)
-            self.std = torch.from_numpy(self.abnormal_data_set.std)
-        elif self.objects.get('normalize_dataset') == 'average':
-            self.mean = torch.from_numpy((self.normal_data_set.mean + self.abnormal_data_set.mean) / 2)
-            # TODO: this is not correct (?)
-            self.std = torch.from_numpy((self.normal_data_set.std + self.abnormal_data_set.std) / 2)
-        elif self.objects.get('normalize_dataset') is None:
+        if self.objects.get('normalize_dataset') is None:
             print('No normalization.')
             self.mean = torch.zeros(self.normal_data_set.mean.shape)
             self.std = torch.ones(self.normal_data_set.std.shape)
+        elif self.objects.get('normalize_dataset') is 'min_max':
+            print('Min/Max normalization.')
+            self.min, self.max = None, None
+            raise NotImplementedError
+        elif self.objects.get('normalize_dataset') is 'mean_std':
+            print('Mean/Std normalization.')
+            self.mean, self.std = None, None
+            raise NotImplementedError
         else:
             raise AttributeError
@@ -89,8 +86,12 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
         return batch

     def normalize_batch(self, batch):
-        device = batch['observations'].device
-        batch['observations'] = (batch['observations'] - self.mean.to(device)) / self.std.to(device)
+        if self.objects.get('normalize_dataset') is 'min_max':
+            assert self.mean is None
+            batch['observations'] = (((batch['observations'] - self.min) / (self.max - self.min)) - 0.5) * 2
+        elif self.objects.get('normalize_dataset') is 'mean_std':
+            assert self.min is None
+            batch['observations'] = (batch['observations'] - self.mean) / self.std

     def training_step(self, batch_normal, batch_num, optimizer_idx=0):
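For reference, the min_max branch's expression (((x - min) / (max - min)) - 0.5) * 2 maps [min, max] linearly onto [-1, 1]: the division maps into [0, 1], subtracting 0.5 centers it, and the factor 2 stretches it to unit range on both sides. Both branches are currently stubs, since __init__ raises NotImplementedError for these modes. A quick numeric check of the formula with standalone tensors (the range values are made up):

import torch

x_min, x_max = -80.0, 20.0              # illustrative dynamic range, e.g. dB values
x = torch.tensor([x_min, -30.0, x_max])

scaled = (((x - x_min) / (x_max - x_min)) - 0.5) * 2
print(scaled)                            # tensor([-1., 0., 1.])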
@@ -200,7 +201,7 @@ def configuration():
     else:
         num_workers = 4

-    loss_class = 'dcase2020_task2.losses.AUC'
+    loss_class = 'dcase2020_task2.losses.BCE'
     batch_size = 32
     learning_rate = 1e-4
     weight_decay = 0
notebooks/bar_plot.ipynb (deleted, 100644 → 0)

notebooks/create_bar_plot.ipynb (new file, 0 → 100644)

notebooks/load_from_mongo.ipynb → notebooks/create_results_table.ipynb (file moved)

notebooks/data_normalization.ipynb (deleted, 100644 → 0)

notebooks/parallel_axis_plot.ipynb (deleted, 100644 → 0)
scripts/resnet_grid_search/gs_rk_0_2.sh (new file, 0 → 100755)

# grid search: 3 receptive-field settings x 2 learning-rate decays = 6 runs
epochs=100
loss_class=BCE
valid_types=loose

for learning_rate in 1e-4
do
    for rf in a_bit_larger normal a_bit_smaller
    do
        for learning_rate_decay in 0.99 0.98
        do
            ./scripts/per_id_run_parallel.sh classification_experiment "id=resnet_gridsearch_2_${rf}_${valid_types}_${learning_rate}_${learning_rate_decay}_${epochs}_${loss_class} learning_rate=$learning_rate learning_rate_decay=$learning_rate_decay epochs=$epochs rf=$rf valid_types=$valid_types loss_class=dcase2020_task2.losses.$loss_class -m student2.cp.jku.at:27017:resnet_gridsearch"
        done
    done
done
scripts/resnet_grid_search/gs_rk_3_2.sh (new file, 0 → 100755)

# same grid as gs_rk_0_2.sh, covering the remaining decay values 0.97 and 0.96
epochs=100
loss_class=BCE
valid_types=loose

for learning_rate in 1e-4
do
    for rf in a_bit_larger normal a_bit_smaller
    do
        for learning_rate_decay in 0.97 0.96
        do
            ./scripts/per_id_run_parallel.sh classification_experiment "id=resnet_gridsearch_2_${rf}_${valid_types}_${learning_rate}_${learning_rate_decay}_${epochs}_${loss_class} learning_rate=$learning_rate learning_rate_decay=$learning_rate_decay epochs=$epochs rf=$rf valid_types=$valid_types loss_class=dcase2020_task2.losses.$loss_class -m student2.cp.jku.at:27017:resnet_gridsearch"
        done
    done
done