Commit 2abcfd34 authored by Paul Primus

add

parent d6d43f92
@@ -2,7 +2,7 @@ import os
 import torch.utils.data
 from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, ALL_ID_MAP
 from dcase2020_task2.data_sets import MachineDataSet
+import numpy as np


 class ComplementMCMDataSet(BaseDataSet):
@@ -19,8 +19,7 @@ class ComplementMCMDataSet(BaseDataSet):
             fmin=0,
             normalize_raw=False,
             normalize=None,
-            hop_all=False,
-            same_type=False
+            hop_all=False
     ):
         self.data_root = data_root
         self.context = context
@@ -46,27 +45,50 @@ class ComplementMCMDataSet(BaseDataSet):
             'hop_all': hop_all
         }

-        training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
-        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
-
-        if normalize is None:
-            mean = training_set.data.mean(axis=1, keepdims=True)
-            std = training_set.data.std(axis=1, keepdims=True)
-            training_set.data = (training_set.data - mean) / std
-            validation_set.data = (validation_set.data - mean) / std
+        if machine_id == -1:
+            training_sets = []
+            validation_sets = []
+            data = []
+            for id_ in ALL_ID_MAP[machine_type]:
+                training_sets.append(MachineDataSet(machine_type, id_, mode='training', **kwargs))
+                validation_sets.append(MachineDataSet(machine_type, id_, mode='validation', **kwargs))
+                data.append(training_sets[-1].data)
+            if normalize is None:
+                data = np.concatenate(data, axis=-1)
+                mean = data.mean(axis=1, keepdims=True)
+                std = data.std(axis=1, keepdims=True)
+            else:
+                assert type(normalize) == tuple
+                assert len(normalize) == 2
+                mean, std = normalize
+            for training_set, validation_set in zip(training_sets, validation_sets):
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std
+            del data
         else:
-            assert type(normalize) == tuple
-            assert len(normalize) == 2
-            mean, std = normalize
-            training_set.data = (training_set.data - mean) / std
-            validation_set.data = (validation_set.data - mean) / std
+            training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
+            validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
+            if normalize is None:
+                mean = training_set.data.mean(axis=1, keepdims=True)
+                std = training_set.data.std(axis=1, keepdims=True)
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std
+            else:
+                assert type(normalize) == tuple
+                assert len(normalize) == 2
+                mean, std = normalize
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std

         training_sets = []
         # validation_sets = []
         for type_ in ALL_ID_MAP:
             for id_ in ALL_ID_MAP[type_]:
-                if type_ != machine_type or (id_ != machine_id and same_type):
+                if type_ != machine_type or (id_ != machine_id and machine_id != -1):
                     t = MachineDataSet(type_, id_, mode='training', **kwargs)
                     t.data = (t.data - mean) / std
                     training_sets.append(t)
......
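The new `machine_id == -1` branch above pools normalization statistics over every ID of a machine type instead of computing them from a single ID's training data. A minimal sketch of that pooling, with toy arrays standing in for `MachineDataSet(...).data` (the `(num_mel, num_frames)` shape is an assumption for illustration, not taken from the diff):

import numpy as np

# Toy stand-ins for MachineDataSet(...).data; shapes are assumptions.
rng = np.random.default_rng(0)
per_id_data = [rng.normal(size=(128, 1000)), rng.normal(size=(128, 1500))]

# Pooled statistics over all IDs of one machine type (machine_id == -1):
data = np.concatenate(per_id_data, axis=-1)  # stack along the frame axis
mean = data.mean(axis=1, keepdims=True)      # one mean per frequency bin
std = data.std(axis=1, keepdims=True)        # one std per frequency bin

# Each ID's data is then standardized with the shared statistics,
# mirroring the zip(...) loop in the hunk above:
normalized = [(d - mean) / std for d in per_id_data]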
 import os
 import torch.utils.data
 import glob
-from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, \
+from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, ALL_ID_MAP,\
     enumerate_development_datasets, enumerate_evaluation_datasets
 import librosa
 import numpy as np
@@ -48,20 +48,45 @@ class MCMDataSet(BaseDataSet):
             'hop_all': hop_all
         }

-        training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
-        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
-
-        if normalize is None:
-            mean = training_set.data.mean(axis=1, keepdims=True)
-            std = training_set.data.std(axis=1, keepdims=True)
-            training_set.data = (training_set.data - mean) / std
-            validation_set.data = (validation_set.data - mean) / std
+        if machine_id == -1:
+            training_sets = []
+            validation_sets = []
+            data = []
+            for id_ in ALL_ID_MAP[machine_type]:
+                training_sets.append(MachineDataSet(machine_type, id_, mode='training', **kwargs))
+                validation_sets.append(MachineDataSet(machine_type, id_, mode='validation', **kwargs))
+                data.append(training_sets[-1].data)
+            if normalize is None:
+                data = np.concatenate(data, axis=-1)
+                mean = data.mean(axis=1, keepdims=True)
+                std = data.std(axis=1, keepdims=True)
+            else:
+                assert type(normalize) == tuple
+                assert len(normalize) == 2
+                mean, std = normalize
+            for training_set, validation_set in zip(training_sets, validation_sets):
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std
+            del data
+            training_set = torch.utils.data.ConcatDataset(training_sets)
+            validation_set = torch.utils.data.ConcatDataset(validation_sets)
         else:
-            assert type(normalize) == tuple
-            assert len(normalize) == 2
-            mean, std = normalize
-            training_set.data = (training_set.data - mean) / std
-            validation_set.data = (validation_set.data - mean) / std
+            training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
+            validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
+            if normalize is None:
+                mean = training_set.data.mean(axis=1, keepdims=True)
+                std = training_set.data.std(axis=1, keepdims=True)
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std
+            else:
+                assert type(normalize) == tuple
+                assert len(normalize) == 2
+                mean, std = normalize
+                training_set.data = (training_set.data - mean) / std
+                validation_set.data = (validation_set.data - mean) / std

         self.training_set = training_set
         self.validation_set = validation_set
......
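With the change above, passing `machine_id=-1` to `MCMDataSet` loads every ID of the machine type and wraps the per-ID sets in `torch.utils.data.ConcatDataset`. A hypothetical usage sketch; the import path and the keyword arguments (borrowed from the config dict further down) are assumptions, since the full constructor signature is not visible in this diff:

from dcase2020_task2.data_sets import MCMDataSet  # import path assumed

# machine_id=-1 now selects all IDs of machine type 0 and normalizes them
# with statistics pooled over the whole type:
data_set = MCMDataSet(machine_type=0, machine_id=-1, normalize_raw=True, hop_all=False)

train = data_set.training_set  # torch.utils.data.ConcatDataset over all IDs
val = data_set.validation_set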
@@ -140,8 +140,8 @@ def configuration():
     # quick configuration, uses default parameters of more detailed configuration
     #####################

-    machine_type = 1
-    machine_id = 0
+    machine_type = 0
+    machine_id = -1

     num_mel = 128
     n_fft = 1024
@@ -155,24 +155,24 @@ def configuration():
     num_hidden = 3
     dropout_probability = 0.0

-    epochs = 100
+    debug = False
+    if debug:
+        num_workers = 0
+        epochs = 1
+    else:
+        num_workers = 4
+        epochs = 100

     loss_class = 'dcase2020_task2.losses.AUC'
     batch_size = 512
     learning_rate = 1e-4
     weight_decay = 0
-    same_type = True
     normalize_raw = True
     hop_all = False

     # TODO: change default descriptor
-    descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_Complement:[{}]{}".format(
+    descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}".format(
         model_class,
         hidden_size,
         num_hidden,
@@ -188,7 +188,6 @@ def configuration():
         hop_size,
         power,
         fmin,
-        same_type,
         seed
     )
@@ -228,8 +227,7 @@ def configuration():
         'normalize_raw': normalize_raw,
         'power': power,
         'fmin': fmin,
-        'hop_all': hop_all,
-        'same_type': same_type
+        'hop_all': hop_all
     }
 }
......
@@ -222,7 +222,7 @@ class Logger:
         scores_mean_ = scores_mean[np.logical_and(machine_types == machine_type, machine_ids == id)]
         scores_max_ = scores_max[np.logical_and(machine_types == machine_type, machine_ids == id)]

-        if all(ground_truth[0] == np.array(ground_truth)):
+        if all(ground_truth_[0] == np.array(ground_truth_)):
             return 0, 0, 0, 0
         return float(metrics.roc_auc_score(ground_truth_, scores_mean_)), \
......
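The one-line Logger fix points the degenerate-class guard at the filtered array `ground_truth_` rather than the unfiltered `ground_truth`; without it, a machine ID whose subset contains only one class would reach `metrics.roc_auc_score` and raise. A minimal sketch of the failure mode with toy values, assuming `metrics` is `sklearn.metrics` as the call suggests:

import numpy as np
from sklearn import metrics  # assumed import; the diff only shows metrics.roc_auc_score

ground_truth_ = np.array([0, 0, 0])      # only one class left after filtering
scores_mean_ = np.array([0.1, 0.4, 0.2])

# roc_auc_score raises ValueError when only one class is present,
# hence the early return of zeros in the Logger:
if all(ground_truth_[0] == np.array(ground_truth_)):
    result = (0, 0, 0, 0)
else:
    result = float(metrics.roc_auc_score(ground_truth_, scores_mean_))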
conda activate dcase2020_task2
./scripts/run_and_create_submission_for_per_type_models.sh classification_experiment "id=cnn_classification_machine_data_set_per_type debug=False same_type=True num_hidden=3 hidden_size=256 batch_size=512 learning_rate=1e-4 weight_decay=0 model_class=dcase2020_task2.models.CNN loss_class=dcase2020_task2.losses.AUC -m student2.cp.jku.at:27017:dcase2020_task2_submission"
\ No newline at end of file
......
conda activate dcase2020_task2
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=0 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=0 machine_id=-1 $2 > /dev/null 2>&1 &
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=1 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=1 machine_id=-1 $2 > /dev/null 2>&1 &
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=2 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=2 machine_id=-1 $2 > /dev/null 2>&1 &
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=3 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=3 machine_id=-1 $2 > /dev/null 2>&1 &
wait
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=0 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=4 machine_id=-1 $2 > /dev/null 2>&1 &
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=1 python -m dcase2020_task2.experiments.$1 with num_workers=4 machine_type=5 machine_id=-1 $2 > /dev/null 2>&1 &
# Submission information
submission:
# Submission label
# Label is used to index submissions.
# Generate your label in the following way to avoid overlapping codes among submissions:
# [Last name of corresponding author]_[Abbreviation of institute of the corresponding author]_task[task number]_[index number of your submission (1-4)]
label: Primus_JKU_task2_1
# Submission name
# This name will be used in the results tables when space permits.
name: Outlier Exposed Convolutional Classifier
# Submission name abbreviated
# This abbreviated name will be used in the results table when space is tight.
# Use a maximum of 10 characters.
abbreviation: OECC
# Authors of the submitted system.
# Mark authors in the order you want them to appear in submission lists.
# One of the authors has to be marked as corresponding author, this will be listed next to the submission in the results tables.
authors:
# First author
- lastname: Primus
firstname: Paul
email: paul.primus@jku.at # Contact email address
corresponding: true # Mark true for one of the authors
# Affiliation information for the author
affiliation:
institution: JKU
department: Computational Perception
location: Austria, Linz
# System information
system:
# System description, metadata provided here will be used to do a meta-analysis of the submitted system.
# Use general level tags, when possible use the tags provided in comments.
# If information field is not applicable to the system, use "!!null".
description:
# Audio input
# Please specify all sampling rates (comma-separated list).
# e.g. 16kHz, 22.05kHz, 44.1kHz
input_sampling_rate: 16kHz
# Data augmentation methods
# Please specify all methods used (comma-separated list).
# e.g. mixup, time stretching, block mixing, pitch shifting, ...
data_augmentation: !!null
# Front-end (preprocessing) methods
# Please specify all methods used (comma-separated list).
# e.g. HPSS, WPE, NMF, NN filter, RPCA, ...
front_end: !!null
# Acoustic representation
# one or multiple labels, e.g. MFCC, log-mel energies, spectrogram, CQT, raw waveform, ...
acoustic_features: log-mel energies
# Embeddings
# Please specify all embeddings used (comma-separated list).
# one or multiple, e.g. VGGish, OpenL3, ...
embeddings: !!null
# Machine learning
# In case using ensemble methods, please specify all methods used (comma-separated list).
# e.g. AE, VAE, GAN, GMM, k-means, OCSVM, normalizing flow, CNN, LSTM, random forest, ensemble, ...
machine_learning_method: CNN
# Method for aggregating predictions over time
# Please specify all methods used (comma-separated list).
# e.g. average, median, maximum, minimum, ...
aggregation_method: average
# Ensemble method subsystem count
# In case ensemble method is not used, mark !!null.
# e.g. 2, 3, 4, 5, ...
ensemble_method_subsystem_count: !!null
# Decision making in ensemble
# e.g. average, median, maximum, minimum, ...
decision_making: !!null
# External data usage method
# Please specify all usages (comma-separated list).
# e.g. simulation of anomalous samples, embeddings, pre-trained model, ...
external_data_usage: !!null
# Usage of the development dataset
# Please specify all usages (comma-separated list).
# e.g. development, pre-training, fine-tuning
development_data_usage: development
# System complexity, metadata provided here may be used to evaluate submitted systems from the computational load perspective.
complexity:
# Total amount of parameters used in the acoustic model.
# For neural networks, this information is usually given before training process in the network summary.
# For methods other than neural networks, if parameter count information is not directly available, try estimating the count as accurately as possible.
# In case of ensemble approaches, add up parameters for all subsystems.
# In case embeddings are used, add up parameter count of the embedding extraction networks and classification network.
# Use numerical value.
total_parameters: 269992
# List of external datasets used in the submission.
# Development dataset is used here only as an example, list only external datasets
external_datasets:
# Dataset name
- name: DCASE 2020 Challenge Task 2 Development Dataset
# Dataset access URL
url: https://zenodo.org/record/3678171
# URL to the source code of the system [optional, highly recommended]
# Reproducibility will be used to evaluate submitted systems.
source_code: https://github.com/y-kawagu/dcase2020_task2_baseline
# System results
results:
development_dataset:
# System results for development dataset.
# Full results are not mandatory, however, they are highly recommended as they are needed for a thorough analysis of the challenge submissions.
# If you are unable to provide all results, also incomplete results can be reported.
# Average of AUCs over all Machine IDs [%]
# No need to round numbers
ToyCar:
averaged_auc: 78.77
averaged_pauc: 67.58
ToyConveyor:
averaged_auc: 72.53
averaged_pauc: 60.43
fan:
averaged_auc: 65.83
averaged_pauc: 52.45
pump:
averaged_auc: 72.89
averaged_pauc: 59.99
slider:
averaged_auc: 84.76
averaged_pauc: 66.53
valve:
averaged_auc: 66.28
averaged_pauc: 50.98
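The template comments above carry hard constraints (an abbreviation of at most 10 characters, a numerical `total_parameters`). A hedged sketch of a pre-submission sanity check; the file name and the exact nesting of `complexity` under `system` are assumptions:

import yaml  # PyYAML

# Hypothetical file name; '!!null' entries load as None with safe_load.
with open("Primus_JKU_task2_1.meta.yaml") as f:
    meta = yaml.safe_load(f)

assert meta["submission"]["label"] == "Primus_JKU_task2_1"
assert len(meta["submission"]["abbreviation"]) <= 10                      # "maximum of 10 characters"
assert isinstance(meta["system"]["complexity"]["total_parameters"], int)  # "use numerical value"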