Commit 31821481 authored by Paul Primus

add submission files

parent 52ca3258
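# The maps below encode the DCASE 2020 Task 2 machine setup: CLASS_MAP assigns
# each machine type an integer label, TRAINING_ID_MAP lists the machine IDs of
# the development set, EVALUATION_ID_MAP those of the evaluation set, and
# ALL_ID_MAP their union.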
CLASS_MAP = {
    'fan': 0,
    'pump': 1,
    'slider': 2,
    'ToyCar': 3,
    'ToyConveyor': 4,
    'valve': 5
}

INVERSE_CLASS_MAP = {
    0: 'fan',
    1: 'pump',
    2: 'slider',
    3: 'ToyCar',
    4: 'ToyConveyor',
    5: 'valve'
}

TRAINING_ID_MAP = {
    0: [0, 2, 4, 6],
    1: [0, 2, 4, 6],
    2: [0, 2, 4, 6],
    3: [1, 2, 3, 4],
    4: [1, 2, 3],
    5: [0, 2, 4, 6]
}

EVALUATION_ID_MAP = {
    0: [1, 3, 5],
    1: [1, 3, 5],
    2: [1, 3, 5],
    3: [5, 6, 7],
    4: [4, 5, 6],
    5: [1, 3, 5]
}

ALL_ID_MAP = {
    0: [0, 1, 2, 3, 4, 5, 6],
    1: [0, 1, 2, 3, 4, 5, 6],
    2: [0, 1, 2, 3, 4, 5, 6],
    3: [1, 2, 3, 4, 5, 6, 7],
    4: [1, 2, 3, 4, 5, 6],
    5: [0, 1, 2, 3, 4, 5, 6]
}


def enumerate_development_datasets():
    typ_id = []
    for i in range(6):
        for j in TRAINING_ID_MAP[i]:
            typ_id.append((i, j))
    return typ_id


def enumerate_evaluation_datasets():
    typ_id = []
    for i in range(6):
        for j in EVALUATION_ID_MAP[i]:
            typ_id.append((i, j))
    return typ_id
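# Example: the enumerators yield (machine_type, machine_id) pairs in map order,
# so enumerate_development_datasets() starts with the 'fan' IDs:
#   [(0, 0), (0, 2), (0, 4), (0, 6), (1, 0), ...]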
from dcase2020_task2.data_sets.base_data_set import BaseDataSet
-from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet, INVERSE_CLASS_MAP, CLASS_MAP, TRAINING_ID_MAP
-from dcase2020_task2.data_sets.abnormal_datasets import ComplementMCMDataSet
+from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet
+from dcase2020_task2.data_sets.complement_dataset import ComplementMCMDataSet
import os
import torch.utils.data
import glob
from dcase2020_task2.data_sets import BaseDataSet
import librosa
import numpy as np
-from dcase2020_task2.data_sets import MachineDataSet, CLASS_MAP, TRAINING_ID_MAP, INVERSE_CLASS_MAP
+from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, ALL_ID_MAP
+from dcase2020_task2.data_sets import MachineDataSet
class ComplementMCMDataSet(BaseDataSet):
@@ -53,7 +49,6 @@ class ComplementMCMDataSet(BaseDataSet):
        training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
        validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)

        if normalize is None:
            mean = training_set.data.mean(axis=1, keepdims=True)
            std = training_set.data.std(axis=1, keepdims=True)
@@ -69,16 +64,16 @@ class ComplementMCMDataSet(BaseDataSet):
        training_sets = []
        # validation_sets = []

-        for type_ in TRAINING_ID_MAP:
-            for id_ in TRAINING_ID_MAP[type_]:
+        for type_ in ALL_ID_MAP:
+            for id_ in ALL_ID_MAP[type_]:
                if type_ != machine_type or (id_ != machine_id and same_type):
                    t = MachineDataSet(type_, id_, mode='training', **kwargs)
                    t.data = (t.data - mean) / std
-                    #v = MachineDataSet(type_, id_, mode='validation', **kwargs)
-                    #v.data = (v.data - mean) / std
                    training_sets.append(t)
+                    # don't load validation set ...
+                    # v = MachineDataSet(type_, id_, mode='validation', **kwargs)
+                    # v.data = (v.data - mean) / std
+                    # validation_sets.append(v)

        self.training_set = torch.utils.data.ConcatDataset(training_sets)
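        # selection logic above: with same_type=True the complement set contains
        # every machine except the target (machine_type, machine_id) itself,
        # i.e. other types plus other IDs of the same type; with same_type=False
        # it contains only machines of a different type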
...
import os
import torch.utils.data
import glob
-from dcase2020_task2.data_sets import BaseDataSet
+from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, \
+    enumerate_development_datasets, enumerate_evaluation_datasets
import librosa
import numpy as np
-CLASS_MAP = {
-    'fan': 0,
-    'pump': 1,
-    'slider': 2,
-    'ToyCar': 3,
-    'ToyConveyor': 4,
-    'valve': 5
-}
-
-INVERSE_CLASS_MAP = {
-    0: 'fan',
-    1: 'pump',
-    2: 'slider',
-    3: 'ToyCar',
-    4: 'ToyConveyor',
-    5: 'valve'
-}
-
-TRAINING_ID_MAP = {
-    0: [0, 2, 4, 6],
-    1: [0, 2, 4, 6],
-    2: [0, 2, 4, 6],
-    3: [1, 2, 3, 4],
-    4: [1, 2, 3],
-    5: [0, 2, 4, 6]
-}
-
-EVALUATION_ID_MAP = {
-    0: [1, 3, 5],
-    1: [1, 3, 5],
-    2: [1, 3, 5],
-    3: [5, 6, 7],
-    4: [4, 5, 6],
-    5: [1, 3, 5],
-}
-
-
-def enumerate_development_datasets():
-    typ_id = []
-    for i in range(6):
-        for j in TRAINING_ID_MAP[i]:
-            typ_id.append((i, j))
-    return typ_id
-
-
-def enumerate_evaluation_datasets():
-    typ_id = []
-    for i in range(6):
-        for j in EVALUATION_ID_MAP[i]:
-            typ_id.append((i, j))
-    return typ_id
class MCMDataSet(BaseDataSet):
@@ -152,6 +101,8 @@ class MachineDataSet(torch.utils.data.Dataset):
    ):
        assert mode in ['training', 'validation']

+        if mode == 'validation':
+            hop_all = False

        self.num_mel = num_mel
        self.n_fft = n_fft
@@ -245,8 +196,20 @@ class MachineDataSet(torch.utils.data.Dataset):
        data = np.empty((self.num_mel, self.file_length * len(files)), dtype=np.float32)

        for i, f in enumerate(files):
            file = self.__load_preprocess_file__(f)
-            assert file.shape[1] == self.file_length
-            data[:, i * self.file_length:(i + 1) * self.file_length] = self.__load_preprocess_file__(f)
+            if file.shape[1] != self.file_length:
+                if file.shape[1] < self.file_length:
+                    print(f'Too short: {f}')
+                    file = np.concatenate([
+                        file,
+                        file[:, :self.file_length - file.shape[1]]
+                    ], -1)
+                elif file.shape[1] > self.file_length:
+                    print(f'Too long: {f}')
+                    file = file[:, :self.file_length]
+            data[:, i * self.file_length:(i + 1) * self.file_length] = file

        np.save(file_path, data)
        return data
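# The branch above forces every clip onto the fixed file_length grid: clips that
# are too short are wrap-padded with their own leading frames, overly long clips
# are truncated, so all files pack into one contiguous (num_mel, N) array.
# A minimal standalone sketch of the same wrap-pad/truncate step, assuming a
# (num_mel, T) mel spectrogram x:
#
#   def fit_length(x, target):
#       if x.shape[1] < target:
#           x = np.concatenate([x, x[:, :target - x.shape[1]]], -1)
#       return x[:, :target]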
@@ -304,11 +267,11 @@ class MachineDataSet(torch.utils.data.Dataset):
if __name__ == '__main__':

    for type_, id_ in enumerate_development_datasets():
-        _ = MachineDataSet(type_, id_, mode='training')
-        _ = MachineDataSet(type_, id_, mode='validation')
+        _ = MachineDataSet(type_, id_, mode='training', n_fft=256)
+        _ = MachineDataSet(type_, id_, mode='validation', n_fft=256)

    for type_, id_ in enumerate_evaluation_datasets():
-        _ = MachineDataSet(type_, id_, mode='training')
-        _ = MachineDataSet(type_, id_, mode='validation')
+        _ = MachineDataSet(type_, id_, mode='training', n_fft=256)
+        _ = MachineDataSet(type_, id_, mode='validation', n_fft=256)
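# Running this module directly instantiates every development and evaluation
# dataset in both STFT configurations (default n_fft and n_fft=256), which
# presumably pre-computes and caches the mel arrays via the np.save call above.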
from dcase2020_task2.experiments import BaseExperiment
from dcase2020_task2.utils.logger import Logger
from datetime import datetime
import os
import pytorch_lightning as pl
import torch
from sacred import Experiment
import torch.utils.data
# workaround...
from sacred import SETTINGS
SETTINGS['CAPTURE_MODE'] = 'sys'
import numpy as np
class VAEExperiment(BaseExperiment, pl.LightningModule):
    '''
    Reproduction of the DCASE baseline: an auto-encoder whose anomaly score is
    the reconstruction error.
    '''
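    # In practice the per-window anomaly score is derived from the reconstruction
    # error; a rough sketch of the idea (the key names 'observations' and
    # 'reconstructions' are illustrative, not taken from this file):
    #
    #   scores = ((batch['observations'] - batch['reconstructions']) ** 2).mean(axis=(1, 2))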
    def __init__(self, configuration_dict, _run):
        super().__init__(configuration_dict)

        self.network = self.objects['auto_encoder_model']
        self.prior = self.objects['prior']
        self.reconstruction = self.objects['reconstruction']
        self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)

        # experiment state variables
        self.epoch = -1
        self.step = 0
        self.result = None

    def forward(self, batch):
        batch['epoch'] = self.epoch
        batch = self.network(batch)
        return batch
    def training_step(self, batch, batch_num, optimizer_idx=0):
        if batch_num == 0 and optimizer_idx == 0:
            self.epoch += 1

        if optimizer_idx == 0:
            batch = self(batch)
            reconstruction_loss = self.reconstruction.loss(batch)
            prior_loss = self.prior.loss(batch)

            batch['reconstruction_loss'] = reconstruction_loss / (self.objects['batch_size'] * self.objects['num_mel'] * self.objects['context'])
            batch['prior_loss'] = prior_loss / self.objects['batch_size']
            batch['loss'] = reconstruction_loss + prior_loss
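            # the normalized 'reconstruction_loss' and 'prior_loss' entries are
            # for logging/comparability only; the quantity actually optimized is
            # the plain sum stored in batch['loss']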
            if batch_num == 0:
                self.logger_.log_image_reconstruction(batch, self.epoch)

            self.logger_.log_training_step(batch, self.step)
            self.step += 1
        else:
            raise AttributeError

        return {
            'loss': batch['loss'],
            'tqdm': {'loss': batch['loss']},
        }
    def validation_step(self, batch, batch_num):
        self(batch)
        return {
            'targets': batch['targets'],
            'scores': batch['scores'],
            'machine_types': batch['machine_types'],
            'machine_ids': batch['machine_ids'],
            'part_numbers': batch['part_numbers'],
            'file_ids': batch['file_ids']
        }

    def validation_end(self, outputs):
        self.logger_.log_vae_validation(outputs, self.step, self.epoch)
        return {
            'val_loss': np.concatenate([o['scores'].detach().cpu().numpy() for o in outputs]).mean()
        }

    def test_step(self, batch, batch_num):
        return self.validation_step(batch, batch_num)

    def test_end(self, outputs):
        # TODO: add new logging method
        # self.result = self.logger_.log_testing(outputs)
        self.logger_.close()
        return {}
    def train_dataloader(self):
        if self.objects['debug']:
            ds = torch.utils.data.Subset(self.objects['data_set'].get_whole_training_data_set(), np.arange(1024))
        else:
            ds = self.objects['data_set'].get_whole_training_data_set()

        dl = torch.utils.data.DataLoader(
            ds,
            batch_size=self.objects['batch_size'],
            shuffle=True,
            num_workers=self.objects['num_workers'],
            drop_last=False
        )
        return dl
def configuration():
    seed = 1220
    deterministic = False
    id = datetime.now().strftime("%Y-%m-%d_%H:%M:%S:%f")
    log_path = os.path.join('..', 'experiment_logs', id)

    #####################
    # quick configuration, uses default parameters of more detailed configuration
    #####################

    machine_type = 0
    machine_id = 0

    latent_size = 40
    batch_size = 512

    debug = False
    if debug:
        epochs = 1
        num_workers = 0
    else:
        epochs = 50
        num_workers = 4

    learning_rate = 1e-4
    weight_decay = 0
    normalize = 'all'
    normalize_raw = True
    prior_class = 'priors.StandardNormalPrior'
    context = 11

    descriptor = "vae_training_{}_{}_{}_{}_{}_{}_{}_{}".format(
        prior_class,
        latent_size,
        batch_size,
        learning_rate,
        weight_decay,
        normalize,
        normalize_raw,
        context
    )
    ########################
    # detailed configuration
    ########################

    num_mel = 40
    n_fft = 512
    hop_size = 256

    prior = {
        'class': prior_class,
        'kwargs': {
            'latent_size': latent_size,
            'weight': 1
        }
    }

    data_set = {
        'class': 'data_sets.MCMDataSet',
        'kwargs': {
            'context': context,
            'num_mel': num_mel,
            'n_fft': n_fft,
            'hop_size': hop_size,
            'normalize': normalize,
            'normalize_raw': normalize_raw
        }
    }

    reconstruction = {
        'class': 'losses.MSE',
        'kwargs': {
            'weight': 1,
            'input_shape': '@data_set.observation_shape'
        }
    }

    auto_encoder_model = {
        'class': 'models.SamplingFCAE',
        'args': [
            '@data_set.observation_shape',
            '@reconstruction',
            '@prior'
        ]
    }

    lr_scheduler = {
        'class': 'torch.optim.lr_scheduler.StepLR',
        'args': [
            '@optimizer',
        ],
        'kwargs': {
            'step_size': epochs
        }
    }

    optimizer = {
        'class': 'torch.optim.Adam',
        'args': [
            '@auto_encoder_model.parameters()'
        ],
        'kwargs': {
            'lr': learning_rate,
            'betas': (0.9, 0.999),
            'amsgrad': False,
            'weight_decay': weight_decay,
        }
    }

    trainer = {
        'class': 'trainers.PTLTrainer',
        'kwargs': {
            'max_epochs': epochs,
            'checkpoint_callback': False,
            'logger': False,
            'early_stop_callback': False,
            'gpus': [0],
            'show_progress_bar': True,
            'progress_bar_refresh_rate': 1000
        }
    }
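    # Note on the convention used throughout these dicts: strings starting with
    # '@' (e.g. '@optimizer', '@data_set.observation_shape',
    # '@auto_encoder_model.parameters()') appear to be placeholders that the
    # experiment framework resolves to the referenced object, attribute, or call
    # when the object graph is built.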
ex = Experiment('dcase2020_task2_vae_training')
cfg = ex.config(configuration)


@ex.automain
def run(_config, _run):
    experiment = VAEExperiment(_config, _run)
    return experiment.run()
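# sacred wires this together: ex.config(configuration) registers the local
# variables of configuration() as config entries, and @ex.automain runs run()
# with the resolved _config; entries can be overridden from the command line,
# e.g. (illustrative invocation, the script name is assumed):
#
#   python vae_training.py with machine_type=3 epochs=100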
@@ -140,7 +140,7 @@ def configuration():
    # quick configuration, uses default parameters of more detailed configuration
    #####################

-    machine_type = 0
+    machine_type = 1
    machine_id = 0

    num_mel = 128
@@ -148,16 +148,13 @@ def configuration():
    hop_size = 512
    power = 2.0
    fmin = 0
-    context = 32
+    context = 128

    model_class = 'dcase2020_task2.models.CNN'
-    hidden_size = 512
-    num_hidden = 4
+    hidden_size = 128
+    num_hidden = 3
    dropout_probability = 0.0

-    # complement set
-    same_type = True

    debug = False
    if debug:
        num_workers = 0
@@ -165,12 +162,14 @@ def configuration():
        num_workers = 4

    epochs = 100
-    loss_class = 'dcase2020_task2.losses.BCE'
+    loss_class = 'dcase2020_task2.losses.AUC'
    batch_size = 512
    learning_rate = 1e-4
    weight_decay = 0
+    same_type = True
    normalize_raw = True
+    hop_all = False

    # TODO: change default descriptor
    descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_Complement:[{}]{}".format(
@@ -197,7 +196,6 @@ def configuration():
    # detailed configuration
    ########################

    data_set = {
        'class': 'dcase2020_task2.data_sets.MCMDataSet',
        'args': [
@@ -212,7 +210,7 @@ def configuration():
            'normalize_raw': normalize_raw,
            'power': power,
            'fmin': fmin,
-            'hop_all': False
+            'hop_all': hop_all
        }
    }
@@ -230,7 +228,7 @@ def configuration():
            'normalize_raw': normalize_raw,
            'power': power,
            'fmin': fmin,
-            'hop_all': False,
+            'hop_all': hop_all,
            'same_type': same_type
        }
    }
@@ -251,7 +249,8 @@ def configuration():
        'kwargs': {
            'hidden_size': hidden_size,
            'num_hidden': num_hidden,
-            'dropout_probability': dropout_probability
+            'dropout_probability': dropout_probability,
+            'batch_norm': False
        }
    }
...
from dcase2020_task2.models.base_model import ClassifierBase, VAEBase
-from dcase2020_task2.models.made import MADE
+from dcase2020_task2.models.other.made import MADE
from dcase2020_task2.models.ae import AE, ConvAE
-from dcase2020_task2.models.classifier import FCNN, CNN
\ No newline at end of file
+from dcase2020_task2.models.classifier import FCNN, CNN
+from dcase2020_task2.models.resnet import ResNet
\ No newline at end of file
@@ -4,7 +4,7 @@ from dcase2020_task2.priors import NoPrior
import numpy as np
import torch
-from dcase2020_task2.models.custom import activation_dict, init_weights
+from dcase2020_task2.models.custom import ACTIVATION_DICT, init_weights
class AE(torch.nn.Module, VAEBase):
@@ -21,7 +21,7 @@ class AE(torch.nn.Module, VAEBase):
    ):
        super().__init__()

-        activation_fn = activation_dict[activation]
+        activation_fn = ACTIVATION_DICT[activation]

        if prior is None:
            prior = NoPrior(latent_size=hidden_size)
        self.input_shape = input_shape
@@ -101,7 +101,7 @@ class ConvlBlock(torch.nn.Module):
                )
            )
            modules.append(
-                activation_dict[activation]()
+                ACTIVATION_DICT[activation]()
            )

        self.last_activation = modules.pop()
@@ -129,7 +129,7 @@ class ConvAE(torch.nn.Module, VAEBase):
    ):
        super().__init__()

-        activation_fn = activation_dict[activation]
+        activation_fn = ACTIVATION_DICT[activation]

        if prior is None:
            prior = NoPrior(latent_size=hidden_size)
        self.input_shape = input_shape
...
@@ -4,7 +4,7 @@ from dcase2020_task2.priors import NoPrior
import numpy as np
import torch
-from dcase2020_task2.models.custom import activation_dict, init_weights
+from dcase2020_task2.models.custom import ACTIVATION_DICT, init_weights
class FCNN(torch.nn.Module):
@@ -21,7 +21,7 @@ class FCNN(torch.nn.Module):