Commit fd3d3b7f authored by Paul Primus

classification experiments

parent 711eaf79
from dcase2020_task2.data_sets.base_data_set import BaseDataSet
from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet
\ No newline at end of file
from dcase2020_task2.data_sets.mcm_dataset import MCMDataSet, MachineDataSet, INVERSE_CLASS_MAP, CLASS_MAP, TRAINING_ID_MAP
from dcase2020_task2.data_sets.abnormal_datasets import ComplementMCMDataSet
import os
import torch.utils.data
import glob
from dcase2020_task2.data_sets import BaseDataSet
import librosa
import numpy as np
from dcase2020_task2.data_sets import MachineDataSet, CLASS_MAP, TRAINING_ID_MAP, INVERSE_CLASS_MAP
class ComplementMCMDataSet(BaseDataSet):
def __init__(
self,
machine_type,
machine_id,
data_root=os.path.join(os.path.expanduser('~'), 'shared', 'dcase2020_task2'),
context=5,
num_mel=128,
n_fft=1024,
hop_size=512,
power=1.0,
fmin=0,
normalize_raw=False,
normalize=None,
hop_all=False
):
self.data_root = data_root
self.context = context
self.num_mel = num_mel
self.n_fft = n_fft
self.hop_size = hop_size
self.power = power
self.fmin = fmin
self.normalize = normalize
self.hop_all = hop_all
assert type(machine_type) == int and type(machine_id) == int
kwargs = {
'data_root': self.data_root,
'context': self.context,
'num_mel': self.num_mel,
'n_fft': self.n_fft,
'hop_size': self.hop_size,
'power': power,
'normalize': normalize_raw,
'fmin': fmin,
'hop_all': hop_all
}
training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
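        # Standardize the features: either compute mean/std on the target
        # machine's training data, or reuse externally supplied statistics.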
if normalize is None:
mean = training_set.data.mean(axis=1, keepdims=True)
std = training_set.data.std(axis=1, keepdims=True)
training_set.data = (training_set.data - mean) / std
validation_set.data = (validation_set.data - mean) / std
else:
assert type(normalize) == tuple
assert len(normalize) == 2
mean, std = normalize
training_set.data = (training_set.data - mean) / std
validation_set.data = (validation_set.data - mean) / std
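        # Build the complement: every (machine type, machine id) combination
        # except the target one, standardized with the same statistics.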
training_sets = []
validation_sets = []
for type_ in TRAINING_ID_MAP:
for id_ in TRAINING_ID_MAP[type_]:
if type_ != machine_type or id_ != machine_id:
t = MachineDataSet(type_, id_, mode='training', **kwargs)
t.data = (t.data - mean) / std
v = MachineDataSet(type_, id_, mode='validation', **kwargs)
v.data = (v.data - mean) / std
training_sets.append(t)
validation_sets.append(v)
self.training_set = torch.utils.data.ConcatDataset(training_sets)
self.validation_set = torch.utils.data.ConcatDataset(validation_sets)
self.mean = mean
self.std = std
@property
def observation_shape(self) -> tuple:
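        # shape of a single observation: (channels, mel bins, context frames)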
return 1, self.num_mel, self.context
def training_data_set(self):
return self.training_set
def validation_data_set(self):
return self.validation_set
def mean_std(self):
return self.mean, self.std
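For orientation, a minimal usage sketch (hypothetical, not part of this commit): the complement set exposes the usual data-set interface defined above, so it can be wrapped in a DataLoader like any other set.

# Hypothetical usage sketch; class and methods as defined above.
from dcase2020_task2.data_sets.abnormal_datasets import ComplementMCMDataSet
import torch.utils.data

complement = ComplementMCMDataSet(0, 0)  # everything except machine type 0, id 0
loader = torch.utils.data.DataLoader(
    complement.training_data_set(),  # ConcatDataset over all other machines
    batch_size=512,
    shuffle=True,
)
mean, std = complement.mean_std()  # statistics of the excluded target machine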
from dcase2020_task2.experiments import BaseExperiment
from datetime import datetime
import os
import pytorch_lightning as pl
import torch
from sacred import Experiment
from dcase2020_task2.utils.logger import Logger
import torch.utils.data
# workaround...
from sacred import SETTINGS
SETTINGS['CAPTURE_MODE'] = 'sys'
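# (Sacred's default fd-based output capturing can interfere with progress bars
# and multiprocessing; capturing via the sys streams avoids this.)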
class BaselineDCASEExperiment(BaseExperiment, pl.LightningModule):
'''
Reproduction of the DCASE baseline. It is essentially an autoencoder; the anomaly score is the reconstruction error.
'''
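    # Score sketch (illustration only): with x_hat = AE(x), the anomaly score
    # of x is its reconstruction error, e.g. ((x - x_hat) ** 2).mean().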
def __init__(self, configuration_dict, _run):
super().__init__(configuration_dict)
self.network = self.objects['auto_encoder_model']
self.prior = self.objects['prior']
self.reconstruction = self.objects['reconstruction']
self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)
# experiment state variables
self.epoch = -1
self.step = 0
self.result = None
def forward(self, batch):
batch['epoch'] = self.epoch
batch = self.network(batch)
return batch
def training_step(self, batch_normal, batch_num, optimizer_idx=0):
if batch_num == 0 and optimizer_idx == 0:
self.epoch += 1
if optimizer_idx == 0:
batch_normal = self(batch_normal)
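            # total loss: reconstruction error plus the prior's regularization term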
batch_normal['loss'] = batch_normal['reconstruction_loss'] + batch_normal['prior_loss']
self.logger_.log_training_step(batch_normal, self.step)
self.step += 1
else:
raise AttributeError
return {
'loss': batch_normal['loss'],
'tqdm': {'loss': batch_normal['loss']},
}
def validation_step(self, batch, batch_num):
self(batch)
return {
'targets': batch['targets'],
'scores': batch['scores'],
'machine_types': batch['machine_types'],
'machine_ids': batch['machine_ids'],
'file_ids': batch['file_ids']
}
def validation_end(self, outputs):
self.logger_.log_validation(outputs, self.step, self.epoch)
return {}
def test_step(self, batch, batch_num):
return self.validation_step(batch, batch_num)
def test_end(self, outputs):
self.result = self.logger_.log_test(outputs)
self.logger_.close()
return self.result
def train_dataloader(self):
assert False, 'Need to merge training sets first!'
dl = torch.utils.data.DataLoader(
self.objects['data_set'].get_machine_training_data_set(self.machine_type),
batch_size=self.objects['batch_size'],
shuffle=True,
num_workers=self.objects['num_workers'],
drop_last=False
)
return dl
def val_dataloader(self):
assert False, 'Need to merge training sets first!'
dl = torch.utils.data.DataLoader(
self.objects['data_set'].get_machine_validation_data_set(self.machine_type),
batch_size=self.objects['batch_size'],
shuffle=False,
num_workers=self.objects['num_workers'],
drop_last=False
)
return dl
def configuration():
seed = 1220
deterministic = False
id = datetime.now().strftime("%Y-%m-%d_%H:%M:%S:%f")
log_path = os.path.join('experiment_logs', id)
#####################
# quick configuration, uses default parameters of more detailed configuration
#####################
machine_type = 0
machine_id = 0
latent_size = 8
batch_size = 512
debug = False
if debug:
epochs = 1
num_workers = 0
else:
epochs = 100
num_workers = 2
learning_rate = 1e-3
weight_decay = 0
normalize = 'none'
normalize_raw = False
context = 5
descriptor = "BaselineDCASEExperiment_{}_{}_{}_{}_{}_{}_{}".format(
latent_size,
batch_size,
learning_rate,
weight_decay,
normalize,
normalize_raw,
context
)
########################
# detailed configuration
########################
num_mel = 128
n_fft = 1024
hop_size = 512
prior = {
'class': 'dcase2020_task2.priors.NoPrior',
'kwargs': {
'latent_size': latent_size,
'weight': 1.0
}
}
data_set = {
'class': 'dcase2020_task2.data_sets.MCMDataSet',
'args': [
machine_type,
machine_id
],
'kwargs': {
'context': context,
'num_mel': num_mel,
'n_fft': n_fft,
'hop_size': hop_size,
'normalize': normalize,
'normalize_raw': normalize_raw
}
}
reconstruction = {
'class': 'dcase2020_task2.losses.MSEReconstruction',
'kwargs': {
'weight': 1.0,
'input_shape': '@data_set.observation_shape'
}
}
auto_encoder_model = {
'class': 'dcase2020_task2.models.BaselineFCAE',
'args': [
'@data_set.observation_shape',
'@reconstruction',
'@prior'
]
}
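    # Strings prefixed with '@' (e.g. '@data_set.observation_shape',
    # '@auto_encoder_model.parameters()') appear to be resolved by the
    # experiment's object builder as references to other configured objects.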
lr_scheduler = {
'class': 'torch.optim.lr_scheduler.StepLR',
'args': [
'@optimizer',
],
'kwargs': {
'step_size': epochs
}
}
optimizer = {
'class': 'torch.optim.Adam',
'args': [
'@auto_encoder_model.parameters()'
],
'kwargs': {
'lr': learning_rate,
'betas': (0.9, 0.999),
'amsgrad': False,
'weight_decay': weight_decay,
}
}
trainer = {
'class': 'dcase2020_task2.trainers.PTLTrainer',
'kwargs': {
'max_epochs': epochs,
'checkpoint_callback': False,
'logger': False,
'early_stop_callback': False,
'gpus': [0],
'show_progress_bar': True,
'progress_bar_refresh_rate': 1000
}
}
ex = Experiment('dcase2020_task2_BaselineDCASEExperiment')
cfg = ex.config(configuration)
@ex.automain
def run(_config, _run):
experiment = BaselineDCASEExperiment(_config, _run)
return experiment.run()
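Assuming the usual Sacred entry point, an experiment of this kind would be launched with config overrides on the command line, e.g. (script name hypothetical):

python baseline_dcase_experiment.py with machine_type=1 machine_id=2 debug=True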
@@ -98,12 +98,12 @@ def configuration():
hop_size = 512
power = 2.0
fmin = 0
context = 8
context = 5
model_class = 'dcase2020_task2.models.ConvAE' # 'dcase2020_task2.models.MADE'
hidden_size = 256
num_hidden = 1
latent_size = 8 # only used for AEs
model_class = 'dcase2020_task2.models.AE' # 'dcase2020_task2.models.MADE'
hidden_size = 512
num_hidden = 3
latent_size = 64 # only used for AEs
debug = False
if debug:
@@ -113,9 +113,9 @@ def configuration():
epochs = 100
reconstruction_class = 'dcase2020_task2.losses.MSEReconstruction' # 'dcase2020_task2.losses.NLLReconstruction'
batch_size = 256
batch_size = 512
learning_rate = 1e-3
weight_decay = 0
weight_decay = 1e-5
normalize_raw = True
@@ -156,7 +156,7 @@ def configuration():
'normalize_raw': normalize_raw,
'power': power,
'fmin': fmin,
'hop_all': True
'hop_all': False
}
}
@@ -17,15 +17,23 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
def __init__(self, configuration_dict, _run):
super().__init__(configuration_dict)
# default stuff
self.network = self.objects['model']
self.data_set = self.objects['data_set']
self.loss = self.objects['loss']
self.logger_ = Logger(_run, self, self.configuration_dict, self.objects)
# experiment state variables
self.epoch = -1
self.step = 0
self.result = None
# will be set before each epoch
self.normal_data_set = self.objects['data_set']
self.abnormal_data_set = self.objects['abnormal_data_set']
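        # infinite iterator over the complement set, so every normal batch can
        # be paired with a freshly drawn proxy-abnormal batch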
self.inf_data_loader = self.get_inf_data_loader(
torch.utils.data.DataLoader(
self.data_set.complement_data_set(self.machine_type, self.machine_id),
self.abnormal_data_set.training_data_set(),
batch_size=self.objects['batch_size'],
shuffle=True,
num_workers=self.objects['num_workers'],
@@ -58,12 +66,26 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
self.epoch += 1
if optimizer_idx == 0:
batch_normal = self(batch_normal)
batch_abnormal = self(next(self.inf_data_loader))
abnormal_batch = next(self.inf_data_loader)
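            # build 0/1 targets: normal examples get 0, complement examples 1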
normal_batch_size = len(batch_normal['observations'])
abnormal_batch_size = len(abnormal_batch['observations'])
device = batch_normal['observations'].device
loss = self.loss.loss(batch_normal, batch_abnormal)
batch_normal['abnormal'] = torch.cat([
torch.zeros(normal_batch_size, 1).to(device),
torch.ones(abnormal_batch_size, 1).to(device)
])
batch_normal['loss'] = loss
batch_normal['observations'] = torch.cat([
batch_normal['observations'],
abnormal_batch['observations']
])
batch_normal = self(batch_normal)
batch_normal = self.loss(batch_normal)
self.logger_.log_training_step(batch_normal, self.step)
self.step += 1
@@ -82,7 +104,6 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
'scores': batch['scores'],
'machine_types': batch['machine_types'],
'machine_ids': batch['machine_ids'],
'part_numbers': batch['part_numbers'],
'file_ids': batch['file_ids']
}
@@ -94,10 +115,20 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
return self.validation_step(batch, batch_num)
def test_end(self, outputs):
self.result = self.logger_.log_testing(outputs)
self.result = self.logger_.log_test(outputs)
self.logger_.close()
return self.result
def train_dataloader(self):
dl = torch.utils.data.DataLoader(
self.objects['data_set'].training_data_set(),
batch_size=self.objects['batch_size'],
shuffle=True,
num_workers=self.objects['num_workers'],
drop_last=False
)
return dl
def configuration():
seed = 1220
@@ -112,73 +143,89 @@ def configuration():
machine_type = 0
machine_id = 0
batch_size = 512
num_mel = 128
n_fft = 1024
hop_size = 512
power = 2.0
fmin = 0
context = 5
model_class = 'models.FCNN'
hidden_size = 512
num_hidden = 3
latent_size = 64
debug = False
if debug:
epochs = 1
num_workers = 0
else:
epochs = 100
num_workers = 4
learning_rate = 1e-4
weight_decay = 1e-4
rho = 0.1
feature_context = 'short'
loss_class = 'losses.BCE'
mse_weight = 0.0
model_class = 'models.BaselineFCNN'
epochs = 100
loss_class = 'dcase2020_task2.losses.BCE'
batch_size = 512
learning_rate = 1e-3
weight_decay = 1e-5
normalize = 'all'
normalize_raw = True
complement = 'all'
# TODO: change default descriptor
descriptor = "ClassificationExperiment_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}".format(
descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}".format(
model_class,
loss_class,
hidden_size,
num_hidden,
latent_size,
batch_size,
learning_rate,
weight_decay,
normalize,
normalize_raw,
rho,
feature_context,
complement
num_mel,
context,
n_fft,
hop_size,
power,
fmin,
seed
)
########################
# detailed configurationSamplingFCAE
# detailed configuration
########################
if feature_context == 'short':
context = 5
num_mel = 128
n_fft = 1024
hop_size = 512
elif feature_context == 'long':
context = 11
num_mel = 40
n_fft = 512
hop_size = 256
if model_class == 'models.SamplingCRNNAE':
context = 1
data_set = {
'class': 'data_sets.MCMDataSet',
'class': 'dcase2020_task2.data_sets.MCMDataSet',
'args': [
machine_type,
machine_id
],
'kwargs': {
'context': context,
'num_mel': num_mel,
'n_fft': n_fft,
'hop_size': hop_size,
'normalize_raw': normalize_raw,
'power': power,
'fmin': fmin,
'hop_all': False
}
}
abnormal_data_set = {
'class': 'dcase2020_task2.data_sets.ComplementMCMDataSet',
'args': [
machine_type,
machine_id
],
'kwargs': {
'context': context,
'num_mel': num_mel,
'n_fft': n_fft,
'hop_size': hop_size,
'normalize': normalize,
'normalize_raw': normalize_raw,
'complement': complement
'power': power,
'fmin': fmin,
'hop_all': False
}
}
@@ -186,17 +233,14 @@ def configuration():
'class': loss_class,
'kwargs': {
'weight': 1.0,
'input_shape': '@data_set.observation_shape',