Commit cdb0061b authored by Paul Primus
Browse files

add audio set, change normalization

parent 9ca5631a
......@@ -6,7 +6,6 @@ from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP,
import librosa
import numpy as np
from dcase2020_task2.data_sets import MCMDataSet
import pickle
class AudioSet(BaseDataSet):
......@@ -31,6 +30,7 @@ class AudioSet(BaseDataSet):
self.power = power
self.fmin = fmin
self.hop_all = hop_all
self.normalize_raw = normalize_raw
kwargs = {
'data_root': self.data_root,
......@@ -38,13 +38,13 @@ class AudioSet(BaseDataSet):
'num_mel': self.num_mel,
'n_fft': self.n_fft,
'hop_size': self.hop_size,
'power': power,
'normalize': normalize_raw,
'fmin': fmin,
'hop_all': hop_all
'power': self.power,
'normalize': self.normalize_raw,
'fmin': self.fmin,
'hop_all': self.hop_all
}
class_names = sorted([class_name for class_name in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, class_name))])[:30]
class_names = sorted([class_name for class_name in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, class_name))])
training_sets = []
data_arrays = []
......@@ -88,6 +88,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
normalize=True,
fmin=0,
hop_all=False,
max_file_per_class=2,
max_file_length=350
):
......@@ -101,13 +102,14 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
self.fmin = fmin
self.hop_all = hop_all
self.class_name = class_name
self.max_file_per_class = max_file_per_class
self.max_file_length = max_file_length
files = glob.glob(os.path.join(data_root, class_name, '*.wav'))
assert len(files) > 0
files = sorted(files)
files = sorted(files)[:max_file_per_class]
self.files = files
self.meta_data = self.__load_meta_data__(files)
......@@ -171,6 +173,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
x, sr = librosa.load(file, sr=16000, mono=True)
if len(x) > (self.max_file_length + 1 * self.hop_size) + self.n_fft:
x = x[:(self.max_file_length + 1) * self.hop_size + self.n_fft]
if self.normalize:
x = (x - x.mean()) / x.std()
......@@ -201,6 +204,7 @@ class AudioSetClassSubset(torch.utils.data.Dataset):
'file_ids': os.sep.join(os.path.normpath(file_path).split(os.sep)[-4:])
}
if __name__ == '__main__':
mcmc = MCMDataSet(0, 0)
a = audio_set = AudioSet(
......
......@@ -4,6 +4,16 @@ from dcase2020_task2.data_sets import BaseDataSet, CLASS_MAP, INVERSE_CLASS_MAP,
from dcase2020_task2.data_sets import MachineDataSet
import numpy as np
# Lookup table keyed by an integer machine type. ComplementMCMDataSet indexes
# it with `machine_type` and checks candidate types against the listed values
# (`type_ in valid_types[machine_type]`) when collecting its training sets.
# NOTE(review): the integers presumably are the same machine-type indices used
# as keys of ALL_ID_MAP — confirm against CLASS_MAP / ALL_ID_MAP.
valid_types = {
    0: [1, 2, 5],
    1: [0, 2, 5],
    2: [0, 1, 5],
    5: [0, 1, 2],
    3: [4],
    4: [3],
}
class ComplementMCMDataSet(BaseDataSet):
def __init__(
......@@ -18,9 +28,11 @@ class ComplementMCMDataSet(BaseDataSet):
power=1.0,
fmin=0,
normalize_raw=False,
normalize=None,
hop_all=False
):
assert type(machine_type) == int and type(machine_id) == int
self.data_root = data_root
self.context = context
self.num_mel = num_mel
......@@ -28,10 +40,8 @@ class ComplementMCMDataSet(BaseDataSet):
self.hop_size = hop_size
self.power = power
self.fmin = fmin
self.normalize = normalize
self.hop_all = hop_all
assert type(machine_type) == int and type(machine_id) == int
self.normalize_raw = normalize_raw
kwargs = {
'data_root': self.data_root,
......@@ -39,77 +49,28 @@ class ComplementMCMDataSet(BaseDataSet):
'num_mel': self.num_mel,
'n_fft': self.n_fft,
'hop_size': self.hop_size,
'power': power,
'normalize': normalize_raw,
'fmin': fmin,
'hop_all': hop_all
'power': self.power,
'normalize': self.normalize_raw,
'fmin': self.fmin,
'hop_all': self.hop_all
}
if machine_id == -1:
training_sets = []
validation_sets = []
data = []
for id_ in ALL_ID_MAP[machine_type]:
training_sets.append(MachineDataSet(machine_type, id_, mode='training', **kwargs))
validation_sets.append(MachineDataSet(machine_type, id_, mode='validation', **kwargs))
data.append(training_sets[-1].data)
if normalize is None:
data = np.concatenate(data, axis=-1)
mean = data.mean(axis=1, keepdims=True)
std = data.std(axis=1, keepdims=True)
else:
assert type(normalize) == tuple
assert len(normalize) == 2
mean, std = normalize
for training_set, validation_set in zip(training_sets, validation_sets):
training_set.data = (training_set.data - mean) / std
validation_set.data = (validation_set.data - mean) / std
del data
else:
training_set = MachineDataSet(machine_type, machine_id, mode='training', **kwargs)
validation_set = MachineDataSet(machine_type, machine_id, mode='validation', **kwargs)
if normalize is None:
mean = training_set.data.mean(axis=1, keepdims=True)
std = training_set.data.std(axis=1, keepdims=True)
training_set.data = (training_set.data - mean) / std
validation_set.data = (validation_set.data - mean) / std
else:
assert type(normalize) == tuple
assert len(normalize) == 2
mean, std = normalize
training_set.data = (training_set.data - mean) / std
validation_set.data = (validation_set.data - mean) / std
training_sets = []
# validation_sets = []
valid_types = {
0: [1, 2, 5],
1: [0, 2, 5],
2: [0, 1, 5],
5: [0, 1, 2],
3: [4],
4: [3],
}
data = []
for type_ in ALL_ID_MAP:
if type_ in valid_types[machine_type]:
for id_ in ALL_ID_MAP[type_]:
#if type_ != machine_type: #or (id_ != machine_id and machine_id != -1):
for id_ in ALL_ID_MAP[type_]:
if type_ != machine_type or (id_ != machine_id and machine_id != -1):
t = MachineDataSet(type_, id_, mode='training', **kwargs)
t.data = (t.data - mean) / std
data.append(t.data)
training_sets.append(t)
data = np.concatenate(data, axis=-1)
# don't load validation set ...
# v = MachineDataSet(type_, id_, mode='validation', **kwargs)
# v.data = (v.data - mean) / std
# validation_sets.append(v)
self.mean = data.mean(axis=1, keepdims=True)
self.std = data.std(axis=1, keepdims=True)
del data
self.training_set = torch.utils.data.ConcatDataset(training_sets)
# self.validation_set = torch.utils.data.ConcatDataset(validation_sets)
self.mean = mean
self.std = std
@property
def observation_shape(self) -> tuple:
......
......@@ -31,6 +31,7 @@ class MCMDataSet(BaseDataSet):
self.power = power
self.fmin = fmin
self.hop_all = hop_all
self.normalize_raw = normalize_raw
assert type(machine_type) == int and type(machine_id) == int
......@@ -40,10 +41,10 @@ class MCMDataSet(BaseDataSet):
'num_mel': self.num_mel,
'n_fft': self.n_fft,
'hop_size': self.hop_size,
'power': power,
'normalize': normalize_raw,
'fmin': fmin,
'hop_all': hop_all
'power': self.power,
'normalize': self.normalize_raw,
'fmin': self.fmin,
'hop_all': self.hop_all
}
if machine_id == -1:
......
......@@ -11,7 +11,8 @@ from sacred import SETTINGS
SETTINGS['CAPTURE_MODE'] = 'sys'
from datetime import datetime
from dcase2020_task2.data_sets import AudioSet
from dcase2020_task2.data_sets import AudioSet, ComplementMCMDataSet
class ClassificationExperiment(BaseExperiment, pl.LightningModule):
......@@ -31,12 +32,25 @@ class ClassificationExperiment(BaseExperiment, pl.LightningModule):
# will be set before each epoch
self.normal_data_set = self.objects['data_set']
self.abnormal_data_set = AudioSet(
self.abnormal_data_set = ComplementMCMDataSet(
self.objects['machine_type'],
self.objects['machine_id'],
**self.objects['fetaure_settings']
)
self.mean = torch.from_numpy((self.normal_data_set.mean + self.abnormal_data_set.mean) / 2)
self.std = torch.from_numpy((self.normal_data_set.std + self.abnormal_data_set.std) / 2)
if self.objects.get('normalize') == 'normal':
self.mean = torch.from_numpy(self.normal_data_set.mean)
self.std = torch.from_numpy(self.normal_data_set.std)
elif self.objects.get('normalize') == 'abnormal':
self.mean = torch.from_numpy(self.abnormal_data_set.mean)
self.std = torch.from_numpy(self.abnormal_data_set.std)
elif self.objects.get('normalize') == 'average':
self.mean = torch.from_numpy((self.normal_data_set.mean + self.abnormal_data_set.mean) / 2)
self.std = torch.from_numpy((self.normal_data_set.std + self.abnormal_data_set.std) / 2)
else:
print('No normalization.')
self.mean = torch.zeros(self.normal_data_set.mean.shape)
self.std = torch.ones(self.normal_data_set.std.shape)
self.inf_data_loader = self.get_inf_data_loader(
torch.utils.data.DataLoader(
......@@ -165,8 +179,8 @@ def configuration():
context = 32
model_class = 'dcase2020_task2.models.CNN'
hidden_size = 64
num_hidden = 3
hidden_size = 256
num_hidden = 4
dropout_probability = 0.0
epochs = 100
......@@ -176,7 +190,6 @@ def configuration():
if debug:
num_workers = 0
epochs = 100
hop_all = True
else:
num_workers = 4
......@@ -186,6 +199,7 @@ def configuration():
weight_decay = 0
normalize_raw = True
normalize = None
# TODO: change default descriptor
descriptor = "ClassificationExperiment_Model:[{}_{}_{}_{}]_Training:[{}_{}_{}_{}]_Features:[{}_{}_{}_{}_{}_{}_{}]_{}".format(
......@@ -247,6 +261,7 @@ def configuration():
'kwargs': {
'hidden_size': hidden_size,
'num_hidden': num_hidden,
'base_channels': hidden_size,
'dropout_probability': dropout_probability,
'batch_norm': False
}
......
......@@ -17,7 +17,7 @@ class FCNN(torch.nn.Module):
num_outputs=1,
activation='relu',
batch_norm=False,
dropout_probability=0.1
**kwargs
):
super().__init__()
......@@ -30,11 +30,9 @@ class FCNN(torch.nn.Module):
layers.append(torch.nn.Linear(i, o))
if batch_norm:
layers.append(torch.nn.BatchNorm1d(o))
layers.append(torch.nn.Dropout(p=dropout_probability))
layers.append(activation_fn())
_ = layers.pop()
_ = layers.pop()
if batch_norm:
_ = layers.pop()
......@@ -59,7 +57,7 @@ class CNN(torch.nn.Module):
num_outputs=1,
activation='relu',
batch_norm=False,
dropout_probability=0.1
**kwargs
):
super().__init__()
......@@ -72,8 +70,6 @@ class CNN(torch.nn.Module):
layers.append(torch.nn.Conv2d(i, o, kernel_size=(3, 3), stride=2, padding=(1, 1)))
if batch_norm:
layers.append(torch.nn.BatchNorm2d(o))
if dropout_probability > 0:
layers.append(torch.nn.Dropout2d(dropout_probability))
layers.append(activation_fn())
_ = layers.pop()
......
# coding: utf-8
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint_sequential
from librosa.filters import mel as librosa_mel_fn
def initialize_weights(module):
    """Initialise the parameters of one layer in place, based on its type.

    * ``nn.Conv2d``: Kaiming-normal weights (``mode='fan_in'``, ReLU gain).
    * ``nn.BatchNorm2d``: weight filled with 1, bias filled with 0.
    * ``nn.Linear``: bias filled with 0; the weight is left untouched.

    Modules of any other type are ignored. Layers that were created without
    the relevant parameter (``bias=False`` / ``affine=False``) are skipped
    instead of failing with ``AttributeError`` on ``None``.

    Args:
        module: the layer whose parameters should be initialised.
    """
    if isinstance(module, nn.Conv2d):
        nn.init.kaiming_normal_(module.weight.data, mode='fan_in', nonlinearity="relu")
    elif isinstance(module, nn.BatchNorm2d):
        # weight and bias are None when the layer was built with affine=False.
        if module.weight is not None:
            module.weight.data.fill_(1)
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.Linear):
        # bias is None when the layer was built with bias=False.
        if module.bias is not None:
            module.bias.data.zero_()
# Module-level counter of BasicBlock instances: BasicBlock.__init__ stores the
# current value as the block's `layer_index` and then increments it, and
# initialize_weights_fixup reads it to scale the std of each block's conv1 init.
layer_index_total = 0
def initialize_weights_fixup(module):
    """Initialise one module in place using the Fixup-style scheme of this file.

    Behaviour per module type:

    * ``AttentionAvg``: the ``forw_conv`` convolution weight and the ``atten``
      convolution bias are zeroed; the ``atten`` convolution weight gets a
      Kaiming-normal init (``mode='fan_in'``, sigmoid gain).
    * ``BasicBlock``: ``conv1`` weights are drawn from a normal distribution
      whose He std ``sqrt(2 / n)`` (``n = kh * kw * out_channels``) is
      multiplied by ``layer_index_total ** -0.5``; ``conv2`` weights are
      zeroed; a projection shortcut, if present, gets the unscaled He std.
    * ``nn.Conv2d``: deliberately left at its existing initialisation.
    * ``nn.BatchNorm2d``: weight filled with 1, bias filled with 0.
    * ``nn.Linear``: bias filled with 0.

    BatchNorm/Linear layers created without the relevant parameter
    (``affine=False`` / ``bias=False``) are skipped rather than raising
    ``AttributeError`` on ``None``. Diagnostic values are printed for
    ``AttentionAvg`` and ``BasicBlock`` modules.

    Args:
        module: the module whose parameters should be initialised.
    """
    if isinstance(module, AttentionAvg):
        print("AttentionAvg init..")
        module.forw_conv[0].weight.data.zero_()
        module.atten[0].bias.data.zero_()
        nn.init.kaiming_normal_(module.atten[0].weight.data, mode='fan_in', nonlinearity="sigmoid")

    if isinstance(module, BasicBlock):
        # He init, rescaled by Fixup multiplier
        b = module
        n = b.conv1.kernel_size[0] * b.conv1.kernel_size[1] * b.conv1.out_channels
        print(b.layer_index, math.sqrt(2. / n), layer_index_total ** (-0.5))
        b.conv1.weight.data.normal_(0, (layer_index_total ** (-0.5)) * math.sqrt(2. / n))
        # Zeroing the second conv makes the residual branch output zero at init.
        b.conv2.weight.data.zero_()
        # The shortcut only holds a 'conv' entry when the block uses a projection.
        shortcut_conv = b.shortcut._modules.get('conv')
        if shortcut_conv is not None:
            n = shortcut_conv.kernel_size[0] * shortcut_conv.kernel_size[1] * shortcut_conv.out_channels
            shortcut_conv.weight.data.normal_(0, math.sqrt(2. / n))

    if isinstance(module, nn.Conv2d):
        # Plain convolutions are intentionally not re-initialised here.
        pass
    elif isinstance(module, nn.BatchNorm2d):
        # weight and bias are None when the layer was built with affine=False.
        if module.weight is not None:
            module.weight.data.fill_(1)
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.Linear):
        # bias is None when the layer was built with bias=False.
        if module.bias is not None:
            module.bias.data.zero_()
# NOTE(review): module-level flag that is not referenced anywhere in the
# visible part of this file — confirm whether it is still used elsewhere.
first_RUN = True
def calc_padding(kernal):
    """Return the padding for a kernel size, computed as ``size // 3``.

    Args:
        kernal: a single kernel size, or an iterable of kernel sizes.

    Returns:
        ``kernal // 3`` when the value supports floor division directly;
        otherwise a list holding ``size // 3`` for every entry of ``kernal``.
    """
    try:
        padding = kernal // 3
    except TypeError:
        # Sequences such as tuples/lists do not support ``//``: go per element.
        padding = [size // 3 for size in kernal]
    return padding
class AttentionAvg(nn.Module):
    """Attention-weighted 1x1 projection of a feature map.

    Two parallel 1x1 convolutions are applied to the input: ``forw_conv``
    (no bias) produces the features, and ``atten`` (with bias, followed by a
    sigmoid) produces attention weights. The output is
    ``features * weights * (H * W) / sum(weights)``, where ``H`` and ``W`` are
    the sizes of dimensions 2 and 3 of the attention map.

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels produced by both convolutions.
        sum_all: if true, the attention sum runs over dimensions 1, 2 and 3;
            otherwise only over dimensions 2 and 3.
    """

    def __init__(self, in_channels, out_channels, sum_all=True):
        super().__init__()
        # Dimensions over which the attention weights are summed in forward().
        self.sum_dims = [1, 2, 3] if sum_all else [2, 3]

        feature_conv = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False
        )
        self.forw_conv = nn.Sequential(feature_conv)

        attention_conv = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True
        )
        self.atten = nn.Sequential(attention_conv, nn.Sigmoid())

    def forward(self, x):
        """Return the attention-weighted features for input ``x``."""
        features = self.forw_conv(x)
        weights = self.atten(x)
        n_positions = weights.size(2) * weights.size(3)
        # The small epsilon keeps the division finite if all weights vanish.
        weight_sum = weights.sum(dim=self.sum_dims, keepdim=True) + 1e-8
        return features * weights * n_positions / weight_sum
class BasicBlock(nn.Module):
    """Two-convolution residual block without normalisation layers.

    The residual branch is ``conv1 -> ReLU -> conv2``; its output is added to
    the shortcut and passed through a final ReLU. Each instance records the
    value of the module-level ``layer_index_total`` counter at construction
    time as ``layer_index`` and then increments that counter.

    The shortcut is the identity unless the block changes the channel count
    or uses a stride other than 1, in which case a bias-free 1x1 convolution
    with the same stride is used so that both branches produce matching
    shapes.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of ``conv1`` (and of the projection shortcut).
        k1: kernel size of ``conv1``.
        k2: kernel size of ``conv2``.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride, k1=3, k2=3):
        super(BasicBlock, self).__init__()
        global layer_index_total
        self.layer_index = layer_index_total
        layer_index_total = layer_index_total + 1

        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=k1,
            stride=stride,  # downsample with first conv
            padding=calc_padding(k1),
            bias=False
        )
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=k2,
            stride=1,
            padding=calc_padding(k2),
            bias=False
        )

        self.shortcut = nn.Sequential()  # identity
        # A strided residual branch shrinks the feature map, so an identity
        # shortcut would no longer match it; treat that like a channel change.
        strides = stride if isinstance(stride, (tuple, list)) else (stride,)
        downsamples = any(s != 1 for s in strides)
        if in_channels != out_channels or downsamples:
            self.shortcut.add_module(
                'conv',
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,  # downsample
                    padding=0,
                    bias=False)
            )

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        y = F.relu(self.conv1(x), inplace=True)
        y = self.conv2(y)
        y += self.shortcut(x)
        y = F.relu(y, inplace=True)  # apply ReLU after addition
        return y
class BottleneckBlock(nn.Module):
    """Three-convolution bottleneck residual block without normalisation.

    The residual branch is ``1x1 conv -> ReLU -> 3x3 conv -> ReLU -> 1x1 conv``
    with ``out_channels // expansion`` channels in the middle; its output is
    added to the shortcut and passed through a final ReLU.

    The shortcut is the identity unless the block changes the channel count
    or uses a stride other than 1, in which case a bias-free 1x1 convolution
    with the same stride is used so that both branches produce matching
    shapes.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        stride: stride of the 3x3 convolution (and of the projection shortcut).
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride):
        super(BottleneckBlock, self).__init__()

        bottleneck_channels = out_channels // self.expansion

        self.conv1 = nn.Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False)
        self.conv2 = nn.Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride,  # downsample with 3x3 conv
            padding=1,
            bias=False)
        self.conv3 = nn.Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False)

        self.shortcut = nn.Sequential()  # identity
        # A strided residual branch shrinks the feature map, so an identity
        # shortcut would no longer match it; treat that like a channel change.
        strides = stride if isinstance(stride, (tuple, list)) else (stride,)
        downsamples = any(s != 1 for s in strides)
        if in_channels != out_channels or downsamples:
            self.shortcut.add_module(
                'conv',
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,  # downsample
                    padding=0,
                    bias=False))

    def forward(self, x):
        """Apply the bottleneck block to ``x`` and return the activated sum."""
        y = F.relu(self.conv1(x), inplace=True)
        y = F.relu(self.conv2(y), inplace=True)
        y = self.conv3(y)  # not apply ReLU
        y += self.shortcut(x)
        y = F.relu(y, inplace=True)  # apply ReLU after addition
        return y
class Network(nn.Module):
def __init__(self, config):
super(Network, self).__init__()
input_shape = config['input_shape']
n_classes = config['n_classes']
base_channels = config['base_channels']
block_type = config['block_type']
depth = config['depth']
self.pooling_padding = config.get("pooling_padding", 0) or 0
self.use_raw_spectograms = config.get("use_raw_spectograms") or False
self.apply_softmax = config.get("apply_softmax") or False
assert block_type in ['basic', 'bottleneck']
if self.use_raw_spectograms:
mel_basis = librosa_mel_fn(
22050, 2048, 256)
mel_basis = torch.from_numpy(mel_basis).float()
self.register_buffer('mel_basis', mel_basis)
if block_type == 'basic':
block = BasicBlock
n_blocks_per_stage = (depth - 2) // 6
assert n_blocks_per_stage * 6 + 2 == depth
else:
block = BottleneckBlock
n_blocks_per_stage = (depth - 2) // 9
assert n_blocks_per_stage * 9 + 2 == depth
n_blocks_per_stage = [n_blocks_per_stage, n_blocks_per_stage, n_blocks_per_stage]
if config.get("n_blocks_per_stage") is not None:
print("n_blocks_per_stage is specified ignoring the depth param, nc=" + str(config.get("n_channels")))
n_blocks_per_stage = config.get("n_blocks_per_stage")
n_channels = config.get("n_channels")
if n_channels is None:
n_channels = [
base_channels,
base_channels * 2 * block.expansion,
base_channels * 4 * block.expansion
]
if config.get("grow_a_lot"):
n_channels[2] = base_channels * 8 * block.expansion
self.in_c = nn.Sequential(nn.Conv2d(
input_shape[1],
n_channels[0],
kernel_size=5,
stride=2,
padding=1,
bias=False),
nn.ReLU(True)
)
self.stage1 = self._make_stage(
n_channels[0], n_channels[0], n_blocks_per_stage[0], block, stride=1, maxpool=config['stage1']['maxpool'],
k1s=config['stage1']['k1s'], k2s=config['stage1']['k2s'])
self.stage2 = self._make_stage(
n_channels[0], n_channels[1], n_blocks_per_stage[1], block, stride=1, maxpool=config['stage2']['maxpool'],
k1s=config['stage2']['k1s'], k2s=config['stage2']['k2s'])
self.stage3 = self._make_stage(
n_channels[1], n_channels[2], n_blocks_per_stage[2], block, stride=1, maxpool=config['stage3']['maxpool'],