Commit 0d4fec2d authored by Shreyan Chowdhury
Browse files

complete baseline model with hdf5 caching

parent fb5c9ff4
...@@ -227,7 +227,7 @@ class AudioPreprocessDataset(Dataset): ...@@ -227,7 +227,7 @@ class AudioPreprocessDataset(Dataset):
self.return_tensor = return_tensor self.return_tensor = return_tensor
def __getitem__(self, index): def __getitem__(self, index):
x = self.preprocessor(self.base_dir + self.files[index]) x = self.preprocessor(os.path.join(self.base_dir, self.files[index]))
if self.return_tensor and not isinstance(x, torch.Tensor): if self.return_tensor and not isinstance(x, torch.Tensor):
x = torch.from_numpy(x) x = torch.from_numpy(x)
return x, self.files[index], self.labels[index] return x, self.files[index], self.labels[index]
...@@ -341,13 +341,13 @@ if __name__=='__main__': ...@@ -341,13 +341,13 @@ if __name__=='__main__':
# dataset = HDF5Dataset('/mnt/2tb/datasets/MTG-Jamendo/HDF5Cache_spectrograms/', recursive=False, load_data=False) # dataset = HDF5Dataset('/mnt/2tb/datasets/MTG-Jamendo/HDF5Cache_spectrograms/', recursive=False, load_data=False)
train_files_csv = "~/shared/datasets/MTG-Jamendo/MTG-Jamendo_annotations/train_processed.tsv" train_files_csv = "/mnt/2tb/datasets/MTG-Jamendo/MTG-Jamendo_annotations/validation_processed.tsv"
audio_path = "~/shared/datasets/MTG-Jamendo/MTG-Jamendo_audio/" audio_path = "/mnt/2tb/datasets/MTG-Jamendo/MTG-Jamendo_audio/"
cache_x_name = "_ap_mtgjamendo44k" cache_x_name = "_ap_mtgjamendo44k"
from datasets.mtgjamendo import df_get_mtg_set from datasets.mtgjamendo import df_get_mtg_set
# name, mtg_files_csv, audio_path, cache_x_name # name, mtg_files_csv, audio_path, cache_x_name
dataset = df_get_mtg_set('mtgjamendo', train_files_csv, audio_path, cache_x_name) dataset = df_get_mtg_set('mtgjamendo_validation', train_files_csv, audio_path, cache_x_name)
train_loader = DataLoader(dataset=dataset, train_loader = DataLoader(dataset=dataset,
batch_size=32, batch_size=32,
shuffle=True) shuffle=True)
......
import os import os
from datasets.datasets import H5FCachedDataset, AudioPreprocessDataset from datasets.dataset import H5FCachedDataset, AudioPreprocessDataset
import torch import torch
import librosa import librosa
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from utils import PATH_DATA_CACHE
def sample_slicing_function(h5data, idx, xlen): def sample_slicing_function(h5data, idx, xlen):
timeframes = 600 timeframes = 2048
k = torch.randint(xlen - timeframes + 1, (1,))[0].item() k = torch.randint(xlen - timeframes + 1, (1,))[0].item()
x = h5data[idx + k:idx + k + timeframes] x = h5data[idx + k:idx + k + timeframes]
...@@ -98,7 +100,9 @@ def df_get_mtg_set(name, mtg_files_csv, audio_path, cache_x_name): ...@@ -98,7 +100,9 @@ def df_get_mtg_set(name, mtg_files_csv, audio_path, cache_x_name):
df_trset = H5FCachedDataset(getdatset, name, slicing_function=sample_slicing_function, df_trset = H5FCachedDataset(getdatset, name, slicing_function=sample_slicing_function,
x_name=cache_x_name, x_name=cache_x_name,
cache_path=PATH_DATA_CACHE
) )
return df_trset return df_trset
...@@ -3,7 +3,7 @@ from pytorch_lightning import Trainer ...@@ -3,7 +3,7 @@ from pytorch_lightning import Trainer
from test_tube import Experiment from test_tube import Experiment
from models.cp_resnet import Network from models.cp_resnet import Network
import torch import torch
from datasets.datasets import MelSpecDataset from datasets.dataset import MelSpecDataset
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from matplotlib.transforms import Affine2D from matplotlib.transforms import Affine2D
......
import torch.nn as nn import torch.nn as nn
from datasets.mtgjamendo import df_get_mtg_set
from utils import * from utils import *
from datasets.datasets import HDF5Dataset from datasets.dataset import HDF5Dataset
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
import pytorch_lightning as pl import pytorch_lightning as pl
from sklearn.metrics import roc_auc_score from sklearn import metrics
# TODO pr-auc # TODO pr-auc
# TODO f1-score # TODO f1-score
...@@ -46,57 +47,126 @@ class CNN(pl.LightningModule): ...@@ -46,57 +47,126 @@ class CNN(pl.LightningModule):
self.mp_5 = nn.MaxPool2d((4, 4)) self.mp_5 = nn.MaxPool2d((4, 4))
# classifier # classifier
self.dense = nn.Linear(64, num_class) self.dense = nn.Linear(320, num_class)
self.dropout = nn.Dropout(0.5) self.dropout = nn.Dropout(0.5)
def forward(self, x): def forward(self, x):
x = x.unsqueeze(1) # x = x.unsqueeze(1)
# init bn # init bn
x = self.bn_init(x) x = self.bn_init(x)
# print(x.shape)
# layer 1 # layer 1
x = self.mp_1(nn.ELU()(self.bn_1(self.conv_1(x)))) x = self.mp_1(nn.ELU()(self.bn_1(self.conv_1(x))))
# print(x.shape)
# layer 2 # layer 2
x = self.mp_2(nn.ELU()(self.bn_2(self.conv_2(x)))) x = nn.ELU()(self.bn_2(self.conv_2(x)))
# x = self.mp_2(nn.ELU()(self.bn_2(self.conv_2(x))))
# print(x.shape)
# layer 3 # layer 3
x = self.mp_3(nn.ELU()(self.bn_3(self.conv_3(x)))) x = self.mp_3(nn.ELU()(self.bn_3(self.conv_3(x))))
# print(x.shape)
# layer 4 # layer 4
# x = nn.ELU()(self.bn_4(self.conv_4(x)))
x = self.mp_4(nn.ELU()(self.bn_4(self.conv_4(x)))) x = self.mp_4(nn.ELU()(self.bn_4(self.conv_4(x))))
# print(x.shape)
# layer 5 # layer 5
x = self.mp_5(nn.ELU()(self.bn_5(self.conv_5(x)))) x = self.mp_5(nn.ELU()(self.bn_5(self.conv_5(x))))
# print(x.shape)
# classifier # classifier
x = x.view(x.size(0), -1) x = x.view(x.size(0), -1)
# print("Lin input", x.shape)
x = self.dropout(x) x = self.dropout(x)
logit = nn.Sigmoid()(self.dense(x)) logit = nn.Sigmoid()(self.dense(x))
# print(x.shape)
return logit return logit
def my_loss(self, y_hat, y): def my_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y) return F.binary_cross_entropy(y_hat, y)
def forward_full_song(self, x, y):
    """Run the network on the leading 512-wide window of ``x``.

    Despite the name, the full song is not processed yet: only the first
    512 positions along the fourth axis of ``x`` are passed to
    ``self.forward``.  ``y`` is accepted but not used.
    """
    # TODO full song???
    # Sketched idea (not implemented): slide a 512-wide window over the last
    # axis with a hop size of 256, accumulate the output of ``self.forward``
    # for every window and divide the sum by the number of windows.
    n_frames = 512
    excerpt = x[:, :, :, :n_frames]
    return self.forward(excerpt)
def training_step(self, data_batch, batch_nb):
    """Compute the training loss for one batch.

    ``data_batch`` is unpacked into three items; the first is fed to
    ``forward_full_song`` together with the third, and the middle one is
    ignored.  ``batch_nb`` is not used.

    Returns a dict with the single key ``'loss'`` holding the result of
    ``my_loss`` on the float-cast predictions and targets.
    """
    inputs, _, targets = data_batch
    # The targets are handed to the forward pass before the float cast.
    predictions = self.forward_full_song(inputs, targets).float()
    return {'loss': self.my_loss(predictions, targets.float())}
def validation_step(self, data_batch, batch_nb):
    """Evaluate one validation batch.

    ``data_batch`` is unpacked into three items; the first and third are
    used as input and target, the middle one is ignored.  ``batch_nb`` is
    not used.

    Returns a dict with ``'val_loss'`` (from ``my_loss``), ``'rocauc'``
    (``metrics.roc_auc_score``), ``'prauc'``
    (``metrics.average_precision_score``) and ``'fscore'``.
    """
    inputs, _, targets = data_batch
    predictions = self.forward_full_song(inputs, targets).float()
    targets = targets.float()

    # NOTE(review): both matrices are transposed before being handed to
    # sklearn -- confirm that scoring along the transposed axis is intended.
    targets_t = targets.t().cpu()
    predictions_t = predictions.t().cpu()

    rocauc = metrics.roc_auc_score(targets_t, predictions_t)
    prauc = metrics.average_precision_score(targets_t, predictions_t)
    # The f-score is not computed yet; a constant placeholder is reported.
    fscore = 0.

    return {
        'val_loss': self.my_loss(predictions, targets),
        'rocauc': rocauc,
        'prauc': prauc,
        'fscore': fscore,
    }
def validation_end(self, outputs):
    """Average the per-batch validation results.

    ``outputs`` is iterated as a sequence of dicts, each providing
    ``'val_loss'``, ``'rocauc'``, ``'prauc'`` and ``'fscore'``.  Returns a
    dict with the same four keys, each holding the mean over all entries as
    a tensor.
    """
    def _mean_of(key):
        # Wrap every value in a one-element tensor so the values can be stacked.
        return torch.stack([torch.tensor([step[key]]) for step in outputs]).mean()

    # 'val_loss' entries are stacked directly, without the extra wrapping.
    summary = {'val_loss': torch.stack([step['val_loss'] for step in outputs]).mean()}
    for key in ('rocauc', 'prauc', 'fscore'):
        summary[key] = _mean_of(key)
    return summary
def configure_optimizers(self): def configure_optimizers(self):
return [torch.optim.Adam(self.parameters(), lr=1e-4)] # from their code return [torch.optim.Adam(self.parameters(), lr=1e-4)] # from their code
@pl.data_loader @pl.data_loader
def tng_dataloader(self): def tng_dataloader(self):
trainset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'train.h5'), recursive=False, load_data=False) train_csv = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
return DataLoader(dataset=trainset, batch_size=32, shuffle=True) cache_x_name = "_ap_mtgjamendo44k"
dataset = df_get_mtg_set('mtgjamendo', train_csv, PATH_AUDIO, cache_x_name)
return DataLoader(dataset=dataset,
batch_size=32,
shuffle=True)
@pl.data_loader @pl.data_loader
def val_dataloader(self): def val_dataloader(self):
validationset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'val.h5'), recursive=False, load_data=False) validation_csv = os.path.join(PATH_ANNOTATIONS, 'validation_processed.tsv')
return DataLoader(dataset=validationset, batch_size=128, shuffle=True) cache_x_name = "_ap_mtgjamendo44k"
dataset = df_get_mtg_set('mtgjamendo_val', validation_csv, PATH_AUDIO, cache_x_name)
return DataLoader(dataset=dataset,
batch_size=32,
shuffle=True)
@pl.data_loader @pl.data_loader
def test_dataloader(self): def test_dataloader(self):
testset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'test.h5'), recursive=False, load_data=False) test_csv = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
return DataLoader(dataset=testset, batch_size=32, shuffle=True) cache_x_name = "_ap_mtgjamendo44k"
dataset = df_get_mtg_set('mtgjamendo_test', test_csv, PATH_AUDIO, cache_x_name)
return DataLoader(dataset=dataset,
batch_size=32,
shuffle=True)
@staticmethod @staticmethod
def add_model_specific_args(parent_parser, root_dir): def add_model_specific_args(parent_parser, root_dir):
......
...@@ -6,7 +6,7 @@ import torch.nn.functional as F ...@@ -6,7 +6,7 @@ import torch.nn.functional as F
from librosa.filters import mel as librosa_mel_fn from librosa.filters import mel as librosa_mel_fn
from utils import * from utils import *
from datasets.datasets import MelSpecDataset from datasets.dataset import MelSpecDataset
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
import pytorch_lightning as pl import pytorch_lightning as pl
......
...@@ -115,7 +115,7 @@ def hdf_cache_specs(source_root, destination_root, annotations_file='all', outpu ...@@ -115,7 +115,7 @@ def hdf_cache_specs(source_root, destination_root, annotations_file='all', outpu
tagslist = np.load(os.path.join(PATH_PROJECT_ROOT, 'tagslist.npy')) tagslist = np.load(os.path.join(PATH_PROJECT_ROOT, 'tagslist.npy'))
with h5py.File(hdf_filepath, 'w') as hdf: with h5py.File(hdf_filepath, 'w') as hdf:
for idx in tqdm(annotations.index): for idx in tqdm(annotations.index[:10]):
filename = annotations.PATH.iloc[idx].split('.')[0] # discard '.mp3' extension filename = annotations.PATH.iloc[idx].split('.')[0] # discard '.mp3' extension
labels_str = annotations.TAGS.iloc[idx] # get labels in string format labels_str = annotations.TAGS.iloc[idx] # get labels in string format
labels_onehot = np.array([int(i in labels_str) for i in tagslist]) # convert to onehot encoding labels_onehot = np.array([int(i in labels_str) for i in tagslist]) # convert to onehot encoding
......
...@@ -34,6 +34,7 @@ username = getpass.getuser() ...@@ -34,6 +34,7 @@ username = getpass.getuser()
if hostname in ['rechenknecht3.cp.jku.at', 'rechenknecht2.cp.jku.at']: if hostname in ['rechenknecht3.cp.jku.at', 'rechenknecht2.cp.jku.at']:
plt.switch_backend('agg') plt.switch_backend('agg')
PATH_DATA_ROOT = '/media/rk3/shared/datasets/MTG-Jamendo' PATH_DATA_ROOT = '/media/rk3/shared/datasets/MTG-Jamendo'
PATH_DATA_CACHE = '/media/rk3/shared/kofta_cached_datasets'
USE_GPU = True USE_GPU = True
elif hostname == 'hermine': # PC verena elif hostname == 'hermine': # PC verena
plt.switch_backend('agg') plt.switch_backend('agg')
...@@ -47,6 +48,7 @@ elif hostname == 'shreyan-HP': # Laptop Shreyan ...@@ -47,6 +48,7 @@ elif hostname == 'shreyan-HP': # Laptop Shreyan
USE_GPU = False USE_GPU = False
else: else:
PATH_DATA_ROOT = '/mnt/2tb/datasets/MTG-Jamendo' PATH_DATA_ROOT = '/mnt/2tb/datasets/MTG-Jamendo'
PATH_DATA_CACHE = os.path.join(PATH_DATA_ROOT, 'HDF5Cache_spectrograms')
USE_GPU = False USE_GPU = False
if username == 'verena': if username == 'verena':
...@@ -60,9 +62,6 @@ PATH_MELSPEC_DOWNLOADED_FRAMED = os.path.join(PATH_MELSPEC_DOWNLOADED, 'framed') ...@@ -60,9 +62,6 @@ PATH_MELSPEC_DOWNLOADED_FRAMED = os.path.join(PATH_MELSPEC_DOWNLOADED, 'framed')
PATH_MELSPEC_DOWNLOADED_HDF5 = os.path.join(PATH_DATA_ROOT, 'HDF5Cache_spectrograms') PATH_MELSPEC_DOWNLOADED_HDF5 = os.path.join(PATH_DATA_ROOT, 'HDF5Cache_spectrograms')
TRAINED_MODELS_PATH = '' TRAINED_MODELS_PATH = ''
# run name # run name
def make_run_name(suffix=''): def make_run_name(suffix=''):
assert ' ' not in suffix assert ' ' not in suffix
...@@ -155,9 +154,9 @@ def save(model, path): ...@@ -155,9 +154,9 @@ def save(model, path):
if __name__=='__main__': if __name__=='__main__':
# TESTS # TESTS
# c = nn.Conv2d(512, 256, 1, 1, 0) # (in_channels, out_channels, kernel_size, stride, padding) c = nn.Conv2d(1, 64, 3, 1, 1) # (in_channels, out_channels, kernel_size, stride, padding)
# m = nn.MaxPool2d(2) # m = nn.MaxPool2d(2)
# print(dims_calc(c, [37, 17, 512])) print(dims_calc(c, [256, 600, 1]))
# preprocess_specs(source_root=PATH_MELSPEC_DOWNLOADED, # preprocess_specs(source_root=PATH_MELSPEC_DOWNLOADED,
# destination_root=PATH_MELSPEC_DOWNLOADED_FRAMED) # destination_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment