Commit 0d4fec2d authored by Shreyan Chowdhury

complete baseline model with hdf5 caching

parent fb5c9ff4
......@@ -227,7 +227,7 @@ class AudioPreprocessDataset(Dataset):
        self.return_tensor = return_tensor

    def __getitem__(self, index):
        x = self.preprocessor(self.base_dir + self.files[index])
        x = self.preprocessor(os.path.join(self.base_dir, self.files[index]))
        if self.return_tensor and not isinstance(x, torch.Tensor):
            x = torch.from_numpy(x)
        return x, self.files[index], self.labels[index]
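The switch from string concatenation to os.path.join avoids a silently malformed path when base_dir lacks a trailing separator. A minimal standalone illustration (the paths here are hypothetical):

import os

base_dir = "/data/MTG-Jamendo/audio"   # hypothetical base directory without trailing slash
fname = "00/track.mp3"
print(base_dir + fname)                # /data/MTG-Jamendo/audio00/track.mp3 (wrong)
print(os.path.join(base_dir, fname))   # /data/MTG-Jamendo/audio/00/track.mp3 (correct)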
......@@ -341,13 +341,13 @@ if __name__=='__main__':
    # dataset = HDF5Dataset('/mnt/2tb/datasets/MTG-Jamendo/HDF5Cache_spectrograms/', recursive=False, load_data=False)
    train_files_csv = "~/shared/datasets/MTG-Jamendo/MTG-Jamendo_annotations/train_processed.tsv"
    audio_path = "~/shared/datasets/MTG-Jamendo/MTG-Jamendo_audio/"
    train_files_csv = "/mnt/2tb/datasets/MTG-Jamendo/MTG-Jamendo_annotations/validation_processed.tsv"
    audio_path = "/mnt/2tb/datasets/MTG-Jamendo/MTG-Jamendo_audio/"
    cache_x_name = "_ap_mtgjamendo44k"

    from datasets.mtgjamendo import df_get_mtg_set
    # name, mtg_files_csv, audio_path, cache_x_name
    dataset = df_get_mtg_set('mtgjamendo', train_files_csv, audio_path, cache_x_name)
    dataset = df_get_mtg_set('mtgjamendo_validation', train_files_csv, audio_path, cache_x_name)

    train_loader = DataLoader(dataset=dataset,
                              batch_size=32,
                              shuffle=True)
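A hypothetical smoke test for the loader above, assuming the cached dataset yields (x, filename, label) triples as AudioPreprocessDataset does:

    x, fnames, y = next(iter(train_loader))  # pull one batch
    print(x.shape, y.shape)                  # spectrogram batch and multi-hot label batch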
......
import os
from datasets.datasets import H5FCachedDataset, AudioPreprocessDataset
from datasets.dataset import H5FCachedDataset, AudioPreprocessDataset
import torch
import librosa
import numpy as np
import pandas as pd
from utils import PATH_DATA_CACHE
def sample_slicing_function(h5data, idx, xlen):
    timeframes = 600
    timeframes = 2048
    k = torch.randint(xlen - timeframes + 1, (1,))[0].item()
    x = h5data[idx + k:idx + k + timeframes]
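The slicing function draws one random contiguous 2048-frame window from the cached spectrogram. A self-contained sketch of the same cropping logic on a plain tensor (shapes are illustrative); note that torch.randint needs a positive upper bound, so a track shorter than 2048 frames would raise an error here:

import torch

timeframes = 2048
x = torch.randn(2500, 256)  # stand-in for one cached spectrogram (frames, mel bins)
k = torch.randint(x.shape[0] - timeframes + 1, (1,))[0].item()
crop = x[k:k + timeframes]  # random contiguous 2048-frame window
print(crop.shape)           # torch.Size([2048, 256])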
......@@ -98,7 +100,9 @@ def df_get_mtg_set(name, mtg_files_csv, audio_path, cache_x_name):
    df_trset = H5FCachedDataset(getdatset, name, slicing_function=sample_slicing_function,
                                x_name=cache_x_name,
                                cache_path=PATH_DATA_CACHE
                                )
    return df_trset
......@@ -3,7 +3,7 @@ from pytorch_lightning import Trainer
from test_tube import Experiment
from models.cp_resnet import Network
import torch
from datasets.datasets import MelSpecDataset
from datasets.dataset import MelSpecDataset
from torch.utils.data import DataLoader
from matplotlib.transforms import Affine2D
......
import torch.nn as nn
from datasets.mtgjamendo import df_get_mtg_set
from utils import *
from datasets.datasets import HDF5Dataset
from datasets.dataset import HDF5Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from sklearn.metrics import roc_auc_score
from sklearn import metrics
# TODO pr-auc
# TODO f1-score
......@@ -46,57 +47,126 @@ class CNN(pl.LightningModule):
        self.mp_5 = nn.MaxPool2d((4, 4))

        # classifier
        self.dense = nn.Linear(64, num_class)
        self.dense = nn.Linear(320, num_class)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.unsqueeze(1)
        # x = x.unsqueeze(1)
        # init bn
        x = self.bn_init(x)
        # print(x.shape)

        # layer 1
        x = self.mp_1(nn.ELU()(self.bn_1(self.conv_1(x))))
        # print(x.shape)

        # layer 2
        x = self.mp_2(nn.ELU()(self.bn_2(self.conv_2(x))))
        x = nn.ELU()(self.bn_2(self.conv_2(x)))
        # x = self.mp_2(nn.ELU()(self.bn_2(self.conv_2(x))))
        # print(x.shape)

        # layer 3
        x = self.mp_3(nn.ELU()(self.bn_3(self.conv_3(x))))
        # print(x.shape)

        # layer 4
        # x = nn.ELU()(self.bn_4(self.conv_4(x)))
        x = self.mp_4(nn.ELU()(self.bn_4(self.conv_4(x))))
        # print(x.shape)

        # layer 5
        x = self.mp_5(nn.ELU()(self.bn_5(self.conv_5(x))))
        # print(x.shape)

        # classifier
        x = x.view(x.size(0), -1)
        # print("Lin input", x.shape)
        x = self.dropout(x)
        logit = nn.Sigmoid()(self.dense(x))
        # print(x.shape)
        return logit
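The classifier input grows from 64 to 320 because dropping the layer-2 max-pool changes the flattened feature-map size. A generic, self-contained way to derive that in_features value empirically; the single-layer stack and the (1, 256, 512) input below are illustrative, not the model's exact configuration:

import torch
import torch.nn as nn

# illustrative stand-in for the convolutional stack
conv_stack = nn.Sequential(
    nn.Conv2d(1, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ELU(), nn.MaxPool2d((4, 4)),
)
with torch.no_grad():
    n_flat = conv_stack(torch.zeros(1, 1, 256, 512)).flatten(1).shape[1]
print(n_flat)  # use this as in_features of the final nn.Linear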
    def my_loss(self, y_hat, y):
        return F.binary_cross_entropy(y_hat, y)

    def forward_full_song(self, x, y):
        # print(x.shape)
        # TODO full song???
        return self.forward(x[:, :, :, :512])
        # y_hat = torch.zeros((x.shape[0], 56), requires_grad=True).cuda()
        # hop_size = 256
        # i = 0
        # count = 0
        # while i < x.shape[-1]:
        #     y_hat += self.forward(x[:,:,:,i:i+512])
        #     i += hop_size
        #     count += 1
        # return y_hat/count
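forward_full_song currently scores only the first 512 frames; the commented block sketches the intended alternative of averaging predictions over sliding windows. A standalone version of that windowed averaging (window and hop taken from the comments, the rest assumed):

import torch

def windowed_average(model, x, win=512, hop=256):
    # average model outputs over sliding windows along the time axis;
    # assumes x has shape (batch, channels, mels, frames) with frames >= win
    outs = [model(x[:, :, :, i:i + win]) for i in range(0, x.shape[-1] - win + 1, hop)]
    return torch.stack(outs).mean(dim=0)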
    def training_step(self, data_batch, batch_nb):
        x, _, y = data_batch
        y_hat = self.forward_full_song(x, y)
        y = y.float()
        y_hat = y_hat.float()
        return {'loss': self.my_loss(y_hat, y)}

    def validation_step(self, data_batch, batch_nb):
        # print("data_batch", data_batch)
        x, _, y = data_batch
        # print("x", x)
        # print("y", y)
        y_hat = self.forward_full_song(x, y)
        y = y.float()
        y_hat = y_hat.float()
        rocauc = metrics.roc_auc_score(y.t().cpu(), y_hat.t().cpu())
        prauc = metrics.average_precision_score(y.t().cpu(), y_hat.t().cpu())
        # _, _, fscore, _ = metrics.precision_recall_fscore_support(y.t().cpu(), y_hat.t().cpu())
        fscore = 0.
        return {'val_loss': self.my_loss(y_hat, y),
                'rocauc': rocauc,
                'prauc': prauc,
                'fscore': fscore}
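A small self-contained example of the two sklearn metrics on multi-label data (values made up). Both default to macro averaging over columns, so transposing the batch, as above, averages over tags rather than over examples:

import numpy as np
from sklearn import metrics

y_true = np.array([[1, 0, 1], [0, 1, 1], [1, 1, 0], [0, 0, 1]])
y_score = np.array([[0.9, 0.2, 0.7], [0.1, 0.8, 0.2], [0.8, 0.7, 0.3], [0.2, 0.1, 0.9]])
print(metrics.roc_auc_score(y_true, y_score))            # macro-averaged ROC-AUC
print(metrics.average_precision_score(y_true, y_score))  # macro-averaged PR-AUC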
    def validation_end(self, outputs):
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_auc = torch.stack([torch.tensor([x['rocauc']]) for x in outputs]).mean()
        avg_prauc = torch.stack([torch.tensor([x['prauc']]) for x in outputs]).mean()
        avg_fscore = torch.stack([torch.tensor([x['fscore']]) for x in outputs]).mean()
        return {'val_loss': avg_loss,
                'rocauc': avg_auc,
                'prauc': avg_prauc,
                'fscore': avg_fscore}
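The per-batch metrics come back as python floats, so they are wrapped in single-element tensors before stacking; a tiny illustration with made-up values:

import torch

outputs = [{'rocauc': 0.81}, {'rocauc': 0.77}]  # illustrative per-batch results
avg = torch.stack([torch.tensor([o['rocauc']]) for o in outputs]).mean()
print(avg)  # tensor(0.7900)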
    def configure_optimizers(self):
        return [torch.optim.Adam(self.parameters(), lr=1e-4)]  # from their code
    @pl.data_loader
    def tng_dataloader(self):
        trainset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'train.h5'), recursive=False, load_data=False)
        return DataLoader(dataset=trainset, batch_size=32, shuffle=True)
        train_csv = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
        cache_x_name = "_ap_mtgjamendo44k"
        dataset = df_get_mtg_set('mtgjamendo', train_csv, PATH_AUDIO, cache_x_name)
        return DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True)

    @pl.data_loader
    def val_dataloader(self):
        validationset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'val.h5'), recursive=False, load_data=False)
        return DataLoader(dataset=validationset, batch_size=128, shuffle=True)
        validation_csv = os.path.join(PATH_ANNOTATIONS, 'validation_processed.tsv')
        cache_x_name = "_ap_mtgjamendo44k"
        dataset = df_get_mtg_set('mtgjamendo_val', validation_csv, PATH_AUDIO, cache_x_name)
        return DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True)

    @pl.data_loader
    def test_dataloader(self):
        testset = HDF5Dataset(os.path.join(PATH_MELSPEC_DOWNLOADED_HDF5, 'test.h5'), recursive=False, load_data=False)
        return DataLoader(dataset=testset, batch_size=32, shuffle=True)
        test_csv = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
        cache_x_name = "_ap_mtgjamendo44k"
        dataset = df_get_mtg_set('mtgjamendo_test', test_csv, PATH_AUDIO, cache_x_name)
        return DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True)
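The three loaders differ only in split name and annotations file; a hypothetical helper that would collapse the repetition (method name is invented, constants as used above):

    def _mtg_loader(self, split, csv_name, batch_size=32):
        # hypothetical shared constructor for the train/val/test loaders above
        csv = os.path.join(PATH_ANNOTATIONS, csv_name)
        dataset = df_get_mtg_set('mtgjamendo_' + split, csv, PATH_AUDIO, "_ap_mtgjamendo44k")
        return DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)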
    @staticmethod
    def add_model_specific_args(parent_parser, root_dir):
......
......@@ -6,7 +6,7 @@ import torch.nn.functional as F
from librosa.filters import mel as librosa_mel_fn
from utils import *
from datasets.datasets import MelSpecDataset
from datasets.dataset import MelSpecDataset
from torch.utils.data import DataLoader
import pytorch_lightning as pl
......
......@@ -115,7 +115,7 @@ def hdf_cache_specs(source_root, destination_root, annotations_file='all', outpu
    tagslist = np.load(os.path.join(PATH_PROJECT_ROOT, 'tagslist.npy'))

    with h5py.File(hdf_filepath, 'w') as hdf:
        for idx in tqdm(annotations.index):
        for idx in tqdm(annotations.index[:10]):
            filename = annotations.PATH.iloc[idx].split('.')[0]  # discard '.mp3' extension
            labels_str = annotations.TAGS.iloc[idx]  # get labels in string format
            labels_onehot = np.array([int(i in labels_str) for i in tagslist])  # convert to one-hot encoding
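A standalone illustration of the one-hot conversion (tag vocabulary and TAGS string are made up). Since membership is tested by substring, a tag that is a substring of another tag (e.g. 'rock' inside 'post-rock') would yield a false positive:

import numpy as np

tagslist = np.array(['rock', 'pop', 'jazz'])  # illustrative tag vocabulary
labels_str = "rock---jazz"                    # hypothetical TAGS cell
labels_onehot = np.array([int(t in labels_str) for t in tagslist])
print(labels_onehot)                          # [1 0 1]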
......
......@@ -34,6 +34,7 @@ username = getpass.getuser()
if hostname in ['rechenknecht3.cp.jku.at', 'rechenknecht2.cp.jku.at']:
    plt.switch_backend('agg')
    PATH_DATA_ROOT = '/media/rk3/shared/datasets/MTG-Jamendo'
    PATH_DATA_CACHE = '/media/rk3/shared/kofta_cached_datasets'
    USE_GPU = True
elif hostname == 'hermine':  # PC verena
    plt.switch_backend('agg')
......@@ -47,6 +48,7 @@ elif hostname == 'shreyan-HP': # Laptop Shreyan
    USE_GPU = False
else:
    PATH_DATA_ROOT = '/mnt/2tb/datasets/MTG-Jamendo'
    PATH_DATA_CACHE = os.path.join(PATH_DATA_ROOT, 'HDF5Cache_spectrograms')
    USE_GPU = False

if username == 'verena':
......@@ -60,9 +62,6 @@ PATH_MELSPEC_DOWNLOADED_FRAMED = os.path.join(PATH_MELSPEC_DOWNLOADED, 'framed')
PATH_MELSPEC_DOWNLOADED_HDF5 = os.path.join(PATH_DATA_ROOT, 'HDF5Cache_spectrograms')
TRAINED_MODELS_PATH = ''
# run name
def make_run_name(suffix=''):
    assert ' ' not in suffix
......@@ -155,9 +154,9 @@ def save(model, path):
if __name__=='__main__':
    # TESTS
    # c = nn.Conv2d(512, 256, 1, 1, 0)  # (in_channels, out_channels, kernel_size, stride, padding)
    c = nn.Conv2d(1, 64, 3, 1, 1)  # (in_channels, out_channels, kernel_size, stride, padding)
    # m = nn.MaxPool2d(2)
    # print(dims_calc(c, [37, 17, 512]))
    print(dims_calc(c, [256, 600, 1]))

    # preprocess_specs(source_root=PATH_MELSPEC_DOWNLOADED,
    #                  destination_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
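dims_calc itself is not shown in this diff, but the expected output follows the standard convolution size formula; a self-contained equivalent for one Conv2d (assuming square kernel, stride, and padding, as in the call above):

def conv2d_out_hw(h, w, kernel, stride, padding):
    # floor((n + 2*padding - kernel) / stride) + 1, per spatial dimension
    oh = (h + 2 * padding - kernel) // stride + 1
    ow = (w + 2 * padding - kernel) // stride + 1
    return oh, ow

print(conv2d_out_hw(256, 600, kernel=3, stride=1, padding=1))  # (256, 600)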
......