Commit 2e3be6de authored by Shreyan Chowdhury

modify dataloading for midlevel and implement aljanaki split

parent 01e50a64
@@ -4,7 +4,8 @@ import torch
import librosa
import numpy as np
import pandas as pd
from utils import PATH_DATA_CACHE
from datasets.shared_data_utils import path_data_cache
from sklearn.utils import check_random_state
def sample_slicing_function(h5data, idx, xlen):
@@ -17,16 +18,26 @@ def sample_slicing_function(h5data, idx, xlen):
t2_parse_labels_cache = {}
def midlevel_parse_labels(csvf):
global t2_parse_labels_cache
if t2_parse_labels_cache.get(csvf) is not None:
return t2_parse_labels_cache.get(csvf)
def midlevel_parse_labels(csvf, csv_meta=None, aljanaki=False, dset='train'):
# global t2_parse_labels_cache
# if t2_parse_labels_cache.get(csvf) is not None:
# return t2_parse_labels_cache.get(csvf)
df = pd.read_csv(csvf, sep=',')
song_ids = df['song_id'].astype(str)+'.mp3'
labels = df[df.columns[1:]]
# t2_parse_labels_cache[csvf] = song_ids, labels
t2_parse_labels_cache[csvf] = song_ids, labels
return t2_parse_labels_cache[csvf]
    if aljanaki:
        a_tr_song_ids, a_tst_song_ids = aljanaki_split(midlevel_annotations_csv=csvf, midlevel_metadata_csv=csv_meta)
        a_tr_song_ids = [str(s) + '.mp3' for s in a_tr_song_ids]
        a_tst_song_ids = [str(s) + '.mp3' for s in a_tst_song_ids]
        if dset == 'train':
            keep = song_ids.isin(a_tr_song_ids)
            song_ids, labels = song_ids[keep], labels[keep]
        elif dset == 'test':
            keep = song_ids.isin(a_tst_song_ids)
            song_ids, labels = song_ids[keep], labels[keep]
return song_ids, labels
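
A minimal usage sketch of the new signature, assuming midlevel_parse_labels is imported from datasets.midlevel as elsewhere in this commit; the CSV paths are placeholders for the mid-level dataset files:

from datasets.midlevel import midlevel_parse_labels

tr_ids, tr_labels = midlevel_parse_labels('annotations.csv', csv_meta='metadata.csv',
                                          aljanaki=True, dset='train')
tst_ids, tst_labels = midlevel_parse_labels('annotations.csv', csv_meta='metadata.csv',
                                            aljanaki=True, dset='test')
assert set(tr_ids).isdisjoint(tst_ids)  # the artist-based split keeps train and test ids disjoint
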
def processor_midlevel44k(file_path):
@@ -85,20 +96,46 @@ audio_processor = processor_midlevel44k
label_encoder = None
def df_get_midlevel_set(name, midlevel_files_csv, audio_path, cache_x_name):
def df_get_midlevel_set(name, midlevel_files_csv, midlevel_files_meta_csv, audio_path, cache_x_name, aljanaki=False, dset='train'):
audio_path = os.path.expanduser(audio_path)
global label_encoder
print("loading dataset from '{}'".format(name))
def getdatset():
files, labels = midlevel_parse_labels(midlevel_files_csv)
files, labels = midlevel_parse_labels(midlevel_files_csv, midlevel_files_meta_csv, aljanaki=aljanaki, dset=dset)
return AudioPreprocessDataset(files, labels, label_encoder, audio_path, audio_processor)
df_trset = H5FCachedDataset(getdatset, name, slicing_function=sample_slicing_function,
x_name=cache_x_name,
cache_path=PATH_DATA_CACHE
cache_path=path_data_cache
)
return df_trset, len(df_trset)
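
For reference, a sketch of calling the extended df_get_midlevel_set with the aljanaki split enabled, mirroring how ModelMidlevelBasic below uses it; the two directory variables are placeholders for the configured mid-level paths:

import os
from datasets.midlevel import df_get_midlevel_set

csvs_path = '/path/to/midlevel/metadata_annotations'  # placeholder
audio_path = '/path/to/midlevel/audio'                # placeholder

trainset, n_train = df_get_midlevel_set('midlevel',
                                        os.path.join(csvs_path, 'annotations.csv'),
                                        os.path.join(csvs_path, 'metadata.csv'),
                                        audio_path, '_ap_midlevel44k',
                                        aljanaki=True, dset='train')
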
def aljanaki_split(midlevel_annotations_csv, midlevel_metadata_csv, seed=None):
"""Returns test and train song_ids according to the paper A DATA-DRIVEN APPROACH TO MID-LEVEL PERCEPTUAL MUSICAL
FEATURE MODELING: 8% of the data as a test set. (no performer from the test
set appears in the training set). Also, all the performers in
the test set are unique.
For this purpose, artists with only 1 song in the dataset are selected for the test set, so that they are guaranteed to satisfy the above two criteria.
"""
randState = check_random_state(seed)
meta = pd.read_csv(midlevel_metadata_csv, sep=';')
annotations = pd.read_csv(midlevel_annotations_csv)
    assert meta['song id'].equals(annotations['song_id']), \
        "Song IDs in the metadata file do not match those in the annotations file."
artists = meta['Artist']
test_set_size = int(0.08 * len(meta))
artist_value_counts = artists.value_counts()
single_artists = artist_value_counts.index[artist_value_counts == 1]
    assert len(single_artists) >= test_set_size, \
        "Fewer single-song artists in the dataset than the required test set size."
single_artists = single_artists.sort_values()
selected_artists = randState.choice(single_artists, test_set_size, replace=False)
selected_tracks_for_test = meta[meta['Artist'].isin(selected_artists)]
selected_tracks_for_train = meta[~meta['Artist'].isin(selected_artists)]
return list(selected_tracks_for_train['song id']), list(selected_tracks_for_test['song id'])
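
A quick check of the guarantees stated in the docstring, as a sketch with placeholder CSV paths:

import pandas as pd
from datasets.midlevel import aljanaki_split

train_ids, test_ids = aljanaki_split('annotations.csv', 'metadata.csv', seed=42)
meta = pd.read_csv('metadata.csv', sep=';')

train_artists = meta[meta['song id'].isin(train_ids)]['Artist']
test_artists = meta[meta['song id'].isin(test_ids)]['Artist']
assert set(test_artists).isdisjoint(train_artists)  # no performer crosses the split
assert test_artists.is_unique                       # every test performer appears exactly once
assert len(test_ids) == int(0.08 * len(meta))       # about 8% of the data is held out
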
@@ -8,6 +8,7 @@ from torch.utils.data import DataLoader
import pytorch_lightning as pl
from datasets.midlevel import df_get_midlevel_set
from datasets.mtgjamendo import df_get_mtg_set
from datasets.shared_data_utils import *
def initialize_weights(module):
@@ -22,18 +23,18 @@ def initialize_weights(module):
class ModelMidlevel(BasePtlModel):
def __init__(self, config, hparams, num_targets, initialize=True, dataset='midlevel', load_from=None, on_gpu=None, map_location=None):
def __init__(self, config, hparams, num_targets, initialize=True, source_dataset='midlevel', load_from=None, on_gpu=None, map_location=None):
super(ModelMidlevel, self).__init__(config, hparams)
self.dataset = dataset
if dataset=='midlevel':
data_root, audio_path, csvs_path = get_paths('midlevel')
self.dataset = source_dataset
        if source_dataset == 'midlevel':
audio_path, csvs_path = path_midlevel_audio_dir, path_midlevel_annotations_dir
cache_x_name = '_ap_midlevel44k'
from torch.utils.data import random_split
            dataset, dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'),
                                                          os.path.join(csvs_path, 'metadata.csv'),
                                                          audio_path, cache_x_name)
self.trainset, self.validationset, self.testset = random_split(dataset, [int(i*dataset_length) for i in [0.7, 0.2, 0.1]])
elif dataset=='mtgjamendo':
data_root, audio_path, csvs_path = get_paths('mtgjamendo')
self.trainset, self.validationset, self.testset = random_split(dataset, [int(i * dataset_length) for i in [0.7, 0.2, 0.1]])
        elif source_dataset == 'mtgjamendo':
audio_path, csvs_path = path_mtgjamendo_audio_dir, path_mtgjamendo_annotations_dir
cache_x_name = "_ap_mtgjamendo44k"
train_csv = os.path.join(csvs_path, 'train_processed.tsv')
validation_csv = os.path.join(csvs_path, 'validation_processed.tsv')
@@ -117,7 +118,7 @@ class ModelMidlevel(BasePtlModel):
if load_from:
self._load_model(load_from, map_location, on_gpu)
if dataset == 'mtgjamendo':
if source_dataset == 'mtgjamendo':
self.fc_mtg1 = nn.Sequential(
# nn.AdaptiveAvgPool2d((1, 1)),
nn.Linear(256, 56))
@@ -261,7 +262,7 @@ class ModelMidlevel(BasePtlModel):
return [torch.optim.Adam(self.parameters(), lr=1e-4)] # from their code
@pl.data_loader
def tng_dataloader(self):
def train_dataloader(self):
return DataLoader(dataset=self.trainset, batch_size=32, shuffle=True)
@pl.data_loader
@@ -286,6 +287,7 @@ class ModelMidlevel(BasePtlModel):
# tunable=True)
parser.opt_list('--slicing_mode', default='slice', options=['full', 'slice'], type=str, tunable=False)
parser.opt_list('--input_size', default=1024, options=[512, 1024], type=int, tunable=True)
parser.opt_list('--batch_size', default=8, options=[8,16], type=int, tunable=True)
# training params (opt)
@@ -298,3 +300,262 @@ class ModelMidlevel(BasePtlModel):
# options=[16, 32], tunable=False,
# help='batch size will be divided over all gpus being used across all nodes')
return parser
class ModelMidlevelBasic(BasePtlModel):
def __init__(self, config, hparams, num_targets, initialize=True, source_dataset='midlevel', load_from=None,
on_gpu=None, map_location=None):
super(ModelMidlevelBasic, self).__init__(config, hparams)
self.logger = logging.getLogger('mw_log')
audio_path, csvs_path = path_midlevel_audio_dir, path_midlevel_annotations_dir
cache_x_name = '_ap_midlevel44k'
from torch.utils.data import random_split
tr_dataset, tr_dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'), os.path.join(csvs_path, 'metadata.csv'),
audio_path, cache_x_name, aljanaki=True, dset='train')
tst_dataset, tst_dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'),
os.path.join(csvs_path, 'metadata.csv'),
audio_path, cache_x_name, aljanaki=True, dset='test')
self.testset = tst_dataset
self.trainset, self.validationset = random_split(tr_dataset, [int(i * tr_dataset_length) for i in [0.98, 0.02]])
self.num_targets = num_targets
self.conv1 = nn.Sequential(
nn.Conv2d(1, 64, 5, 2, 2), # (in_channels, out_channels, kernel_size, stride, padding)
nn.BatchNorm2d(64),
nn.ReLU()
)
self.conv2 = nn.Sequential(
nn.Conv2d(64, 64, 3, 1, 1),
nn.BatchNorm2d(64),
nn.ReLU()
)
self.mp2x2_dropout = nn.Sequential(
nn.MaxPool2d(2),
nn.Dropout2d(self.hparams.dropout)
)
self.ap2x2_dropout = nn.Sequential(
nn.AvgPool2d(2),
nn.Dropout2d(0.3)
)
self.conv3 = nn.Sequential(
nn.Conv2d(64, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv4 = nn.Sequential(
nn.Conv2d(128, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv5 = nn.Sequential(
nn.Conv2d(128, 256, 3, 1, 1),
nn.BatchNorm2d(256),
nn.ReLU()
)
self.conv6 = nn.Sequential(
nn.Conv2d(256, 256, 3, 1, 1),
nn.BatchNorm2d(256),
nn.ReLU()
)
self.conv7 = nn.Sequential(
nn.Conv2d(256, 384, 3, 1, 1),
nn.BatchNorm2d(384),
nn.ReLU()
)
self.conv7b = nn.Sequential(
nn.Conv2d(384, 512, 3, 1, 1),
nn.BatchNorm2d(512),
nn.ReLU()
)
self.conv11 = nn.Sequential(
nn.Conv2d(512, 256, 1, 1, 0),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.AdaptiveAvgPool2d((1, 1))
)
self.fc_ml = nn.Linear(256, 7)
if initialize:
self.apply(initialize_weights)
if load_from:
self._load_model(load_from, map_location, on_gpu)
self.log("initialized model")
def forward(self, x):
# 313 * 149 * 1
x = self.conv1(x) # 157 * 75 * 64
x = self.conv2(x) # 157 * 75 * 64
x = self.ap2x2_dropout(x) # 78 * 37 * 64
x = self.conv3(x) # 78 * 37 * 128
x = self.conv4(x) # 78 * 37 * 128
x = self.ap2x2_dropout(x) # 39 * 18 * 128
x = self.conv5(x) # 39 * 18 * 256
x = self.conv6(x) # 39 * 18 * 256
x = self.conv7(x) # 39 * 18 * 384
        x = self.conv7b(x)  # 39 * 18 * 512
        x = self.conv11(x)  # 1 * 1 * 256
x = x.view(x.size(0), -1)
x = self.fc_ml(x)
return x
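
To sanity-check the shape comments in forward, here is a standalone sketch that rebuilds the same convolutional stack with a fixed dropout value (hparams are not needed for shapes) and runs a dummy 313 x 149 single-channel input through it:

import torch
import torch.nn as nn

def conv_bn_relu(cin, cout, k, s, p):
    return nn.Sequential(nn.Conv2d(cin, cout, k, s, p), nn.BatchNorm2d(cout), nn.ReLU())

stack = nn.Sequential(
    conv_bn_relu(1, 64, 5, 2, 2), conv_bn_relu(64, 64, 3, 1, 1),      # 157 x 75
    nn.AvgPool2d(2), nn.Dropout2d(0.3),                               # 78 x 37
    conv_bn_relu(64, 128, 3, 1, 1), conv_bn_relu(128, 128, 3, 1, 1),
    nn.AvgPool2d(2), nn.Dropout2d(0.3),                               # 39 x 18
    conv_bn_relu(128, 256, 3, 1, 1), conv_bn_relu(256, 256, 3, 1, 1),
    conv_bn_relu(256, 384, 3, 1, 1), conv_bn_relu(384, 512, 3, 1, 1),
    conv_bn_relu(512, 256, 1, 1, 0), nn.AdaptiveAvgPool2d((1, 1)),    # 1 x 1 x 256
    nn.Flatten(), nn.Linear(256, 7),                                  # 7 mid-level targets
)
assert stack(torch.randn(2, 1, 313, 149)).shape == (2, 7)
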
@classmethod
def load_from_metrics(cls, weights_path, config, tags_csv=None, on_gpu=True):
def load_hparams_from_tags_csv(tags_csv):
from argparse import Namespace
import pandas as pd
tags_df = pd.read_csv(tags_csv)
dic = tags_df.to_dict(orient='records')
ns_dict = {row['key']: convert(row['value']) for row in dic}
ns = Namespace(**ns_dict)
return ns
def convert(val):
constructors = [int, float, str]
if type(val) is str:
if val.lower() == 'true':
return True
if val.lower() == 'false':
return False
for c in constructors:
try:
return c(val)
except ValueError:
pass
return val
hparams = load_hparams_from_tags_csv(tags_csv)
hparams.__setattr__('on_gpu', on_gpu)
# load on CPU only to avoid OOM issues
# then its up to user to put back on GPUs
checkpoint = torch.load(weights_path, map_location=lambda storage, loc: storage)
# load the state_dict on the model automatically
model = cls(config, hparams, num_targets=7)
model.load_state_dict(checkpoint['state_dict'])
# give model a chance to load something
model.on_load_checkpoint(checkpoint)
return model
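
A sketch of restoring the basic model from a saved run; the checkpoint and tags paths are placeholders, and the experiment config object expected by BasePtlModel is assumed to be constructed elsewhere:

# placeholder paths; 'config' is the project-specific experiment config (assumed available)
model = ModelMidlevelBasic.load_from_metrics(
    weights_path='runs/midlevel_basic/checkpoints/_ckpt_epoch_20.ckpt',
    config=config,
    tags_csv='runs/midlevel_basic/meta_tags.csv',
    on_gpu=False)
model.eval()
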
def log(self, msg):
self.logger.info(msg)
def my_loss(self, y_hat, y):
return F.mse_loss(y_hat, y)
def training_step(self, data_batch, batch_nb):
x, _, y = data_batch
y_hat = self.forward(x)
y = y.float()
y_hat = y_hat.float()
return {'loss': self.my_loss(y_hat, y)}
def validation_step(self, data_batch, batch_nb):
x, _, y = data_batch
y_hat = self.forward(x)
y = y.float()
y_hat = y_hat.float()
return {'val_loss': self.my_loss(y_hat, y),
'y': y.cpu().numpy(),
'y_hat': y_hat.cpu().numpy()}
def validation_end(self, outputs):
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
y = []
y_hat = []
for output in outputs:
y.append(output['y'])
y_hat.append(output['y_hat'])
y = np.concatenate(y)
y_hat = np.concatenate(y_hat)
metrics = self._compute_metrics(y, y_hat, self.validation_metrics)
metrics['val_loss'] = avg_loss
self.log('Val: '+dict_to_entry(metrics, filter=['corr_avg']))
return metrics
def test_step(self, data_batch, batch_nb):
x, _, y = data_batch
y_hat = self.forward(x)
y = y.float()
y_hat = y_hat.float()
return {'test_loss': self.my_loss(y_hat, y),
'y': y.cpu().numpy(),
'y_hat': y_hat.cpu().numpy(),
}
def test_end(self, outputs):
avg_test_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
y = []
y_hat = []
for output in outputs:
y.append(output['y'])
y_hat.append(output['y_hat'])
y = np.concatenate(y)
y_hat = np.concatenate(y_hat)
test_metrics = self._compute_metrics(y, y_hat, self.test_metrics)
test_metrics['avg_test_loss'] = avg_test_loss
# print(test_metrics)
# self.experiment.log(test_metrics)
self.log('Test: '+dict_to_entry(test_metrics, filter=['corr_avg', 'corr']))
return test_metrics
def configure_optimizers(self):
return [torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)] # from their code
@pl.data_loader
def train_dataloader(self):
return DataLoader(dataset=self.trainset, batch_size=self.hparams.batch_size, shuffle=True)
@pl.data_loader
def val_dataloader(self):
return DataLoader(dataset=self.validationset, batch_size=self.hparams.batch_size, shuffle=True)
@pl.data_loader
def test_dataloader(self):
return DataLoader(dataset=self.testset, batch_size=self.hparams.batch_size, shuffle=True)
@staticmethod
def add_model_specific_args(parent_parser):
"""Parameters defined here will be available to your model through self.hparams
"""
parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser])
# network params
parser.opt_list('--dropout', default=0.3, type=float,
options=[0.2, 0.5, 0.8],
tunable=True)
parser.opt_list('--learning_rate', default=0.0001, type=float,
options=[0.00001, 0.0005, 0.001],
tunable=True)
# parser.opt_list('--input_size', default=1024, options=[512, 1024], type=int, tunable=False)
parser.opt_list('--batch_size', default=8, options=[8, 16], type=int, tunable=True)
return parser
\ No newline at end of file
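
Finally, a sketch of how the hyperparameters declared in add_model_specific_args can be materialised with test-tube's HyperOptArgumentParser; the root parser below is an assumption about how the training script sets things up, and ModelMidlevelBasic is assumed to be importable from its module in the repo:

from test_tube import HyperOptArgumentParser

root_parser = HyperOptArgumentParser(strategy='random_search', add_help=False)
parser = ModelMidlevelBasic.add_model_specific_args(root_parser)
hparams = parser.parse_args([])  # defaults: dropout=0.3, learning_rate=1e-4, batch_size=8
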