Commit 51e35b88 authored by Verena Praher

Merge branch 'master' of gitlab.cp.jku.at:shreyan/moodwalk

parents 3ccce6cd fe17fc86
@@ -4,7 +4,8 @@ import torch
import librosa
import numpy as np
import pandas as pd
from utils import PATH_DATA_CACHE
from datasets.shared_data_utils import path_data_cache
from sklearn.utils import check_random_state
def sample_slicing_function(h5data, idx, xlen):
@@ -17,16 +18,26 @@ def sample_slicing_function(h5data, idx, xlen):
t2_parse_labels_cache = {}
def midlevel_parse_labels(csvf):
global t2_parse_labels_cache
if t2_parse_labels_cache.get(csvf) is not None:
return t2_parse_labels_cache.get(csvf)
def midlevel_parse_labels(csvf, csv_meta=None, aljanaki=False, dset='train'):
# global t2_parse_labels_cache
# if t2_parse_labels_cache.get(csvf) is not None:
# return t2_parse_labels_cache.get(csvf)
df = pd.read_csv(csvf, sep=',')
song_ids = df['song_id'].astype(str)+'.mp3'
labels = df[df.columns[1:]]
# t2_parse_labels_cache[csvf] = song_ids, labels
t2_parse_labels_cache[csvf] = song_ids, labels
return t2_parse_labels_cache[csvf]
if aljanaki:
a_tr_song_ids, a_tst_song_ids = aljanaki_split(midlevel_annotations_csv=csvf, midlevel_metadata_csv=csv_meta)
a_tr_song_ids, a_tst_song_ids = [str(s)+'.mp3' for s in a_tr_song_ids], [str(s)+'.mp3' for s in a_tst_song_ids]
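# keep only the song ids (and their label rows) that belong to the requested Aljanaki split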
if dset == 'train':
song_ids, labels = song_ids[song_ids.isin(list(set(song_ids).intersection(a_tr_song_ids)))], labels[
song_ids.isin(list(set(song_ids).intersection(a_tr_song_ids)))]
elif dset == 'test':
song_ids, labels = song_ids[song_ids.isin(list(set(song_ids).intersection(a_tst_song_ids)))], labels[
song_ids.isin(list(set(song_ids).intersection(a_tst_song_ids)))]
return song_ids, labels
def processor_midlevel44k(file_path):
@@ -85,20 +96,46 @@ audio_processor = processor_midlevel44k
label_encoder = None
def df_get_midlevel_set(name, midlevel_files_csv, audio_path, cache_x_name):
def df_get_midlevel_set(name, midlevel_files_csv, midlevel_files_meta_csv, audio_path, cache_x_name, aljanaki=False, dset='train'):
audio_path = os.path.expanduser(audio_path)
global label_encoder
print("loading dataset from '{}'".format(name))
def getdatset():
files, labels = midlevel_parse_labels(midlevel_files_csv)
files, labels = midlevel_parse_labels(midlevel_files_csv, midlevel_files_meta_csv, aljanaki=aljanaki, dset=dset)
return AudioPreprocessDataset(files, labels, label_encoder, audio_path, audio_processor)
df_trset = H5FCachedDataset(getdatset, name, slicing_function=sample_slicing_function,
x_name=cache_x_name,
cache_path=PATH_DATA_CACHE
cache_path=path_data_cache
)
return df_trset, len(df_trset)
def aljanaki_split(midlevel_annotations_csv, midlevel_metadata_csv, seed=None):
"""Returns test and train song_ids according to the paper A DATA-DRIVEN APPROACH TO MID-LEVEL PERCEPTUAL MUSICAL
FEATURE MODELING: 8% of the data as a test set. (no performer from the test
set appears in the training set). Also, all the performers in
the test set are unique.
For this purpose, artists with only 1 song in the dataset are selected for the test set, so that they are guaranteed to satisfy the above two criteria.
"""
randState = check_random_state(seed)
meta = pd.read_csv(midlevel_metadata_csv, sep=';')
annotations = pd.read_csv(midlevel_annotations_csv)
assert meta['song id'].equals(
annotations['song_id']), "Song IDs in the metadata file do not match those in the annotations file."
artists = meta['Artist']
test_set_size = int(0.08 * len(meta))
artist_value_counts = artists.value_counts()
single_artists = artist_value_counts.index[artist_value_counts == 1]
assert len(
single_artists) >= test_set_size, "Test set size exceeds the number of single-song artists in the dataset."
single_artists = single_artists.sort_values()
selected_artists = randState.choice(single_artists, test_set_size, replace=False)
selected_tracks_for_test = meta[meta['Artist'].isin(selected_artists)]
selected_tracks_for_train = meta[~meta['Artist'].isin(selected_artists)]
return list(selected_tracks_for_train['song id']), list(selected_tracks_for_test['song id'])
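For reference, a minimal usage sketch of the new split helper; the CSV locations below are placeholder assumptions (they are not defined by this commit), and the checks simply restate the criteria from the docstring above.

import os
import pandas as pd
from datasets.midlevel import aljanaki_split  # helper added in this commit

csvs_path = '/path/to/midlevel/metadata_annotations'   # assumed location of the annotation CSVs
annotations_csv = os.path.join(csvs_path, 'annotations.csv')
metadata_csv = os.path.join(csvs_path, 'metadata.csv')

train_ids, test_ids = aljanaki_split(annotations_csv, metadata_csv, seed=42)

# Restate the split criteria as sanity checks.
meta = pd.read_csv(metadata_csv, sep=';')
test_artists = meta[meta['song id'].isin(test_ids)]['Artist']
train_artists = meta[meta['song id'].isin(train_ids)]['Artist']
assert not set(train_ids) & set(test_ids)             # the two splits are disjoint
assert test_artists.is_unique                         # every performer in the test set is unique
assert not test_artists.isin(train_artists).any()     # no test performer appears in the training set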
@@ -4,7 +4,7 @@ import torch
import librosa
import numpy as np
import pandas as pd
from utils import PATH_DATA_CACHE
from datasets.shared_data_utils import path_data_cache
slice_length = 512 #TODO: Find a better way
n_mels = 256
@@ -163,7 +163,7 @@ def df_get_mtg_set(name, mtg_files_csv, audio_path, cache_x_name, slicing_func=N
df_trset = H5FCachedDataset(getdatset, name, slicing_function=slicing_func,
x_name=cache_x_name,
cache_path=PATH_DATA_CACHE,
cache_path=path_data_cache,
augment_options=augment_options
)
@@ -33,7 +33,7 @@ def run(hparams):
logger.info(hparams)
exp = Experiment(name=trial_name, save_dir=CURR_RUN_PATH)
# exp.tag(hparams)
exp.tag(hparams)
# callbacks
early_stop = EarlyStopping(
@@ -52,7 +52,7 @@ def run(hparams):
)
if USE_GPU:
trainer = Trainer(gpus=[0], distributed_backend=None,
trainer = Trainer(gpus=[hparams.gpu], distributed_backend=None,
experiment=exp, max_nb_epochs=hparams.max_epochs,
train_percent_check=hparams.train_percent,
fast_dev_run=False, early_stop_callback=early_stop,
@@ -70,6 +70,7 @@ def run(hparams):
trainer.fit(model)
test_metrics = trainer.test()
logger.info(test_metrics)
exp.log(test_metrics)
except KeyboardInterrupt:
logger.info("Run interrupted")
except Exception as e:
@@ -93,6 +94,8 @@ def run(hparams):
if __name__ == '__main__':
parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)
parent_parser.add_argument('--gpu', type=int,
default=0, help='which gpu to use')
parent_parser.add_argument('--experiment_name', type=str,
default='pt_lightning_exp_a', help='test tube exp name')
parent_parser.add_argument('--train_percent', type=float,
@@ -71,7 +71,7 @@ def train_mtgjamendo(hparams, midlevel_chkpt_dir):
exp = Experiment(name='mtg', save_dir=CURR_RUN_PATH)
# mtg_configs()
logger.info(f"Loading model from {midlevel_chkpt_dir}")
model = Network(model_config, hparams, num_targets=7, dataset='mtgjamendo', on_gpu=USE_GPU, load_from=midlevel_chkpt_dir)
model = Network(model_config, hparams, num_targets=7, source_dataset='mtgjamendo', on_gpu=USE_GPU, load_from=midlevel_chkpt_dir)
logger.info(f"Loaded model successfully")
early_stop = EarlyStopping(
import logging
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from test_tube import Experiment, HyperOptArgumentParser
import os
from models.cp_resnet_locpad import Network
# from models.cp_preactbnresnetshakelocpad import Network
from utils import init_experiment, USE_GPU, dict_to_entry
initialized = False # TODO: Find a better way to do this
trial_counter = 0
def main(hparams, gpu=None):
if gpu is None:
gpu = [hparams.gpu]
global initialized, trial_counter
trial_counter += 1
if not initialized:
init_experiment(comment=hparams.name)
from utils import CURR_RUN_PATH # import these after init_experiment
logger = logging.getLogger('mw_log')
if not initialized:
logger.info(f"tensorboard --logdir={CURR_RUN_PATH}")
initialized = True
trial_name = f"trial_{trial_counter}"
logger.info(trial_name)
hpdict = hparams.__dict__.copy()
for key in ['trials', 'hpc_exp_number', 'optimize_trials_parallel_gpu', 'generate_trials', 'optimize_parallel_cpu',
'optimize_parallel_gpu', 'optimize_parallel', 'load_from_trial_name']:
if key in hpdict: del hpdict[key]
logger.info(dict_to_entry(hpdict, roundoff=None))
exp = Experiment(name=trial_name, save_dir=CURR_RUN_PATH)
batch_size = 10
model_config_basic = {
"input_shape": [batch_size, 1, -1, -1],
"arch": "",
"use_bn": True,
"multi_label": False,
"n_classes": 7,
"prediction_threshold": 0,
"depth": 26,
"base_channels": 64,
"stage1": {"maxpool": [1, 2, 4], "k1s": [3, 3, 3, 3, 3, 3], "k2s": [1, 3, 3, 3, 3, 3]},
"stage2": {"maxpool": [], "k1s": [3, 3, 3, 3, 3, 3], "k2s": [3, 3, 3, 3, 3, 3]},
"stage3": {"maxpool": [], "k1s": [3, 3, 3, 3, 3, 3], "k2s": [3, 3, 3, 3, 3, 3]},
"block_type": "basic",
"binary_classifier": True,
'validation_metrics': ['corr_avg'],
'test_metrics': ['corr_avg', 'corr']
}
model_config_shakefa = {
"arch":"cp_preactbnresnetshakelocpad",
"base_channels":128,
"binary_classifier":True,
"block_type":"basic",
"depth":26,
"input_shape":[10,1,-1,-1],
"multi_label":False,
"n_classes":7,
"prediction_threshold":0,
"stage1":{"k1s":[3,3,3,3], "k2s":[1,3,3,3], "maxpool":[1,2,4]},
"stage2":{"k1s":[1,1,1,1], "k2s":[1,1,1,1], "maxpool":[]},
"stage3":{"k1s":[1,1,1,1],"k2s":[1,1,1,1],"maxpool":[]},
"use_bn":True,
"weight_init":"fixup",
'validation_metrics': ['corr_avg'],
'test_metrics': ['corr_avg', 'corr']
}
model_config = model_config_basic
logger.info(dict_to_entry(model_config, roundoff=None))
# build model
model = Network(config=model_config, hparams=hparams, num_targets=7)
# callbacks
early_stop = EarlyStopping(
monitor='corr_avg',
patience=20,
verbose=True,
mode='max'
)
model_save_path = os.path.join(CURR_RUN_PATH, trial_name, 'best.ckpt')
# model_save_path = os.path.join(CURR_RUN_PATH, exp.name)
checkpoint = ModelCheckpoint(
filepath=model_save_path,
save_best_only=True,
verbose=True,
monitor='corr_avg',
mode='max'
)
# configure trainer
if USE_GPU:
trainer = Trainer(gpus=gpu, distributed_backend=None,
experiment=exp, max_nb_epochs=hparams.epochs,
train_percent_check=hparams.train_percent,
fast_dev_run=False, early_stop_callback=early_stop,
checkpoint_callback=checkpoint,
nb_sanity_val_steps=0) # don't run sanity validation run
else:
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.01, val_percent_check=0.01,
fast_dev_run=True, nb_sanity_val_steps=0)
if hparams.mode == 'train':
# train model
exp.tag(hpdict)
trainer.fit(model)
if hparams.mode == 'test':
from utils import PATH_RESULTS
exp_dir = os.path.join(PATH_RESULTS, 'runs', hparams.name)
ckpt_dir = os.path.join(exp_dir, hparams.load_from_trial_name, 'best.ckpt')
ckpt_files = os.listdir(ckpt_dir)
if len(ckpt_files) == 1:
weights_file = os.path.join(ckpt_dir, ckpt_files[0])
else:
logger.warning(f"Multiple weight files, picking {ckpt_files[-1]}")
weights_file = os.path.join(ckpt_dir, ckpt_files[-1])
model = Network.load_from_metrics(
weights_path=weights_file,
tags_csv=os.path.join(exp_dir, trial_name, 'version_0/meta_tags.csv'),
on_gpu=True,
config=model_config
)
trainer.test(model=model)
if hparams.mode == 'train and test':
exp.tag(hpdict)
trainer.fit(model)
trainer.test()
if __name__=='__main__':
parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)
parent_parser.add_argument('--gpu', type=int,
default=0, help='which gpu to use')
parent_parser.add_argument('--name', type=str,
default='pt_lightning_exp_a', help='test tube exp name')
parent_parser.add_argument('--train_percent', type=float,
default=1.0, help='how much train data to use')
parent_parser.add_argument('--epochs', type=int,
default=10, help='maximum number of epochs')
parent_parser.add_argument('--mode', type=str,
default='train and test', help='train/test/train and test')
parent_parser.add_argument('--load_from_trial_name', type=str,
default='trial_0', help='which trial name to load a model from')
parser = Network.add_model_specific_args(parent_parser)
hyperparams = parser.parse_args()
# hyperparams.optimize_parallel_gpu(main, gpu_ids=['0','1','2','3'])
main(hyperparams)
# for hparams in hyperparams.trials(18):
# main(hparams)
import logging
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from test_tube import Experiment, HyperOptArgumentParser
import os
from models.midlevel_vgg import ModelMidlevelBasic as Network
from utils import init_experiment, USE_GPU, dict_to_entry
initialized = False # TODO: Find a better way to do this
trial_counter = 0
def main(hparams, gpu=None):
if gpu is None:
gpu = [hparams.gpu]
global initialized, trial_counter
trial_counter += 1
if not initialized:
init_experiment(comment=hparams.name)
from utils import CURR_RUN_PATH # import these after init_experiment
logger = logging.getLogger('mw_log')
if not initialized:
logger.info(f"tensorboard --logdir={CURR_RUN_PATH}")
initialized = True
trial_name = f"trial_{trial_counter}"
logger.info(trial_name)
hpdict = hparams.__dict__.copy()
for key in ['trials', 'hpc_exp_number', 'optimize_trials_parallel_gpu', 'generate_trials', 'optimize_parallel_cpu',
'optimize_parallel_gpu', 'optimize_parallel', 'load_from_trial_name']:
if key in hpdict: del hpdict[key]
logger.info(dict_to_entry(hpdict, roundoff=None))
exp = Experiment(name=trial_name, save_dir=CURR_RUN_PATH)
model_config = {
'validation_metrics': ['corr_avg'],
'test_metrics': ['corr_avg', 'corr']
}
# build model
model = Network(config=model_config, hparams=hparams, num_targets=7)
# callbacks
early_stop = EarlyStopping(
monitor='corr_avg',
patience=20,
verbose=True,
mode='max'
)
model_save_path = os.path.join(CURR_RUN_PATH, trial_name, 'best.ckpt')
# model_save_path = os.path.join(CURR_RUN_PATH, exp.name)
checkpoint = ModelCheckpoint(
filepath=model_save_path,
save_best_only=True,
verbose=True,
monitor='corr_avg',
mode='max'
)
# configure trainer
if USE_GPU:
trainer = Trainer(gpus=gpu, distributed_backend=None,
experiment=exp, max_nb_epochs=hparams.epochs,
train_percent_check=hparams.train_percent,
fast_dev_run=False, early_stop_callback=early_stop,
checkpoint_callback=checkpoint,
nb_sanity_val_steps=0) # don't run sanity validation run
else:
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.01, val_percent_check=0.01,
fast_dev_run=True, nb_sanity_val_steps=0)
if hparams.mode == 'train':
# train model
exp.tag(hpdict)
trainer.fit(model)
if hparams.mode == 'test':
from utils import PATH_RESULTS
exp_dir = os.path.join(PATH_RESULTS, 'runs', hparams.name)
ckpt_dir = os.path.join(exp_dir, hparams.load_from_trial_name, 'best.ckpt')
ckpt_files = os.listdir(ckpt_dir)
if len(ckpt_files) == 1:
weights_file = os.path.join(ckpt_dir, ckpt_files[0])
else:
logger.warning(f"Multiple weight files, picking {ckpt_files[-1]}")
weights_file = os.path.join(ckpt_dir, ckpt_files[-1])
model = Network.load_from_metrics(
weights_path=weights_file,
tags_csv=os.path.join(exp_dir, trial_name, 'version_0/meta_tags.csv'),
on_gpu=True,
config=model_config
)
trainer.test(model=model)
if hparams.mode == 'train and test':
exp.tag(hpdict)
trainer.fit(model)
trainer.test()
if __name__=='__main__':
parent_parser = HyperOptArgumentParser(strategy='grid_search', add_help=False)
parent_parser.add_argument('--gpu', type=int,
default=0, help='which gpu to use')
parent_parser.add_argument('--name', type=str,
default='pt_lightning_exp_a', help='test tube exp name')
parent_parser.add_argument('--train_percent', type=float,
default=1.0, help='how much train data to use')
parent_parser.add_argument('--epochs', type=int,
default=10, help='maximum number of epochs')
parent_parser.add_argument('--mode', type=str,
default='train and test', help='train/test/train and test')
parent_parser.add_argument('--load_from_trial_name', type=str,
default='trial_0', help='which trial name to load a model from')
parser = Network.add_model_specific_args(parent_parser)
hyperparams = parser.parse_args()
# hyperparams.optimize_parallel_gpu(main, gpu_ids=['0','1','2','3'])
for hparams in hyperparams.trials(18):
main(hparams)
@@ -80,7 +80,7 @@ class CRNN(BasePtlModel):
def rnn_forward(x):
x = x.squeeze()
x = self.gru1(x)[1][1] # TODO: Check if this is correct
x = self.gru1(x)[0][-1] # TODO: Check if this is correct
x = self.dropout(x)
logit = nn.Sigmoid()(self.dense(x))
return logit
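As background for the indexing change above: torch.nn.GRU returns the pair (output, h_n), so gru(x)[0][-1] selects the top-layer features at the last time step, whereas gru(x)[1][...] indexes into the per-layer final hidden states. A minimal standalone sketch (sizes are illustrative assumptions, not the CRNN's actual configuration):

import torch
import torch.nn as nn

gru = nn.GRU(input_size=256, hidden_size=320, num_layers=2)  # illustrative sizes only
x = torch.randn(80, 10, 256)                                 # (seq_len, batch, features)

output, h_n = gru(x)      # output: (seq_len, batch, hidden); h_n: (num_layers, batch, hidden)
last_step = output[-1]    # gru(x)[0][-1]: last time step of the top layer, shape (batch, hidden)
top_hidden = h_n[-1]      # gru(x)[1][-1]: final hidden state of the top layer
assert torch.allclose(last_step, top_hidden)  # the two coincide for a unidirectional GRU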
@@ -8,6 +8,7 @@ from torch.utils.data import DataLoader
import pytorch_lightning as pl
from datasets.midlevel import df_get_midlevel_set
from datasets.mtgjamendo import df_get_mtg_set
from datasets.shared_data_utils import *
def initialize_weights(module):
@@ -22,18 +23,18 @@ def initialize_weights(module):
class ModelMidlevel(BasePtlModel):
def __init__(self, config, hparams, num_targets, initialize=True, dataset='midlevel', load_from=None, on_gpu=None, map_location=None):
def __init__(self, config, hparams, num_targets, initialize=True, source_dataset='midlevel', load_from=None, on_gpu=None, map_location=None):
super(ModelMidlevel, self).__init__(config, hparams)
self.dataset = dataset
if dataset=='midlevel':
data_root, audio_path, csvs_path = get_paths('midlevel')
self.dataset = source_dataset
if source_dataset == 'midlevel':
audio_path, csvs_path = path_midlevel_audio_dir, path_midlevel_annotations_dir
cache_x_name = '_ap_midlevel44k'
from torch.utils.data import random_split
dataset, dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'), audio_path, cache_x_name)
self.trainset, self.validationset, self.testset = random_split(dataset, [int(i*dataset_length) for i in [0.7, 0.2, 0.1]])
elif dataset=='mtgjamendo':
data_root, audio_path, csvs_path = get_paths('mtgjamendo')
self.trainset, self.validationset, self.testset = random_split(dataset, [int(i * dataset_length) for i in [0.7, 0.2, 0.1]])
elif source_dataset == 'mtgjamendo':
audio_path, csvs_path = path_mtgjamendo_audio_dir, path_mtgjamendo_annotations_dir
cache_x_name = "_ap_mtgjamendo44k"
train_csv = os.path.join(csvs_path, 'train_processed.tsv')
validation_csv = os.path.join(csvs_path, 'validation_processed.tsv')
@@ -117,7 +118,7 @@ class ModelMidlevel(BasePtlModel):
if load_from:
self._load_model(load_from, map_location, on_gpu)
if dataset == 'mtgjamendo':
if source_dataset == 'mtgjamendo':
self.fc_mtg1 = nn.Sequential(
# nn.AdaptiveAvgPool2d((1, 1)),
nn.Linear(256, 56))
@@ -261,7 +262,7 @@ class ModelMidlevel(BasePtlModel):
return [torch.optim.Adam(self.parameters(), lr=1e-4)] # from their code
@pl.data_loader
def tng_dataloader(self):
def train_dataloader(self):
return DataLoader(dataset=self.trainset, batch_size=32, shuffle=True)
@pl.data_loader
@@ -286,6 +287,7 @@ class ModelMidlevel(BasePtlModel):
# tunable=True)
parser.opt_list('--slicing_mode', default='slice', options=['full', 'slice'], type=str, tunable=False)
parser.opt_list('--input_size', default=1024, options=[512, 1024], type=int, tunable=True)
parser.opt_list('--batch_size', default=8, options=[8,16], type=int, tunable=True)
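# tunable=True includes batch_size in the grid of configurations generated by hparams.trials(...)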
# training params (opt)
@@ -298,3 +300,262 @@ class ModelMidlevel(BasePtlModel):
# options=[16, 32], tunable=False,
# help='batch size will be divided over all gpus being used across all nodes')
return parser
class ModelMidlevelBasic(BasePtlModel):
def __init__(self, config, hparams, num_targets, initialize=True, source_dataset='midlevel', load_from=None,
on_gpu=None, map_location=None):
super(ModelMidlevelBasic, self).__init__(config, hparams)
self.logger = logging.getLogger('mw_log')
audio_path, csvs_path = path_midlevel_audio_dir, path_midlevel_annotations_dir
cache_x_name = '_ap_midlevel44k'
from torch.utils.data import random_split
tr_dataset, tr_dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'), os.path.join(csvs_path, 'metadata.csv'),
audio_path, cache_x_name, aljanaki=True, dset='train')
tst_dataset, tst_dataset_length = df_get_midlevel_set('midlevel', os.path.join(csvs_path, 'annotations.csv'),
os.path.join(csvs_path, 'metadata.csv'),
audio_path, cache_x_name, aljanaki=True, dset='test')
self.testset = tst_dataset
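# hold out ~2% of the Aljanaki training split for validation; the artist-disjoint test split above is used as-is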
self.trainset, self.validationset = random_split(tr_dataset, [int(i * tr_dataset_length) for i in [0.98, 0.02]])
self.num_targets = num_targets
self.conv1 = nn.Sequential(
nn.Conv2d(1, 64, 5, 2, 2), # (in_channels, out_channels, kernel_size, stride, padding)
nn.BatchNorm2d(64),
nn.ReLU()
)
self.conv2 = nn.Sequential(
nn.Conv2d(64, 64, 3, 1, 1),
nn.BatchNorm2d(64),
nn.ReLU()
)
self.mp2x2_dropout = nn.Sequential(
nn.MaxPool2d(2),
nn.Dropout2d(self.hparams.dropout)
)
self.ap2x2_dropout = nn.Sequential(
nn.AvgPool2d(2),
nn.Dropout2d(0.3)
)
self.conv3 = nn.Sequential(
nn.Conv2d(64, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv4 = nn.Sequential(
nn.Conv2d(128, 128, 3, 1, 1),
nn.BatchNorm2d(128),
nn.ReLU()
)
self.conv5 = nn.Sequential(
nn.Conv2d(128, 256, 3, 1, 1),