Commit ca903fab authored by Shreyan Chowdhury's avatar Shreyan Chowdhury

reorganize project

parent d2a228f9
from utils import *
from torch.utils.data import Dataset, DataLoader
from processors.spectrogram_processors import make_framed_spec
class MelSpecDataset(Dataset):
def __init__(self, phase='train', ann_root=None, spec_root=None, length=MAX_FRAMES):
def __init__(self, phase='train', ann_root=None, spec_root=None, length=MAX_FRAMES, framed=True):
assert ann_root is not None, "ann_root (root directory containing annotation files) required"
assert spec_root is not None, "spec_root (root directory of spectrograms) required"
assert phase in ['train', 'validation', 'test'], \
......@@ -12,6 +13,7 @@ class MelSpecDataset(Dataset):
self.ann_root = ann_root
self.spec_root = spec_root
self.length = length
self.framed = framed
xy = pd.read_csv(os.path.join(self.ann_root, f'{phase}_processed.tsv'), sep='\t')
self.len = len(xy)
......@@ -27,6 +29,9 @@ class MelSpecDataset(Dataset):
returns framed mel spectrogram and multi-hot encoded labels
"""
x_melspec = np.load(os.path.join(self.spec_root, self.x_path[index]))
if not self.framed:
x_melspec = make_framed_spec(x_melspec, frame_length=256, hop=1.0)
reqd_len = self.length
spec_len = len(x_melspec)
x_trimmed = x_melspec[:reqd_len] if spec_len>reqd_len else \
......@@ -51,12 +56,15 @@ class MelSpecDataset(Dataset):
return np.array([])
return tagslist
def _make_unique_tags_list_(self, labels, saveto='./tagslist'):
def _make_unique_tags_list_(self, labels, saveto=None):
labelslist = []
for label in labels:
labelslist.extend(label)
tagslist = np.sort(np.unique(np.array(labelslist)))
if saveto is not None:
if saveto is None:
saveto = os.path.join(PATH_PROJECT_ROOT, 'tagslist')
np.save(saveto, tagslist)
else:
np.save(saveto, tagslist)
return tagslist
......@@ -64,7 +72,8 @@ class MelSpecDataset(Dataset):
if __name__=='__main__':
# Tests
torch.manual_seed(6)
dataset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS, spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
dataset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
train_loader = DataLoader(dataset=dataset,
batch_size=32,
shuffle=True)
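    # A minimal smoke test (sketch): pull one batch and check its shapes. The
    # exact values depend on MAX_FRAMES and the number of tags in the dataset,
    # so nothing here asserts specific dimensions.
    x_batch, y_batch = next(iter(train_loader))
    logger.info(f'spec batch shape: {x_batch.shape}, label batch shape: {y_batch.shape}')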
......
......@@ -17,7 +17,7 @@ def run():
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1,
fast_dev_run=True)
model = Network() # TODO num_class
model = Network(num_class=56) # TODO num_class
print(model)
......
......@@ -3,8 +3,8 @@ from pytorch_lightning import Trainer
from test_tube import Experiment
from models.cp_resnet import Network
import torch
from datasets import MelSpecDataset
from torch.utils.data import Dataset, DataLoader
from datasets.datasets import MelSpecDataset
from torch.utils.data import DataLoader
from matplotlib.transforms import Affine2D
import mpl_toolkits.axisartist.floating_axes as floating_axes
......@@ -96,7 +96,7 @@ def ERF_generate(model, loader):
accum += me
counter += 1
# torch.save({"arr": accum, "counter": counter}, os.path.join(self.config.out_dir, 'ERF_dict.pth'))
ERF_plot(accum, savefile=os.path.join('/home/verena/experiments/moodwalk', 'erf.png'))
ERF_plot(accum, savefile=os.path.join(CURR_RUN_PATH, 'erf.png'))
# self.experiment.add_artifact(os.path.join(self.config.out_dir, 'erf.png'), "erf.png", {"dataset": dataset_name})
return True
......
from utils import *
from pytorch_lightning import Trainer
from pytorch_lightning.utilities import arg_parse
from pytorch_lightning.callbacks.pt_callbacks import ModelCheckpoint
from test_tube import Experiment
from models import vgg_basic
import argparse
def run(hparams):
logger.info(CURR_RUN_PATH)
exp = Experiment(
name=hparams.tt_name,
debug=hparams.debug,
save_dir=CURR_RUN_PATH,
version='1',
autosave=False,
description=hparams.tt_description
)
exp.argparse(hparams)
exp.save()
# model = cp_resnet.Network(model_config)
model = vgg_basic.Network()
model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
checkpoint = ModelCheckpoint(
filepath=model_save_path,
save_best_only=True,
verbose=True,
monitor='rocauc',
mode='max'
)
if USE_GPU:
trainer = Trainer(gpus=[1], distributed_backend='ddp',
experiment=exp, max_nb_epochs=10, train_percent_check=1.0,
fast_dev_run=False, checkpoint_callback=checkpoint)
else:
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1,
fast_dev_run=True)
trainer.fit(model)
if __name__=='__main__':
parent_parser = argparse.ArgumentParser(description='hyperparameters')
arg_parse.add_default_args(parent_parser, root_dir=CURR_RUN_PATH)
parser = vgg_basic.Network.add_model_specific_args(parent_parser, root_dir=CURR_RUN_PATH)
hyperparams = parser.parse_args()
run(hyperparams)
\ No newline at end of file
from matplotlib import pyplot as plt
import numpy as np
from utils import *
def plot_tag_frequencies(df, norm=True, index_sort=True, out=None):
def plot_tag_frequencies(df, norm=True, index_sort=True, out=None, title=None):
tag_freqs = df['TAGS'].str.split(',', expand=True).stack().value_counts()
title = 'tag counts'
title_ = 'tag counts'
if norm:
tag_freqs = tag_freqs / max(tag_freqs)
title = f'normalized tag frequencies'
title_ = 'normalized tag frequencies'
if index_sort:
tag_freqs = tag_freqs.sort_index()
tag_freqs.plot.bar(title=title)
from itertools import islice, cycle
colors = list(islice(cycle(['b', 'r', 'g', 'y', 'k']), None, len(tag_freqs)))
if title is not None:
title_=title
tag_freqs.plot.bar(title=title_, color=colors)
if out is None:
plt.show()
......@@ -38,4 +45,10 @@ def plot_melspectrogram(melspec, scale=False, title=None, out=None):
if out is None:
plt.show()
else:
plt.savefig(out)
\ No newline at end of file
plt.savefig(out)
if __name__=='__main__':
dataset_path = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
df = pd.read_csv(dataset_path, sep='\t')
plot_tag_frequencies(df, title='normalized tag frequencies, test')
from utils import *
def compute_duration_stats(filepath):
df = pd.read_csv(filepath, sep='\t')
total_secs = df.DURATION.sum()
hours = int(total_secs//3600)
mins = int((total_secs%3600)//60)
secs = np.round(total_secs%60, 1)
return {'average':time.strftime('%H:%M:%S', time.gmtime(df.DURATION.mean())),
'total':f'{hours}:{mins}:{secs}'} #todo: improve format for better readability
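# A possible implementation of the TODO above (a sketch, not wired into
# compute_duration_stats): zero-pad minutes and seconds so totals read as H:MM:SS.S.
def format_total_duration(total_secs):
    hours = int(total_secs // 3600)
    mins = int((total_secs % 3600) // 60)
    secs = total_secs % 60
    return f'{hours}:{mins:02d}:{secs:04.1f}'  # e.g. 3661.5 -> '1:01:01.5'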
def compute_melspec_length_stats(filepath):
df = pd.read_csv(filepath, sep='\t')
lengths = []
paths = df.PATH.str.split(pat='.').str[0]+'.npy'
for i in tqdm(df.index):
x_melspec = np.load(os.path.join(PATH_MELSPEC_DOWNLOADED, paths[i]))
lengths.append(x_melspec.shape[1])
lengths = np.array(lengths)
return {'avg':lengths.mean(), 'max':lengths.max(), 'min':lengths.min()}
def compute_tag_stats():
trainset_path = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
validset_path = os.path.join(PATH_ANNOTATIONS, 'validation_processed.tsv')
testset_path = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
print(f"\nComputing stats for:\n{trainset_path}\n{validset_path}\n{testset_path}\n")
trainset = pd.read_csv(trainset_path, sep='\t')
validset = pd.read_csv(validset_path, sep='\t')
testset = pd.read_csv(testset_path, sep='\t')
trainset_tags = trainset.TAGS.str.split(pat=',')
validset_tags = validset.TAGS.str.split(pat=',')
testset_tags = testset.TAGS.str.split(pat=',')
trainset_tags_unique = np.sort(np.unique(np.hstack(trainset_tags)))
validset_tags_unique = np.sort(np.unique(np.hstack(validset_tags)))
testset_tags_unique = np.sort(np.unique(np.hstack(testset_tags)))
print("TRAINING SET")
print(f"Number of songs \t\t\t\t{len(trainset)}")
print(f"Number of unique tags\t\t\t{len(trainset_tags_unique)}")
print(f"Avg number of tags per song\t\t{np.round(trainset_tags.apply(len).mean(),2)}")
print(f"Max number of tags per song\t\t{trainset_tags.apply(len).max()}")
print(f"Min number of tags per song\t\t{trainset_tags.apply(len).min()}")
print("\nVALIDATION SET")
print(f"Number of songs \t\t\t\t{len(validset)}")
print(f"Number of unique tags\t\t\t{len(validset_tags_unique)}")
print(f"Avg number of tags per song\t\t{np.round(validset_tags.apply(len).mean(),2)}")
print(f"Max number of tags per song\t\t{validset_tags.apply(len).max()}")
print(f"Min number of tags per song\t\t{validset_tags.apply(len).min()}")
print("\nTEST SET")
print(f"Number of songs \t\t\t\t{len(testset)}")
print(f"Number of unique tags\t\t\t{len(testset_tags_unique)}")
print(f"Avg number of tags per song\t\t{np.round(testset_tags.apply(len).mean(),2)}")
print(f"Max number of tags per song\t\t{testset_tags.apply(len).max()}")
print(f"Min number of tags per song\t\t{testset_tags.apply(len).min()}")
def correlations():
    data_path = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
    df = pd.read_csv(data_path, sep='\t')
    tags = df.TAGS.str.split(pat=',')
    artists = df.ARTIST_ID
    u_tags = np.sort(np.unique(np.hstack(tags)))
    u_artists, counts = np.unique(artists, return_counts=True)
    counts_idx = counts.argsort()
    # Keep the artists with more than 10 songs, ordered by song count (descending).
    relevant_artists = u_artists[counts_idx[::-1]][:sum((counts > 10).astype(int))]
    relevant_set = set(relevant_artists)
    # Artist-by-tag contingency table: how often each relevant artist uses each tag.
    contingency = pd.DataFrame(0, index=relevant_artists, columns=u_tags)
    for tag in tqdm(u_tags):
        for i in range(len(tags)):
            # Exact membership in the split tag list avoids substring false
            # positives (e.g. 'sad' matching inside 'sadness').
            if tag in tags[i] and df.ARTIST_ID[i] in relevant_set:
                # A single .loc with (row, column) writes through reliably,
                # unlike chained indexing.
                contingency.loc[df.ARTIST_ID[i], tag] += 1
    return contingency
if __name__=='__main__':
# stats = compute_duration_stats(os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv'))
# stats = compute_melspec_length_stats(os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv'))
# print(stats)
# compute_tag_stats()
correlations()
\ No newline at end of file
from utils import *
from pytorch_lightning import Trainer
from test_tube import Experiment
from models.vgg_basic import MultiTagger
from models import cp_resnet
def run():
logger.info(CURR_RUN_PATH)
exp = Experiment(save_dir=CURR_RUN_PATH)
if USE_GPU:
trainer = Trainer(gpus=[0], distributed_backend='ddp',
experiment=exp, max_nb_epochs=10, train_percent_check=1.0,
fast_dev_run=False)
else:
trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1,
fast_dev_run=True)
from strategies import model_config
model = cp_resnet.Network(model_config)
trainer.fit(model)
if __name__=='__main__':
run()
\ No newline at end of file
import torch.nn as nn
from utils import *
from datasets import MelSpecDataset
from datasets.datasets import MelSpecDataset
import torch
import torch.nn as nn
import torch.nn.functional as F
......
# coding: utf-8
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from librosa.filters import mel as librosa_mel_fn
from utils import *
from datasets import MelSpecDataset
from datasets.datasets import MelSpecDataset
from torch.utils.data import DataLoader
import pytorch_lightning as pl
......@@ -390,18 +388,16 @@ class Network(pl.LightningModule):
y_hat_probs = F.softmax(y_hat, dim=1)
y_hat_binary = (y_hat_probs > 0.5).type(torch.int)
rocauc = roc_auc_score(y.t().cpu(), y_hat_probs.t().cpu())
fscore = f1_score(y.t().cpu(), y_hat_probs.t().cpu(), average='micro')
# fscore = f1_score(y.t().cpu(), y_hat_probs.t().cpu(), average='micro')
return {'val_loss': self.my_loss(y_hat, y),
'rocauc':rocauc,
'fscore': fscore}
'rocauc':rocauc}
def validation_end(self, outputs):
avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
avg_auc = torch.stack([torch.tensor([x['rocauc']]) for x in outputs]).mean()
avg_f = torch.stack([torch.tensor([x['fscore']]) for x in outputs]).mean()
# avg_f = torch.stack([torch.tensor([x['fscore']]) for x in outputs]).mean()
return {'val_loss':avg_loss,
'rocauc':avg_auc,
'fscore':avg_f}
'rocauc':avg_auc}
def configure_optimizers(self):
return [torch.optim.Adam(self.parameters(), lr=0.02)]
......@@ -409,21 +405,26 @@ class Network(pl.LightningModule):
@pl.data_loader
def tng_dataloader(self):
trainset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=trainset, batch_size=32, shuffle=True)
@pl.data_loader
def val_dataloader(self):
validationset = MelSpecDataset(phase='validation', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=validationset, batch_size=128, shuffle=True)
@pl.data_loader
def test_dataloader(self):
testset = MelSpecDataset(phase='test', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=testset, batch_size=32, shuffle=True)
@staticmethod
def add_model_specific_args(parent_parser, root_dir):
return parent_parser
pass
if __name__=='__main__':
model_config = {
......
from utils import *
from datasets import MelSpecDataset
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from sklearn.metrics import roc_auc_score
def initialize_weights(module):
if isinstance(module, nn.Conv2d):
......
from utils import *
from datasets import MelSpecDataset
from datasets.datasets import MelSpecDataset
import torch
import torch.nn as nn
import torch.nn.functional as F
......@@ -9,9 +9,9 @@ import pytorch_lightning as pl
from sklearn.metrics import roc_auc_score
class MultiTagger(pl.LightningModule):
class Network(pl.LightningModule):
def __init__(self, num_tags=8):
super(MultiTagger, self).__init__()
super(Network, self).__init__()
self.num_tags = num_tags
self.conv1 = nn.Sequential(
......@@ -147,17 +147,22 @@ class MultiTagger(pl.LightningModule):
@pl.data_loader
def tng_dataloader(self):
trainset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=trainset, batch_size=32, shuffle=True)
@pl.data_loader
def val_dataloader(self):
validationset = MelSpecDataset(phase='validation', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=validationset, batch_size=128, shuffle=True)
@pl.data_loader
def test_dataloader(self):
testset = MelSpecDataset(phase='test', ann_root=PATH_ANNOTATIONS,
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
return DataLoader(dataset=testset, batch_size=32, shuffle=True)
@staticmethod
def add_model_specific_args(parent_parser, root_dir):
return parent_parser
pass
\ No newline at end of file
from utils import *
def preprocess_and_save_annotation_files():
"""
Removes 'mood/theme---' from tag names, and replaces tabs between multiple tag names with commas.
Writes processed filename.ext as filename_processed.ext
"""
import re
filelist = os.listdir(PATH_ANNOTATIONS)
for file in filelist:
# Check if the current file is processed or has a processed copy.
# Skip if either of these is true; otherwise process.
if 'processed' in os.path.splitext(file)[0].split('_') or\
f'{os.path.splitext(file)[0]}_processed{os.path.splitext(file)[1]}' in filelist:
continue
else:
with open(os.path.join(PATH_ANNOTATIONS, file), 'r') as f:
text = f.read()
text = re.sub(r'mood/theme---(\w*)\n', r'\1\n', text) # matches last or singular tags
text = re.sub(r'mood/theme---(\w*)(\s*)', r'\1,', text) # matches all other tags
with open(os.path.join(PATH_ANNOTATIONS,
f'{os.path.splitext(file)[0]}_processed{os.path.splitext(file)[1]}'), 'w') as fw:
fw.write(text)
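if __name__ == '__main__':
    # Illustration of the two substitutions above on a made-up annotation line
    # (hypothetical data, not taken from the real annotation files):
    import re
    sample = 'track_0001\tartist_42\tmood/theme---happy\tmood/theme---calm\n'
    sample = re.sub(r'mood/theme---(\w*)\n', r'\1\n', sample)    # last/singular tag
    sample = re.sub(r'mood/theme---(\w*)(\s*)', r'\1,', sample)  # remaining tags
    print(repr(sample))  # 'track_0001\tartist_42\thappy,calm\n'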
import math
import sys
import time
import numpy as np
import wave
import scipy
import scipy.signal
from pylab import *
import array
import os
from os.path import expanduser
import scipy.io.wavfile
# Author: Brian K. Vogel
# brian.vogel@gmail.com
def hz_to_mel(f_hz):
"""Convert Hz to mel scale.
This uses the formula from O'Shaughnessy's book.
Args:
f_hz (float): The value in Hz.
Returns:
The value in mels.
"""
return 2595*np.log10(1.0 + f_hz/700.0)
def mel_to_hz(m_mel):
"""Convert mel scale to Hz.
This uses the formula from O'Shaughnessy's book.
Args:
m_mel (float): The value in mels
Returns:
The value in Hz
"""
return 700*(10**(m_mel/2595) - 1.0)
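# Sanity check for the two conversions above (illustrative values): by
# construction of the O'Shaughnessy formula, 1000 Hz maps to roughly 1000 mels,
# and the two functions invert each other up to floating point error:
#   hz_to_mel(1000.0)            -> ~999.99
#   mel_to_hz(hz_to_mel(440.0))  -> ~440.0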
def fft_bin_to_hz(n_bin, sample_rate_hz, fft_size):
"""Convert FFT bin index to frequency in Hz.
Args:
n_bin (int or float): The FFT bin index.
sample_rate_hz (int or float): The sample rate in Hz.
fft_size (int or float): The FFT size.
Returns:
The value in Hz.
"""
n_bin = float(n_bin)
sample_rate_hz = float(sample_rate_hz)
fft_size = float(fft_size)
return n_bin*sample_rate_hz/(2.0*fft_size)
def hz_to_fft_bin(f_hz, sample_rate_hz, fft_size):
"""Convert frequency in Hz to FFT bin index.
Args:
f_hz (int or float): The frequency in Hz.
sample_rate_hz (int or float): The sample rate in Hz.
fft_size (int or float): The FFT size.
Returns:
The FFT bin index as an int.
"""
f_hz = float(f_hz)
sample_rate_hz = float(sample_rate_hz)
fft_size = float(fft_size)
fft_bin = int(np.round((f_hz*2.0*fft_size/sample_rate_hz)))
if fft_bin >= fft_size:
fft_bin = fft_size-1
return fft_bin
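# Worked example (illustrative): with sample_rate_hz=22050 and fft_size=1024,
# a 440 Hz tone maps to bin round(440*2*1024/22050) = round(40.87) = 41.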
def make_mel_filterbank(min_freq_hz, max_freq_hz, mel_bin_count,
linear_bin_count, sample_rate_hz):
"""Create a mel filterbank matrix.
Create and return a mel filterbank matrix `filterbank` of shape
(`mel_bin_count`, `linear_bin_count`). The `filterbank` matrix can be used to
transform a (linear scale) spectrum or spectrogram into a mel scale spectrum
or spectrogram as follows:
`mel_scale_spectrum` = `filterbank` * `linear_scale_spectrum`
where `linear_scale_spectrum` has shape (`linear_bin_count`, `m`) and
`mel_scale_spectrum` has shape (`mel_bin_count`, `m`), where `m` is the number
of spectral time slices.
Likewise, the reverse-direction transform can be performed as:
`linear_scale_spectrum` = `filterbank.T` * `mel_scale_spectrum`
Note that the process of converting to mel scale and then back to linear
scale is lossy.
This function computes the mel-spaced filters such that each filter is
triangular (in linear frequency), with a response of 1 at the center frequency
that decreases linearly to 0 upon reaching an adjacent filter's center
frequency. Any two adjacent filters therefore overlap, each with a response of
0.5 at the midpoint between their center frequencies.
Args:
min_freq_hz (float): The frequency in Hz corresponding to the lowest
mel scale bin.
max_freq_hz (float): The frequency in Hz corresponding to the highest
mel scale bin.
mel_bin_count (int): The number of mel scale bins.
linear_bin_count (int): The number of linear scale (fft) bins.
sample_rate_hz (float): The sample rate in Hz.
Returns:
The mel filterbank matrix as a 2-dim NumPy array.
"""
min_mels = hz_to_mel(min_freq_hz)
max_mels = hz_to_mel(max_freq_hz)
# Create mel_bin_count linearly spaced values between these extreme mel values.
mel_lin_spaced = np.linspace(min_mels, max_mels, num=mel_bin_count)
# Map each of these mel values back into linear frequency (Hz).
center_frequencies_hz = np.array([mel_to_hz(n) for n in mel_lin_spaced])
mels_per_bin = float(max_mels - min_mels)/float(mel_bin_count - 1)
mels_start = min_mels - mels_per_bin
hz_start = mel_to_hz(mels_start)
fft_bin_start = hz_to_fft_bin(hz_start, sample_rate_hz, linear_bin_count)
#print('fft_bin_start: ', fft_bin_start)
mels_end = max_mels + mels_per_bin
hz_stop = mel_to_hz(mels_end)
fft_bin_stop = hz_to_fft_bin(hz_stop, sample_rate_hz, linear_bin_count)
#print('fft_bin_stop: ', fft_bin_stop)
# Map each center frequency to the closest fft bin index.
linear_bin_indices = np.array([hz_to_fft_bin(f_hz, sample_rate_hz, linear_bin_count) for f_hz in center_frequencies_hz])
# Create filterbank matrix.
filterbank = np.zeros((mel_bin_count, linear_bin_count))
for mel_bin in range(mel_bin_count):
center_freq_linear_bin = linear_bin_indices[mel_bin]
# Create a triangular filter having the current center freq.
# The filter will start with 0 response at left_bin (if it exists)
# and ramp up to 1.0 at center_freq_linear_bin, and then ramp
# back down to 0 response at right_bin (if it exists).
# Create the left side of the triangular filter that ramps up
# from 0 to a response of 1 at the center frequency.
if center_freq_linear_bin > 1:
# It is possible to create the left triangular filter.
if mel_bin == 0:
# Since this is the first center frequency, the left side