Shreyan Chowdhury / moodwalk / Commits

Commit ca903fab authored Sep 06, 2019 by Shreyan Chowdhury

reorganize project

parent d2a228f9
Changes 23
datasets.py → datasets/datasets.py

 from utils import *
 from torch.utils.data import Dataset, DataLoader
+from processors.spectrogram_processors import make_framed_spec


 class MelSpecDataset(Dataset):
-    def __init__(self, phase='train', ann_root=None, spec_root=None, length=MAX_FRAMES):
+    def __init__(self, phase='train', ann_root=None, spec_root=None, length=MAX_FRAMES, framed=True):
         assert ann_root is not None, logger.error("ann_root (root directory containing annotation files) required")
         assert spec_root is not None, logger.error("spec_root (root directory of spectrograms) required")
         assert phase in ['train', 'validation', 'test'], \
...
@@ -12,6 +13,7 @@ class MelSpecDataset(Dataset):
         self.ann_root = ann_root
         self.spec_root = spec_root
         self.length = length
+        self.framed = framed
         xy = pd.read_csv(os.path.join(self.ann_root, f'{phase}_processed.tsv'), sep='\t')
         self.len = len(xy)
...
@@ -27,6 +29,9 @@ class MelSpecDataset(Dataset):
         returns framed mel spectrogram and multi-hot encoded labels
         """
         x_melspec = np.load(os.path.join(self.spec_root, self.x_path[index]))
+        if not self.framed:
+            x_melspec = make_framed_spec(x_melspec, frame_length=256, hop=1.0)
         reqd_len = self.length
         spec_len = len(x_melspec)
         x_trimmed = x_melspec[:reqd_len] if spec_len > reqd_len else \
...
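Note on the change above: make_framed_spec lives in processors/spectrogram_processors.py, which this diff does not show, so its exact behavior is not visible here. A minimal sketch of what slicing an unframed (time, mel_bins) spectrogram into fixed-length frames plausibly looks like, with hop given as a fraction of the frame length (the name and semantics are assumptions, not the project's code):

    import numpy as np

    def framed_spec_sketch(spec, frame_length=256, hop=1.0):
        # hop=1.0 -> adjacent, non-overlapping frames; hop=0.5 would overlap by half
        step = int(frame_length * hop)
        starts = range(0, max(len(spec) - frame_length + 1, 1), step)
        return np.stack([spec[s:s + frame_length] for s in starts])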
@@ -51,12 +56,15 @@ class MelSpecDataset(Dataset):
             return np.array([])
         return tagslist

-    def _make_unique_tags_list_(self, labels, saveto='./tagslist'):
+    def _make_unique_tags_list_(self, labels, saveto=None):
         labelslist = []
         for label in labels:
             labelslist.extend(label)
         tagslist = np.sort(np.unique(np.array(labelslist)))
-        if saveto is not None:
+        if saveto is None:
+            saveto = os.path.join(PATH_PROJECT_ROOT, 'tagslist')
             np.save(saveto, tagslist)
+        else:
+            np.save(saveto, tagslist)
         return tagslist
...
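Note: __getitem__ above promises "multi-hot encoded labels", and _make_unique_tags_list_ supplies the sorted tag vocabulary, but the encoding step itself is elided from this diff. A sketch of multi-hot encoding against such a sorted tagslist (illustrative only; the repository's own encoding may differ):

    import numpy as np

    def multi_hot_sketch(song_tags, tagslist):
        # one slot per known tag, 1.0 where the song carries that tag
        vec = np.zeros(len(tagslist), dtype=np.float32)
        for tag in song_tags:
            vec[np.searchsorted(tagslist, tag)] = 1.0
        return vec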
@@ -64,7 +72,8 @@ class MelSpecDataset(Dataset):
 if __name__ == '__main__':
     # Tests
     torch.manual_seed(6)
-    dataset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS, spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+    dataset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS, spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED,
+                             framed=True)
     train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)
...
experiment_baseline.py → experiments/experiment_baseline.py

...
@@ -17,7 +17,7 @@ def run():
     trainer = Trainer(experiment=exp, max_nb_epochs=1, train_percent_check=0.1, fast_dev_run=True)
-    model = Network()  # TODO num_class
+    model = Network(num_class=56)  # TODO num_class
     print(model)
...
experiment_erf.py → experiments/experiment_erf.py

...
@@ -3,8 +3,8 @@ from pytorch_lightning import Trainer
 from test_tube import Experiment
 from models.cp_resnet import Network
 import torch
-from datasets import MelSpecDataset
-from torch.utils.data import Dataset, DataLoader
+from datasets.datasets import MelSpecDataset
+from torch.utils.data import DataLoader
 from matplotlib.transforms import Affine2D
 import mpl_toolkits.axisartist.floating_axes as floating_axes
...
@@ -96,7 +96,7 @@ def ERF_generate(model, loader):
             accum += me
             counter += 1
     # torch.save({"arr": accum, "counter": counter}, os.path.join(self.config.out_dir, 'ERF_dict.pth'))
-    ERF_plot(accum, savefile=os.path.join('/home/verena/experiments/moodwalk', 'erf.png'))
+    ERF_plot(accum, savefile=os.path.join(CURR_RUN_PATH, 'erf.png'))
     # self.experiment.add_artifact(os.path.join(self.config.out_dir, 'erf.png'), "erf.png", {"dataset": dataset_name})
     return True
...
experiment_resnets.py → experiments/experiment_resnets.py
File moved
experiments/main.py
0 → 100644 (new file)
from utils import *
from pytorch_lightning import Trainer
from pytorch_lightning.utilities import arg_parse
from pytorch_lightning.callbacks.pt_callbacks import ModelCheckpoint
from test_tube import Experiment
from models import vgg_basic
import argparse


def run(hparams):
    logger.info(CURR_RUN_PATH)
    exp = Experiment(name=hparams.tt_name,
                     debug=hparams.debug,
                     save_dir=CURR_RUN_PATH,
                     version='1',
                     autosave=False,
                     description=hparams.tt_description)
    exp.argparse(hparams)
    exp.save()

    # model = cp_resnet.Network(model_config)
    model = vgg_basic.Network()

    model_save_path = '{}/{}/{}'.format(hparams.model_save_path, exp.name, exp.version)
    checkpoint = ModelCheckpoint(filepath=model_save_path,
                                 save_best_only=True,
                                 verbose=True,
                                 monitor='rocauc',
                                 mode='max')

    if USE_GPU:
        trainer = Trainer(gpus=[1],
                          distributed_backend='ddp',
                          experiment=exp,
                          max_nb_epochs=10,
                          train_percent_check=1.0,
                          fast_dev_run=False,
                          checkpoint_callback=checkpoint)
    else:
        trainer = Trainer(experiment=exp,
                          max_nb_epochs=1,
                          train_percent_check=0.1,
                          fast_dev_run=True)

    trainer.fit(model)


if __name__ == '__main__':
    parent_parser = argparse.ArgumentParser(description='hyperparameters')
    arg_parse.add_default_args(parent_parser, root_dir=CURR_RUN_PATH)
    parser = vgg_basic.Network.add_model_specific_args(parent_parser, root_dir=CURR_RUN_PATH)
    hyperparams = parser.parse_args()
    run(hyperparams)
\ No newline at end of file
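Usage note: tt_name, tt_description, debug and model_save_path are not defined in this file; they are expected to be registered by pytorch_lightning's arg_parse.add_default_args. Assuming those defaults expose flags matching the attribute names, an invocation would look roughly like:

    python experiments/main.py --tt_name vgg_baseline --tt_description "first vgg run"

The flag names here are inferred from the attribute accesses in run(), not confirmed against the library.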
strategies.py → experiments/strategies.py
File moved
plotting.py → helpers/plotting.py
 from matplotlib import pyplot as plt
 import numpy as np
 from utils import *


-def plot_tag_frequencies(df, norm=True, index_sort=True, out=None):
+def plot_tag_frequencies(df, norm=True, index_sort=True, out=None, title=None):
     tag_freqs = df['TAGS'].str.split(',', expand=True).stack().value_counts()
-    title = 'tag counts'
+    title_ = 'tag counts'
     if norm:
         tag_freqs = tag_freqs / max(tag_freqs)
-        title = f'normalized tag frequencies'
+        title_ = f'normalized tag frequencies'
     if index_sort:
         tag_freqs = tag_freqs.sort_index()
-    tag_freqs.plot.bar(title=title)
+    from itertools import islice, cycle
+    colors = list(islice(cycle(['b', 'r', 'g', 'y', 'k']), None, len(tag_freqs)))
+    if title is not None:
+        title_ = title
+    tag_freqs.plot.bar(title=title_, color=colors)
     if out is None:
         plt.show()
...
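Aside on the color handling added above: itertools.cycle repeats the five base colors indefinitely, and islice(..., None, len(tag_freqs)) cuts that infinite stream to exactly one color per bar, so the palette wraps however many bars there are:

    from itertools import islice, cycle
    list(islice(cycle(['b', 'r', 'g']), None, 7))  # ['b', 'r', 'g', 'b', 'r', 'g', 'b']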
@@ -38,4 +45,10 @@ def plot_melspectrogram(melspec, scale=False, title=None, out=None):
     if out is None:
         plt.show()
     else:
-        plt.savefig(out)
\ No newline at end of file
+        plt.savefig(out)
+
+
+if __name__ == '__main__':
+    dataset_path = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
+    df = pd.read_csv(dataset_path, sep='\t')
+    plot_tag_frequencies(df, title='normalized tag frequencies, test')
helpers/stats.py
0 → 100644 (new file)
from utils import *


def compute_duration_stats(filepath):
    df = pd.read_csv(filepath, sep='\t')
    total_secs = df.DURATION.sum()
    hours = int(total_secs // 3600)
    mins = int((total_secs % 3600) // 60)
    secs = np.round(total_secs % 60, 1)
    return {'average': time.strftime('%H:%M:%S', time.gmtime(df.DURATION.mean())),
            'total': f'{hours}:{mins}:{secs}'}  # todo: improve format for better readability
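One way to resolve that todo, zero-padding each field so a total like 3:7:4.0 prints as 03:07:04.0 (a sketch, not part of the commit):

    total = f'{hours:02d}:{mins:02d}:{secs:04.1f}'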
def compute_melspec_length_stats(filepath):
    df = pd.read_csv(filepath, sep='\t')
    lengths = []
    paths = df.PATH.str.split(pat='.').str[0] + '.npy'
    for i in tqdm(df.index):
        x_melspec = np.load(os.path.join(PATH_MELSPEC_DOWNLOADED, paths[i]))
        lengths.append(x_melspec.shape[1])
    lengths = np.array(lengths)
    return {'avg': lengths.mean(), 'max': lengths.max(), 'min': lengths.min()}
def compute_tag_stats():
    trainset_path = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
    validset_path = os.path.join(PATH_ANNOTATIONS, 'validation_processed.tsv')
    testset_path = os.path.join(PATH_ANNOTATIONS, 'test_processed.tsv')
    print(f"\nComputing stats for:\n{trainset_path}\n{validset_path}\n{testset_path}\n")

    trainset = pd.read_csv(trainset_path, sep='\t')
    validset = pd.read_csv(validset_path, sep='\t')
    testset = pd.read_csv(testset_path, sep='\t')

    trainset_tags = trainset.TAGS.str.split(pat=',')
    validset_tags = validset.TAGS.str.split(pat=',')
    testset_tags = testset.TAGS.str.split(pat=',')

    trainset_tags_unique = np.sort(np.unique(np.hstack(trainset_tags)))
    validset_tags_unique = np.sort(np.unique(np.hstack(validset_tags)))
    testset_tags_unique = np.sort(np.unique(np.hstack(testset_tags)))

    print("TRAINING SET")
    print(f"Number of songs\t\t\t\t{len(trainset)}")
    print(f"Number of unique tags\t\t\t{len(trainset_tags_unique)}")
    print(f"Avg number of tags per song\t\t{np.round(trainset_tags.apply(len).mean(), 2)}")
    print(f"Max number of tags per song\t\t{trainset_tags.apply(len).max()}")
    print(f"Min number of tags per song\t\t{trainset_tags.apply(len).min()}")

    print("\nVALIDATION SET")
    print(f"Number of songs\t\t\t\t{len(validset)}")
    print(f"Number of unique tags\t\t\t{len(validset_tags_unique)}")
    print(f"Avg number of tags per song\t\t{np.round(validset_tags.apply(len).mean(), 2)}")
    print(f"Max number of tags per song\t\t{validset_tags.apply(len).max()}")
    print(f"Min number of tags per song\t\t{validset_tags.apply(len).min()}")

    print("\nTEST SET")
    print(f"Number of songs\t\t\t\t{len(testset)}")
    print(f"Number of unique tags\t\t\t{len(testset_tags_unique)}")
    print(f"Avg number of tags per song\t\t{np.round(testset_tags.apply(len).mean(), 2)}")
    print(f"Max number of tags per song\t\t{testset_tags.apply(len).max()}")
    print(f"Min number of tags per song\t\t{testset_tags.apply(len).min()}")
def correlations():
    data_path = os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv')
    df = pd.read_csv(data_path, sep='\t')
    tags = df.TAGS.str.split(pat=',')
    artists = df.ARTIST_ID
    u_tags = np.sort(np.unique(np.hstack(tags)))
    u_artists, counts = np.unique(artists, return_counts=True)
    counts_idx = counts.argsort()
    relevant_artists = u_artists[counts_idx[::-1]][:sum((counts > 10).astype(int))]
    contingency = pd.DataFrame(0, index=relevant_artists, columns=u_tags)
    for tag in tqdm(u_tags):
        for i in range(len(tags)):
            if tag in df.TAGS[i]:
                if df.ARTIST_ID[i] in relevant_artists:
                    contingency.loc[df.ARTIST_ID[i]][tag] += 1
            else:
                continue
    pass
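Aside: `tag in df.TAGS[i]` tests substring membership in the raw comma-joined string, so 'sad' would also match rows tagged 'sadness'. An exact and vectorized alternative with the same df and relevant_artists (a sketch, not part of the commit):

    exploded = df.assign(TAG=df.TAGS.str.split(',')).explode('TAG')
    exploded = exploded[exploded.ARTIST_ID.isin(relevant_artists)]
    contingency = pd.crosstab(exploded.ARTIST_ID, exploded.TAG)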
if __name__ == '__main__':
    # stats = compute_duration_stats(os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv'))
    # stats = compute_melspec_length_stats(os.path.join(PATH_ANNOTATIONS, 'train_processed.tsv'))
    # print(stats)
    # compute_tag_stats()
    correlations()
\ No newline at end of file
main.py
deleted 100644 → 0
from utils import *
from pytorch_lightning import Trainer
from test_tube import Experiment
from models.vgg_basic import MultiTagger
from models import cp_resnet


def run():
    logger.info(CURR_RUN_PATH)
    exp = Experiment(save_dir=CURR_RUN_PATH)
    if USE_GPU:
        trainer = Trainer(gpus=[0],
                          distributed_backend='ddp',
                          experiment=exp,
                          max_nb_epochs=10,
                          train_percent_check=1.0,
                          fast_dev_run=False)
    else:
        trainer = Trainer(experiment=exp,
                          max_nb_epochs=1,
                          train_percent_check=0.1,
                          fast_dev_run=True)
    from strategies import model_config
    model = cp_resnet.Network(model_config)
    trainer.fit(model)


if __name__ == '__main__':
    run()
\ No newline at end of file
models/baseline.py
 import torch.nn as nn
 from utils import *
-from datasets import MelSpecDataset
+from datasets.datasets import MelSpecDataset
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
...
models/cp_resnet.py
 # coding: utf-8
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from librosa.filters import mel as librosa_mel_fn
 from utils import *
-from datasets import MelSpecDataset
+from datasets.datasets import MelSpecDataset
 from torch.utils.data import DataLoader
 import pytorch_lightning as pl
...
@@ -390,18 +388,16 @@ class Network(pl.LightningModule):
         y_hat_probs = F.softmax(y_hat, dim=1)
         y_hat_binary = (y_hat_probs > 0.5).type(torch.int)
         rocauc = roc_auc_score(y.t().cpu(), y_hat_probs.t().cpu())
-        fscore = f1_score(y.t().cpu(), y_hat_probs.t().cpu(), average='micro')
+        # fscore = f1_score(y.t().cpu(), y_hat_probs.t().cpu(), average='micro')
         return {'val_loss': self.my_loss(y_hat, y),
-                'rocauc': rocauc,
-                'fscore': fscore}
+                'rocauc': rocauc}

     def validation_end(self, outputs):
         avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
         avg_auc = torch.stack([torch.tensor([x['rocauc']]) for x in outputs]).mean()
-        avg_f = torch.stack([torch.tensor([x['fscore']]) for x in outputs]).mean()
+        # avg_f = torch.stack([torch.tensor([x['fscore']]) for x in outputs]).mean()
         return {'val_loss': avg_loss,
-                'rocauc': avg_auc,
-                'fscore': avg_f}
+                'rocauc': avg_auc}

     def configure_optimizers(self):
         return [torch.optim.Adam(self.parameters(), lr=0.02)]
...
@@ -409,21 +405,26 @@ class Network(pl.LightningModule):
     @pl.data_loader
     def tng_dataloader(self):
         trainset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS,
-                                  spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                  spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=trainset, batch_size=32, shuffle=True)

     @pl.data_loader
     def val_dataloader(self):
         validationset = MelSpecDataset(phase='validation', ann_root=PATH_ANNOTATIONS,
-                                       spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                       spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=validationset, batch_size=128, shuffle=True)

     @pl.data_loader
     def test_dataloader(self):
         testset = MelSpecDataset(phase='test', ann_root=PATH_ANNOTATIONS,
-                                 spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                 spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=testset, batch_size=32, shuffle=True)

+    @staticmethod
+    def add_model_specific_args(parent_parser, root_dir):
+        return parent_parser
+        pass


 if __name__ == '__main__':
     model_config = {
...
models/legacy_models.py
 from utils import *
 from datasets import MelSpecDataset
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.data import DataLoader
 import pytorch_lightning as pl
 from sklearn.metrics import roc_auc_score


 def initialize_weights(module):
     if isinstance(module, nn.Conv2d):
...
models/vgg_basic.py
 from utils import *
-from datasets import MelSpecDataset
+from datasets.datasets import MelSpecDataset
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
...
@@ -9,9 +9,9 @@ import pytorch_lightning as pl
 from sklearn.metrics import roc_auc_score


-class MultiTagger(pl.LightningModule):
+class Network(pl.LightningModule):
     def __init__(self, num_tags=8):
-        super(MultiTagger, self).__init__()
+        super(Network, self).__init__()
         self.num_tags = num_tags

         self.conv1 = nn.Sequential(
...
@@ -147,17 +147,22 @@ class MultiTagger(pl.LightningModule):
     @pl.data_loader
     def tng_dataloader(self):
         trainset = MelSpecDataset(phase='train', ann_root=PATH_ANNOTATIONS,
-                                  spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                  spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=trainset, batch_size=32, shuffle=True)

     @pl.data_loader
     def val_dataloader(self):
         validationset = MelSpecDataset(phase='validation', ann_root=PATH_ANNOTATIONS,
-                                       spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                       spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=validationset, batch_size=128, shuffle=True)

     @pl.data_loader
     def test_dataloader(self):
         testset = MelSpecDataset(phase='test', ann_root=PATH_ANNOTATIONS,
-                                 spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED)
+                                 spec_root=PATH_MELSPEC_DOWNLOADED_FRAMED, framed=True)
         return DataLoader(dataset=testset, batch_size=32, shuffle=True)

+    @staticmethod
+    def add_model_specific_args(parent_parser, root_dir):
+        return parent_parser
+        pass
\ No newline at end of file
processors/annotation_processors.py
0 → 100644 (new file)
from utils import *


def preprocess_and_save_annotation_files():
    """
    Removes 'mood/theme---' from tag names, and replaces tabs between multiple tag names with commas.
    Writes processed filename.ext as filename_processed.ext
    """
    import re
    filelist = os.listdir(PATH_ANNOTATIONS)
    for file in filelist:
        # Check if the current file is processed or has a processed copy.
        # Skip if either is true; else process.
        if 'processed' in os.path.splitext(file)[0].split('_') or \
                f'{os.path.splitext(file)[0]}_processed{os.path.splitext(file)[1]}' in filelist:
            continue
        else:
            with open(os.path.join(PATH_ANNOTATIONS, file), 'r') as f:
                text = f.read()
            text = re.sub(r'mood/theme---(\w*)\n', r'\1\n', text)    # matches last or singular tags
            text = re.sub(r'mood/theme---(\w*)(\s*)', r'\1,', text)  # matches all other tags
            with open(os.path.join(PATH_ANNOTATIONS,
                                   f'{os.path.splitext(file)[0]}_processed{os.path.splitext(file)[1]}'),
                      'w') as fw:
                fw.write(text)
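Worked example of the two substitutions on a hypothetical input line (order matters: the newline-anchored pattern must run first, otherwise the second pattern's (\s*) would also consume the trailing newline):

    import re
    text = 'mood/theme---happy\tmood/theme---summer\n'
    text = re.sub(r'mood/theme---(\w*)\n', r'\1\n', text)    # 'mood/theme---happy\tsummer\n'
    text = re.sub(r'mood/theme---(\w*)(\s*)', r'\1,', text)  # 'happy,summer\n'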
processors/audio_processors.py
0 → 100644 (new file)
processors/griffin_lim.py
0 → 100644 (new file)
import math
import sys
import time
import numpy as np
import wave
import scipy
import scipy.signal
from pylab import *
import array
import os
from os.path import expanduser
import scipy.io.wavfile

# Author: Brian K. Vogel
# brian.vogel@gmail.com


def hz_to_mel(f_hz):
    """Convert Hz to mel scale.

    This uses the formula from O'Shaughnessy's book.

    Args:
        f_hz (float): The value in Hz.

    Returns:
        The value in mels.
    """
    return 2595 * np.log10(1.0 + f_hz / 700.0)


def mel_to_hz(m_mel):
    """Convert mel scale to Hz.

    This uses the formula from O'Shaughnessy's book.

    Args:
        m_mel (float): The value in mels.

    Returns:
        The value in Hz.
    """
    return 700 * (10 ** (m_mel / 2595) - 1.0)
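Sanity check: the two conversions are exact inverses, so a round trip returns the input up to floating-point error:

    assert abs(mel_to_hz(hz_to_mel(440.0)) - 440.0) < 1e-6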
def fft_bin_to_hz(n_bin, sample_rate_hz, fft_size):
    """Convert FFT bin index to frequency in Hz.

    Args:
        n_bin (int or float): The FFT bin index.
        sample_rate_hz (int or float): The sample rate in Hz.
        fft_size (int or float): The FFT size.

    Returns:
        The value in Hz.
    """
    n_bin = float(n_bin)
    sample_rate_hz = float(sample_rate_hz)
    fft_size = float(fft_size)
    return n_bin * sample_rate_hz / (2.0 * fft_size)


def hz_to_fft_bin(f_hz, sample_rate_hz, fft_size):
    """Convert frequency in Hz to FFT bin index.

    Args:
        f_hz (int or float): The frequency in Hz.
        sample_rate_hz (int or float): The sample rate in Hz.
        fft_size (int or float): The FFT size.

    Returns:
        The FFT bin index as an int.
    """
    f_hz = float(f_hz)
    sample_rate_hz = float(sample_rate_hz)
    fft_size = float(fft_size)
    fft_bin = int(np.round((f_hz * 2.0 * fft_size / sample_rate_hz)))
    if fft_bin >= fft_size:
        fft_bin = fft_size - 1
    return fft_bin


def make_mel_filterbank(min_freq_hz, max_freq_hz, mel_bin_count,
                        linear_bin_count, sample_rate_hz):
    """Create a mel filterbank matrix.

    Create and return a mel filterbank matrix `filterbank` of shape
    (`mel_bin_count`, `linear_bin_count`). The `filterbank` matrix can be used
    to transform a (linear scale) spectrum or spectrogram into a mel scale
    spectrum or spectrogram as follows:

        `mel_scale_spectrum` = `filterbank` * `linear_scale_spectrum`

    where `linear_scale_spectrum` has shape (`linear_bin_count`, `m`) and
    `mel_scale_spectrum` has shape (`mel_bin_count`, `m`), where `m` is the
    number of spectral time slices.

    Likewise, the reverse-direction transform can be performed as:

        `linear_scale_spectrum` = `filterbank.T` * `mel_scale_spectrum`

    Note that the process of converting to mel scale and then back to linear
    scale is lossy.

    This function computes the mel-spaced filters such that each filter is
    triangular (in linear frequency) with response 1 at the center frequency,
    decreasing linearly to 0 upon reaching an adjacent filter's center
    frequency. Note that any two adjacent filters will overlap, having a
    response of 0.5 at the mean frequency of their respective center
    frequencies.

    Args:
        min_freq_hz (float): The frequency in Hz corresponding to the lowest
            mel scale bin.
        max_freq_hz (float): The frequency in Hz corresponding to the highest
            mel scale bin.
        mel_bin_count (int): The number of mel scale bins.
        linear_bin_count (int): The number of linear scale (fft) bins.
        sample_rate_hz (float): The sample rate in Hz.

    Returns:
        The mel filterbank matrix as a 2-dim Numpy array.
    """
    min_mels = hz_to_mel(min_freq_hz)
    max_mels = hz_to_mel(max_freq_hz)
    # Create mel_bin_count linearly spaced values between these extreme mel values.
    mel_lin_spaced = np.linspace(min_mels, max_mels, num=mel_bin_count)
    # Map each of these mel values back into linear frequency (Hz).
    center_frequencies_hz = np.array([mel_to_hz(n) for n in mel_lin_spaced])
    mels_per_bin = float(max_mels - min_mels) / float(mel_bin_count - 1)
    mels_start = min_mels - mels_per_bin
    hz_start = mel_to_hz(mels_start)
    fft_bin_start = hz_to_fft_bin(hz_start, sample_rate_hz, linear_bin_count)
    # print('fft_bin_start: ', fft_bin_start)
    mels_end = max_mels + mels_per_bin
    hz_stop = mel_to_hz(mels_end)
    fft_bin_stop = hz_to_fft_bin(hz_stop, sample_rate_hz, linear_bin_count)
    # print('fft_bin_stop: ', fft_bin_stop)
    # Map each center frequency to the closest fft bin index.
    linear_bin_indices = np.array([hz_to_fft_bin(f_hz, sample_rate_hz, linear_bin_count)
                                   for f_hz in center_frequencies_hz])
    # Create filterbank matrix.
    filterbank = np.zeros((mel_bin_count, linear_bin_count))
    for mel_bin in range(mel_bin_count):
        center_freq_linear_bin = linear_bin_indices[mel_bin]
        # Create a triangular filter having the current center freq.
        # The filter will start with 0 response at left_bin (if it exists)
        # and ramp up to 1.0 at center_freq_linear_bin, and then ramp
        # back down to 0 response at right_bin (if it exists).
        # Create the left side of the triangular filter that ramps up
        # from 0 to a response of 1 at the center frequency.
        if center_freq_linear_bin > 1: