Commit ee66390f authored by Shreyan Chowdhury

add another processor

parent 0bf50c50
......@@ -7,12 +7,13 @@ import pandas as pd
from utils import PATH_DATA_CACHE
# Number of spectrogram frames in every randomly cropped excerpt.
slice_length = 512  # TODO: Find a better way
# Number of mel bands; read by the slicing function and by the audio processors below.
n_mels = 256


def sample_slicing_function(h5data, idx, xlen):
    """Cut a random excerpt of ``slice_length`` frames out of ``h5data``.

    A start offset ``k`` is drawn uniformly from ``[0, xlen - slice_length]``
    and the rows ``idx + k`` to ``idx + k + slice_length`` are taken.

    Args:
        h5data: Array-like that supports row slicing and yields a 2-D numpy
            array (assumed frames x mel bands -- TODO confirm against the
            code that writes the cache).
        idx: Index of the first row belonging to the item.
        xlen: Number of rows belonging to the item; must be at least
            ``slice_length``, otherwise ``torch.randint`` raises.

    Returns:
        ``torch.Tensor`` of shape ``(1, n_mels, slice_length)``.
    """
    timeframes = slice_length
    # randint's upper bound is exclusive, hence the + 1.
    k = torch.randint(xlen - timeframes + 1, (1,))[0].item()
    x = h5data[idx + k:idx + k + timeframes]
    # Use the shared n_mels constant instead of a hard-coded 256 so the
    # reshape follows whatever mel resolution the module is configured for.
    return torch.from_numpy(x.transpose(1, 0).reshape(1, n_mels, timeframes))
def full_song_slicing_function(h5data, idx, xlen):
    """Pass the arguments through without cropping anything.

    Takes the same arguments as ``sample_slicing_function`` but performs no
    slicing; the three arguments are handed back unchanged.

    Args:
        h5data: Data container, returned as is.
        idx: Start index, returned as is.
        xlen: Item length, returned as is.

    Returns:
        The tuple ``(h5data, idx, xlen)``.
    """
    return h5data, idx, xlen
......@@ -37,11 +38,12 @@ def mtgjamendo_parse_labels(csvf):
def processor_mtgjamendo44k(file_path):
# global n_mels
n_fft = 2048 # 2048
sr = 44100 # 22050 # 44100 # 32000
mono = True # @todo ask mattias
log_spec = False
n_mels = 256
# n_mels = 256
hop_length = 512
fmax = None
......@@ -88,7 +90,60 @@ def processor_mtgjamendo44k(file_path):
return torch.from_numpy(spectrograms)
def processor_mtgjamendo22k_96mels(file_path):
    """Load an audio file at 22.05 kHz and convert it to mel spectrograms.

    Per channel the pipeline is: STFT magnitude, then either log10
    compression (``log_spec``) or perceptual weighting of the power
    spectrum, then a mel filterbank.

    NOTE(review): the local ``n_mels = 96`` override is commented out, so the
    number of mel bands comes from the module-level ``n_mels`` despite the
    "96mels" in the function name -- confirm the global is set accordingly
    before selecting this processor.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        float32 ``torch.Tensor`` holding one mel spectrogram per channel
        (a single channel while ``mono`` is True).
    """
    n_fft = 2048
    sr = 22050
    mono = True  # @todo ask mattias
    log_spec = False
    # n_mels = 96
    hop_length = 512
    fmax = None

    # Loading (with resampling) is the slowest part of this function.
    sig, sr = librosa.load(file_path, sr=sr, mono=mono)
    # Guarantee a (channels, samples) layout. This covers the mono case and a
    # single-channel file loaded with mono=False, which comes back 1-D and
    # would otherwise be iterated sample by sample below.
    sig = np.atleast_2d(sig)

    # The frequency grid depends only on sr and n_fft, so build it once
    # instead of once per channel.
    freqs = None if log_spec else librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)

    spectrograms = []
    for y in sig:
        # Compute the STFT and keep only the amplitudes.
        stft = np.abs(
            librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=None,
                         window='hann', center=True, pad_mode='reflect')
        )
        # Spectrogram weighting.
        if log_spec:
            stft = np.log10(stft + 1)
        else:
            stft = librosa.perceptual_weighting(stft ** 2, freqs, ref=1.0,
                                                amin=1e-10, top_db=80.0)
        # Apply the mel filterbank.
        spectrogram = librosa.feature.melspectrogram(S=stft, sr=sr, n_mels=n_mels, fmax=fmax)
        spectrograms.append(np.asarray(spectrogram))

    return torch.from_numpy(np.asarray(spectrograms, dtype=np.float32))
# Processor function the module exposes under the name ``audio_processor``;
# swap the assignment with the commented-out line to use the 22 kHz variant.
audio_processor = processor_mtgjamendo44k
# audio_processor = processor_mtgjamendo22k_96mels
# NOTE(review): no label encoder is configured here; presumably one is
# assigned elsewhere when needed -- confirm against the callers.
label_encoder = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment