from madmom.audio.spectrogram import (
    LogarithmicFilteredSpectrogram,
    SpectrogramDifference,
    LogarithmicFilterbank,
)
import numpy as np


def extract_features(
    audiofile,
    *,
    frame_sizes=(1024, 2048, 4096),
    num_channels=1,
    sample_rate=44100,
    fps=100,
    num_bands=12,
    fmin=30,
    fmax=17000,
    norm_filters=True,
    start_silence=0,
    diff=True,
    diff_ratio=0.5,
    positive_diffs=True,
):
    """Build a multi-resolution spectral feature matrix for one audio file.

    For every entry in ``frame_sizes`` a ``LogarithmicFilteredSpectrogram``
    is computed from ``audiofile``. When ``diff`` is true, the corresponding
    ``SpectrogramDifference`` is appended column-wise to that spectrogram.
    The per-frame-size results are then concatenated column-wise with
    ``np.hstack``.

    All keyword arguments default to the values that were previously
    hard-coded, so ``extract_features(audiofile)`` behaves as before.

    Parameters
    ----------
    audiofile
        Audio input, forwarded unchanged as the first argument of
        ``LogarithmicFilteredSpectrogram``.
    frame_sizes
        Iterable of frame sizes; one spectrogram is computed per entry.
    num_channels, sample_rate, fps, num_bands, fmin, fmax, norm_filters,
    start_silence
        Forwarded to ``LogarithmicFilteredSpectrogram`` under the same
        keyword names.
    diff
        If true, stack the spectrogram difference next to each spectrogram.
    diff_ratio, positive_diffs
        Forwarded to ``SpectrogramDifference`` under the same keyword names.

    Returns
    -------
    The result of ``np.hstack`` over the features of all frame sizes.
    Presumably a 2-D array of shape (frames, features) -- TODO confirm
    against the madmom return types.

    Raises
    ------
    ValueError
        If ``frame_sizes`` is empty.
    """
    frame_sizes = tuple(frame_sizes)
    if not frame_sizes:
        # np.hstack([]) would raise ValueError as well; fail early with a
        # message that names the actual cause.
        raise ValueError("frame_sizes must contain at least one frame size")

    features = []
    for frame_size in frame_sizes:
        spectrogram = LogarithmicFilteredSpectrogram(
            audiofile,
            num_channels=num_channels,
            sample_rate=sample_rate,
            filterbank=LogarithmicFilterbank,
            frame_size=frame_size,
            fps=fps,
            num_bands=num_bands,
            fmin=fmin,
            fmax=fmax,
            norm_filters=norm_filters,
            start_silence=start_silence,
        )
        if diff:
            # NOTE(review): stack_diffs is forwarded exactly as in the
            # original code, and the stacking is also done explicitly on the
            # next statement -- confirm whether SpectrogramDifference
            # actually honours this keyword.
            spectrogram_diff = SpectrogramDifference(
                spectrogram,
                diff_ratio=diff_ratio,
                positive_diffs=positive_diffs,
                stack_diffs=np.hstack,
            )
            spectrogram = np.hstack((spectrogram, spectrogram_diff))
        features.append(spectrogram)

    return np.hstack(features)