features.py 1.13 KB
Newer Older
Richard Vogl's avatar
Richard Vogl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from madmom.audio.spectrogram import LogarithmicFilteredSpectrogram, SpectrogramDifference, LogarithmicFilterbank
import numpy as np


def extract_features(audiofile):
    """Compute stacked multi-resolution spectrogram features for a file.

    For each frame size in (1024, 2048, 4096) a
    ``LogarithmicFilteredSpectrogram`` is created from ``audiofile`` with
    the fixed settings defined below.  A ``SpectrogramDifference`` of that
    spectrogram is then joined to it with ``np.hstack``, and finally the
    results for all frame sizes are joined with ``np.hstack`` as well.

    Parameters
    ----------
    audiofile
        Passed unchanged as the first argument to
        ``LogarithmicFilteredSpectrogram``.

    Returns
    -------
    The ``np.hstack`` of the per-frame-size feature arrays.
    """
    # Keyword arguments shared by every spectrogram, independent of the
    # frame size.
    spectrogram_settings = dict(
        num_channels=1,
        sample_rate=44100,
        filterbank=LogarithmicFilterbank,
        fps=100,
        num_bands=12,
        fmin=30,
        fmax=17000,
        norm_filters=True,
        start_silence=0,
    )
    # Keyword arguments for the difference of each spectrogram.
    difference_settings = dict(
        diff_ratio=0.5,
        positive_diffs=True,
        stack_diffs=np.hstack,
    )
    # Switch for appending the difference to each spectrogram.
    append_difference = True

    def features_for(size):
        # One spectrogram (optionally with its difference) per frame size.
        spec = LogarithmicFilteredSpectrogram(
            audiofile, frame_size=size, **spectrogram_settings)
        if not append_difference:
            return spec
        spec_delta = SpectrogramDifference(spec, **difference_settings)
        return np.hstack((spec, spec_delta))

    # NOTE(review): the final hstack presumably relies on all frame sizes
    # yielding the same number of frames (same fps) -- confirm.
    return np.hstack([features_for(size) for size in (1024, 2048, 4096)])