Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from scipy.signal import firls
- import numpy as np
- import torch
- import torch.nn.functional as F
# Sample points of an equal-loudness contour, referenced to 0 dB at 1 kHz.
# Column 0: frequency in Hz; column 1: relative response in dB.
elc = np.column_stack((
    # frequency (Hz)
    [31.5, 63.0, 100.0, 200.0, 400.0, 800.0, 1000.0, 2000.0, 3150.0,
     4000.0, 5000.0, 6300.0, 7100.0, 8000.0, 9000.0, 10000.0, 12500.0,
     14000.0, 16000.0, 20000.0, 31500.0],
    # relative response (dB)
    [-29.9, -23.9, -19.8, -13.8, -7.8, -1.9, 0.0, 5.6, 9.0, 10.5, 11.7,
     12.2, 12.0, 11.4, 10.1, 8.1, 0.0, -5.3, -11.7, -22.2, -42.7],
))
def equal_filter(n_tap, sr, contour=None):
    """Design a linear-phase FIR filter that simulates an equal-loudness contour
    (suppresses low frequencies; amplifies around 3 kHz).

    Args:
        n_tap: number of filter taps; must be odd (Type-I linear-phase FIR).
        sr: sample rate in Hz; contour points above sr // 2 are discarded.
        contour: optional (n, 2) array of [frequency_hz, response_db] rows.
            Defaults to the module-level ``elc`` table.

    Returns:
        1-D numpy array of ``n_tap`` least-squares FIR coefficients.

    Raises:
        ValueError: if ``n_tap`` is even or fewer than two contour points
            fall inside the usable frequency range.
    """
    # Validate with a real exception: ``assert`` disappears under ``python -O``.
    if n_tap % 2 != 1:
        raise ValueError("n_tap should be an odd number, otherwise it's odd..")

    table = np.asarray(elc if contour is None else contour, dtype=float)

    # Keep only the points at or below the (integer) Nyquist frequency.
    n_edge = int(np.sum(table[:, 0] <= sr // 2))
    # firls interprets its band argument as *pairs* of edges and raises on an
    # odd count, which happens for some sample rates (e.g. sr=12000 keeps 11
    # points). Drop the last point to restore an even edge count.
    if n_edge % 2 != 0:
        n_edge -= 1
    if n_edge < 2:
        raise ValueError("need at least two contour points below sr / 2")

    freq = table[:n_edge, 0]
    desired = 10.0 ** (table[:n_edge, 1] / 20.0)  # dB -> linear amplitude
    return firls(n_tap, freq, desired, fs=sr)
if __name__ == '__main__':
    # librosa is only needed for this demo; import it locally so the module
    # itself (filter design) carries no hard dependency on it.
    import librosa

    SR = 44100
    len_filter = 9

    # librosa.load returns a (samples, sample_rate) tuple — unpack it; passing
    # the tuple straight to torch.from_numpy raises TypeError.
    samples, _ = librosa.load('some_audio_file.wav', sr=SR, mono=True)
    audio_signal = torch.from_numpy(samples)  # librosa yields float32

    # Get the filter taps. firls returns float64, so cast to the audio's
    # dtype — F.conv1d requires input and weight dtypes to match.
    elc_filter = torch.from_numpy(equal_filter(n_tap=len_filter, sr=SR))
    elc_filter = elc_filter.to(audio_signal.dtype)
    # Flip to get true convolution: torch's conv1d is cross-correlation and
    # does NOT flip the kernel. (A no-op here for a symmetric FIR, but kept
    # for correctness with arbitrary taps.)
    elc_filter = torch.flip(elc_filter, dims=(0,))
    # conv1d weight layout is (out_channels, in_channels, kernel_size).
    elc_filter = torch.reshape(elc_filter, (1, 1, -1))
    # conv1d input layout is (batch, channels, time).
    batch_audio = torch.reshape(audio_signal, (1, 1, -1))
    # 'same'-length output: pad by half the (odd) filter length.
    perceptual_simulated_batch_audio = F.conv1d(
        batch_audio, elc_filter, padding=len_filter // 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement