Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def generate_directional_noise(audio, Fs, SNR, H, indicesListForMics):
    """Synthesize spatialized (directional) noise at a target SNR.

    White Gaussian noise is scaled so its power sits ``SNR`` dB below the
    average per-channel power of ``audio``, convolved in the STFT domain
    with the impulse responses ``H``, and converted back to the time
    domain for the requested microphone channels.

    Parameters
    ----------
    audio : np.ndarray, shape (samples, ch)
        Reference multichannel audio; used to measure signal power and to
        size the output.
    Fs : int
        Sample rate. Unused here; kept for interface compatibility.
    SNR : float
        Desired signal-to-noise ratio in dB.
    H : np.ndarray, shape (I, L, C)
        STFT-domain impulse responses: I frequency bins, L blocks,
        C channels (e.g. 1024 x 4 x 32).
    indicesListForMics : sequence of int
        Channel indices of ``H`` to render.

    Returns
    -------
    np.ndarray, shape (samples, ch)
        Time-domain spatialized noise, zero-padded to the input length.
    """
    # STFT parameters for the impulse-response convolution.
    winlen = 2048          # window length of the STFT
    # BUGFIX: integer division. winlen/2 is a float in Python 3; librosa
    # rejects a non-integer hop_length and the frame loop below steps by it.
    hoplen = winlen // 2   # hop size of the STFT

    [I, L, C] = H.shape            # frequencies, blocks, channels
    [samples, ch] = audio.shape

    # Derive the noise power from the requested SNR.
    p_audio = np.mean(np.sum(audio ** 2, axis=0) / samples)  # average audio power
    sigma_db = 10 * np.log10(p_audio) - SNR  # noise power (dB) for requested SNR
    sigma = np.sqrt(10 ** (sigma_db / 10))
    noise = np.random.normal(0, sigma, samples)  # white-noise vector

    # Convolve the noise spectrogram with the impulse response.
    X_conv = librosa.core.stft(noise, n_fft=winlen, hop_length=hoplen,
                               win_length=winlen, window='hann')
    frame_len = X_conv.shape[1]
    XestLSsyn = np.zeros((I, frame_len, C), dtype=np.complex64)
    nframe = 0
    sta = 1
    sto = sta + winlen - 1
    while sto <= samples:
        # Frames that the L-block RIR of the current frame spills into,
        # clipped to the spectrogram length.
        Lind = np.arange(nframe, np.minimum(nframe + L, frame_len))
        # Apply the estimated RIR to the close-field signal.
        spatial_audio = np.squeeze(
            H * np.expand_dims(np.expand_dims(X_conv[:, nframe], axis=1), axis=2))
        XestLSsyn[:, Lind, :] = XestLSsyn[:, Lind, :] + spatial_audio[:, 0:len(Lind), :]
        sta = sta + hoplen
        sto = sto + hoplen
        nframe = nframe + 1

    # Convert each requested mic channel back to the time domain.
    y = []
    for i in range(len(indicesListForMics)):
        y.append(librosa.core.istft(XestLSsyn[:, :, indicesListForMics[i]],
                                    hop_length=hoplen, win_length=winlen,
                                    window='hann'))

    # Zero-pad to the input length / channel count.
    # NOTE(review): this assumes len(indicesListForMics) == ch and that the
    # istft output is not longer than `samples` — confirm against callers.
    y = np.asarray(y).T
    tmp = y
    y = np.zeros((samples, ch))
    length = tmp.shape[0]
    y[0:length, :] = tmp
    return y
# STFT / array-geometry configuration.
fs = 48000                          # sample rate (Hz)
nfft = 2048                         # FFT size
hop_len_s = 0.02                    # STFT hop in seconds
win_len = 2 * int(hop_len_s * fs)   # STFT window length in samples
r_mic = 0.042                       # capsule radius from array center (m)
mic_number = 4                      # number of microphones

# Tetrahedral capsule layout as (elevation, azimuth) pairs in degrees,
# converted to Cartesian (x, y, z) on a sphere of radius r_mic.
_mic_angles_deg = [(35, 45), (-35, -45), (-35, 135), (35, -135)]
mics = np.array([
    [r_mic * np.cos(np.deg2rad(ele)) * np.cos(np.deg2rad(azi)),
     r_mic * np.cos(np.deg2rad(ele)) * np.sin(np.deg2rad(azi)),
     r_mic * np.sin(np.deg2rad(ele))]
    for ele, azi in _mic_angles_deg
])
def beamformer(spectrogram, csv_file):
    """Delay-and-sum beamformer steered by ground-truth source directions.

    For each STFT frame, look up the annotated source direction in the CSV
    (falling back to elevation/azimuth 0 when no annotation overlaps the
    frame), build the corresponding steering vector from the module-level
    ``mics`` geometry, and average the phase-aligned channels.

    Parameters
    ----------
    spectrogram : np.ndarray, shape (time_bins, frequencies, nb_channels)
        STFT of the noisy multichannel audio.
    csv_file : str
        Path to annotations with ``start_time``/``end_time`` columns;
        columns 3 and 4 (by position) hold elevation and azimuth in
        degrees — TODO confirm against the dataset format.

    Returns
    -------
    np.ndarray, shape (time_bins, frequencies), complex
        Single-channel beamformed spectrogram.
    """
    [time_bins, frequencies, nb_channels] = spectrogram.shape
    # Center frequency of each STFT bin.
    f_hop = float(fs) / float(nfft)
    f = np.expand_dims(np.arange(f_hop / 2, fs / 2, f_hop), axis=0)
    # Placeholder for the output.
    # BUGFIX: builtin `complex` instead of the `np.complex` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError).
    beamformed_signal = np.zeros((time_bins, frequencies), dtype=complex)
    # Read the ground-truth annotations.
    csv = pd.read_csv(csv_file)
    for time in range(time_bins):
        # Find the direction of the source active in the current frame.
        start_t = time * hop_len_s
        end_t = (time * hop_len_s) + (float(win_len) / fs)
        row = csv[(csv['start_time'] < start_t) & (csv['end_time'] > end_t)].to_numpy()
        if row.size != 0:
            ele = row[0, 3]  # elevation of the current source
            azi = row[0, 4]  # azimuth of the current source
        else:
            # No annotation overlaps this frame: steer straight ahead.
            ele = 0
            azi = 0
        # Unit direction vector (x, y, z) toward the source.
        x = np.cos(np.deg2rad(ele)) * np.cos(np.deg2rad(azi))
        y = np.cos(np.deg2rad(ele)) * np.sin(np.deg2rad(azi))
        z = np.sin(np.deg2rad(ele))
        direction = np.expand_dims(np.array([x, y, z]), axis=1)
        # Project the mic positions onto the source direction and divide by
        # the speed of sound (343 m/s) to get per-mic time delays.
        delays = np.matmul(mics, direction) / 343
        # Reference all delays to the first microphone.
        normalized_delays = delays - delays[0]
        # Steering vector: per-frequency phase compensation, averaged over mics.
        steering_vector = np.exp(-2 * 1j * np.pi * f * normalized_delays) / mic_number
        # Delay-and-sum: align and average the channels.
        beamformed_signal[time, :] = np.sum(spectrogram[time, :, :] * steering_vector.T, axis=1)
    return beamformed_signal
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement