Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def generate_directional_noise(audio, Fs, SNR, H, indicesListForMics):
    """Synthesize spatialized (directional) noise at a target SNR.

    White Gaussian noise is scaled so its power sits ``SNR`` dB below the
    average per-channel power of ``audio``, convolved in the STFT domain
    with the impulse responses ``H``, and converted back to the time
    domain for the requested microphone channels.

    Parameters
    ----------
    audio : np.ndarray, shape (samples, ch)
        Reference multichannel audio; used to measure signal power and to
        size the output.
    Fs : int
        Sample rate. Unused here; kept for interface compatibility.
    SNR : float
        Desired signal-to-noise ratio in dB.
    H : np.ndarray, shape (I, L, C)
        STFT-domain impulse responses: I frequency bins, L blocks,
        C channels (e.g. 1024 x 4 x 32).
    indicesListForMics : sequence of int
        Channel indices of ``H`` to render.

    Returns
    -------
    np.ndarray, shape (samples, ch)
        Time-domain spatialized noise, zero-padded to the input length.
    """
    # STFT parameters for the impulse-response convolution.
    winlen = 2048          # window length of the STFT
    # BUGFIX: integer division. winlen/2 is a float in Python 3; librosa
    # rejects a non-integer hop_length and the frame loop below steps by it.
    hoplen = winlen // 2   # hop size of the STFT

    [I, L, C] = H.shape            # frequencies, blocks, channels
    [samples, ch] = audio.shape

    # Derive the noise power from the requested SNR.
    p_audio = np.mean(np.sum(audio ** 2, axis=0) / samples)  # average audio power
    sigma_db = 10 * np.log10(p_audio) - SNR  # noise power (dB) for requested SNR
    sigma = np.sqrt(10 ** (sigma_db / 10))
    noise = np.random.normal(0, sigma, samples)  # white-noise vector

    # Convolve the noise spectrogram with the impulse response.
    X_conv = librosa.core.stft(noise, n_fft=winlen, hop_length=hoplen,
                               win_length=winlen, window='hann')
    frame_len = X_conv.shape[1]
    XestLSsyn = np.zeros((I, frame_len, C), dtype=np.complex64)
    nframe = 0
    sta = 1
    sto = sta + winlen - 1
    while sto <= samples:
        # Frames that the L-block RIR of the current frame spills into,
        # clipped to the spectrogram length.
        Lind = np.arange(nframe, np.minimum(nframe + L, frame_len))
        # Apply the estimated RIR to the close-field signal.
        spatial_audio = np.squeeze(
            H * np.expand_dims(np.expand_dims(X_conv[:, nframe], axis=1), axis=2))
        XestLSsyn[:, Lind, :] = XestLSsyn[:, Lind, :] + spatial_audio[:, 0:len(Lind), :]
        sta = sta + hoplen
        sto = sto + hoplen
        nframe = nframe + 1

    # Convert each requested mic channel back to the time domain.
    y = []
    for i in range(len(indicesListForMics)):
        y.append(librosa.core.istft(XestLSsyn[:, :, indicesListForMics[i]],
                                    hop_length=hoplen, win_length=winlen,
                                    window='hann'))

    # Zero-pad to the input length / channel count.
    # NOTE(review): this assumes len(indicesListForMics) == ch and that the
    # istft output is not longer than `samples` — confirm against callers.
    y = np.asarray(y).T
    tmp = y
    y = np.zeros((samples, ch))
    length = tmp.shape[0]
    y[0:length, :] = tmp
    return y
# STFT / array-geometry configuration.
fs = 48000                          # sample rate (Hz)
nfft = 2048                         # FFT size
hop_len_s = 0.02                    # STFT hop in seconds
win_len = 2 * int(hop_len_s * fs)   # STFT window length in samples
r_mic = 0.042                       # capsule radius from array center (m)
mic_number = 4                      # number of microphones

# Tetrahedral capsule layout as (elevation, azimuth) pairs in degrees,
# converted to Cartesian (x, y, z) on a sphere of radius r_mic.
_mic_angles_deg = [(35, 45), (-35, -45), (-35, 135), (35, -135)]
mics = np.array([
    [r_mic * np.cos(np.deg2rad(ele)) * np.cos(np.deg2rad(azi)),
     r_mic * np.cos(np.deg2rad(ele)) * np.sin(np.deg2rad(azi)),
     r_mic * np.sin(np.deg2rad(ele))]
    for ele, azi in _mic_angles_deg
])
def beamformer(spectrogram, csv_file):
    """Delay-and-sum beamformer steered by ground-truth source directions.

    For each STFT frame, look up the annotated source direction in the CSV
    (falling back to elevation/azimuth 0 when no annotation overlaps the
    frame), build the corresponding steering vector from the module-level
    ``mics`` geometry, and average the phase-aligned channels.

    Parameters
    ----------
    spectrogram : np.ndarray, shape (time_bins, frequencies, nb_channels)
        STFT of the noisy multichannel audio.
    csv_file : str
        Path to annotations with ``start_time``/``end_time`` columns;
        columns 3 and 4 (by position) hold elevation and azimuth in
        degrees — TODO confirm against the dataset format.

    Returns
    -------
    np.ndarray, shape (time_bins, frequencies), complex
        Single-channel beamformed spectrogram.
    """
    [time_bins, frequencies, nb_channels] = spectrogram.shape
    # Center frequency of each STFT bin.
    f_hop = float(fs) / float(nfft)
    f = np.expand_dims(np.arange(f_hop / 2, fs / 2, f_hop), axis=0)
    # Placeholder for the output.
    # BUGFIX: builtin `complex` instead of the `np.complex` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError).
    beamformed_signal = np.zeros((time_bins, frequencies), dtype=complex)
    # Read the ground-truth annotations.
    csv = pd.read_csv(csv_file)
    for time in range(time_bins):
        # Find the direction of the source active in the current frame.
        start_t = time * hop_len_s
        end_t = (time * hop_len_s) + (float(win_len) / fs)
        row = csv[(csv['start_time'] < start_t) & (csv['end_time'] > end_t)].to_numpy()
        if row.size != 0:
            ele = row[0, 3]  # elevation of the current source
            azi = row[0, 4]  # azimuth of the current source
        else:
            # No annotation overlaps this frame: steer straight ahead.
            ele = 0
            azi = 0
        # Unit direction vector (x, y, z) toward the source.
        x = np.cos(np.deg2rad(ele)) * np.cos(np.deg2rad(azi))
        y = np.cos(np.deg2rad(ele)) * np.sin(np.deg2rad(azi))
        z = np.sin(np.deg2rad(ele))
        direction = np.expand_dims(np.array([x, y, z]), axis=1)
        # Project the mic positions onto the source direction and divide by
        # the speed of sound (343 m/s) to get per-mic time delays.
        delays = np.matmul(mics, direction) / 343
        # Reference all delays to the first microphone.
        normalized_delays = delays - delays[0]
        # Steering vector: per-frequency phase compensation, averaged over mics.
        steering_vector = np.exp(-2 * 1j * np.pi * f * normalized_delays) / mic_number
        # Delay-and-sum: align and average the channels.
        beamformed_signal[time, :] = np.sum(spectrogram[time, :, :] * steering_vector.T, axis=1)
    return beamformed_signal
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement