Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import os
- import librosa
- import librosa.display
- import matplotlib.pyplot as plt
- from sklearn.preprocessing import normalize
- import warnings
- warnings.filterwarnings('ignore')
- import numpy as np
- import pickle
- import joblib
- from sklearn.model_selection import train_test_split
- from tensorflow.keras import models, layers
- import tensorflow as tf
- from google.colab import drive
# Integrate Google Drive with Google Colab so the dataset can be reached.
drive.mount('/content/gdrive')

# Access the main dataset directory and the .wav files. 'metadata.csv' below
# is read relative to this directory.
os.chdir(r'/content/gdrive/MyDrive/Personal/G12/Research/Audio files')
df = pd.read_csv('metadata.csv')

# Our test file: a single recording used to check that the paths resolve.
test_file = r'/content/gdrive/MyDrive/Personal/G12/Research/Audio files/Train/Philippine Eagle/PE (170).wav'
print("file path: " + os.path.abspath(test_file))  # show file path

# Show metadata. As a bare expression this has no visible effect unless it is
# the last statement of an interactive notebook cell.
df.head()

# NOTE(review): `ipd` is not bound by any import visible in this file; this
# line needs `import IPython.display as ipd` to run -- confirm and add it.
ipd.Audio(test_file)  # audio player
# PADDING THE SIGNALS
# Scale and pad the audio features so that every "channel" is the same size.
# Small random demo matrix; the function below never reads it because its
# `array` parameter shadows this module-level name.
array = np.random.randint(0, 10, (7, 4))


def padding(array, xx, yy):
    """Zero-pad a 2-D array, roughly centred, up to at least (xx, yy).

    :param array: 2-D numpy array
    :param xx: desired height (rows)
    :param yy: desired width (columns)
    :return: padded array; a dimension that already meets or exceeds its
        target is left untouched (nothing is ever cropped)
    """
    height, width = array.shape[0], array.shape[1]
    # Split the missing rows/columns between both sides; when the deficit is
    # odd the extra one goes to the bottom/right. max(..., 0) keeps the pad
    # widths non-negative for inputs larger than the target.
    top = max((xx - height) // 2, 0)
    bottom = max(xx - top - height, 0)
    left = max((yy - width) // 2, 0)
    right = max(yy - left - width, 0)
    return np.pad(array, pad_width=((top, bottom), (left, right)), mode='constant')
# GENERATE FEATURES
def generate_features(signal_cut, sr=28000, n_fft=2048, hop_length=512):
    """Stack STFT, MFCC and spectral descriptors of one clip into an image.

    Three 128 x 1000 channels are built and stacked along a third axis:
    a tiled block of spectral-bandwidth / spectral-centroid / chroma rows,
    the STFT magnitude, and 128 MFCCs.

    :param signal_cut: audio time series of the clip (assumed mono, as
        returned by librosa.load -- confirm against the caller)
    :param sr: sampling rate of signal_cut; the default matches the rate
        this script passes to librosa.load
    :param n_fft: FFT window size used for the MFCCs (default is librosa's
        documented default)
    :param hop_length: hop length used for the MFCCs (default is librosa's
        documented default)
    :return: numpy array of shape (128, 1000, 3)
    :raises ValueError: if the clip yields more than 1000 frames, since
        padding() never crops
    """
    max_size = 1000  # max audio file feature width

    # n_fft=255 gives 1 + 255 // 2 = 128 frequency bins, i.e. exactly the
    # channel height, so only the time axis actually needs padding.
    stft = padding(
        np.abs(librosa.stft(signal_cut, n_fft=255, hop_length=512)),
        128, max_size)
    mfccs = padding(
        librosa.feature.mfcc(y=signal_cut, sr=sr, n_fft=n_fft,
                             hop_length=hop_length, n_mfcc=128),
        128, max_size)
    spec_centroid = librosa.feature.spectral_centroid(y=signal_cut, sr=sr)
    chroma_stft = librosa.feature.chroma_stft(y=signal_cut, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=signal_cut, sr=sr)

    # Now the padding part. The padded rows do not change between
    # repetitions, so compute each of them once.
    # The reshape doubles as a width check: it raises ValueError when the
    # clip produced more than max_size frames.
    bw_row = padding(normalize(spec_bw), 1, max_size).reshape(1, max_size)
    centroid_row = padding(normalize(spec_centroid), 1, max_size)
    chroma_rows = padding(normalize(chroma_stft), 12, max_size)

    # Repeat the padded spec_bw, spec_centroid and chroma_stft until they are
    # STFT- and MFCC-sized: 2 leading rows + 9 * (1 + 1 + 12) = 128 rows.
    rows = [bw_row, centroid_row]
    for _ in range(9):
        rows.extend((bw_row, centroid_row, chroma_rows))
    image = np.vstack(rows)

    # Stack the three 2-D channels along a new last axis.
    return np.dstack((image, stft, mfccs))
# Separate the label column (bird_id) from the remaining metadata columns.
X = df.drop('bird_id', axis=1)
y = df.bird_id

# Split once to get the test and training set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=123, stratify=y)
print(X_train.shape, X_test.shape)

# Split twice to get the validation set (0.25 of the remaining training rows).
# NOTE(review): unlike the first split, this one is not stratified -- confirm
# that is intended.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=123)
print(X_train.shape, X_test.shape, X_val.shape,
      len(y_train), len(y_test), len(y_val))
# Calculate these features for every audio file and store as features and labels:
def get_features(df_in):
    """Compute the feature image and the label for every recording in df_in.

    Recordings are processed grouped by bird_id (in order of first
    appearance), and within one bird_id in row order.

    :param df_in: DataFrame with at least the columns bird_id, file_name,
        t_min and t_max; t_min / t_max are multiplied by the sample rate to
        get sample offsets, so they are taken to be in seconds
    :return: tuple (features, labels) -- features is a numpy array holding
        the generate_features output of every file stacked along a new first
        axis, labels is the list of the matching bird_id values
    :raises ValueError: if df_in contains no rows (nothing to stack)
    """
    features = []
    labels = []  # one bird_id per entry of features

    for bird_id in df_in.bird_id.unique():
        print('bird_id:', bird_id)
        # all the rows with the same bird_id
        same_bird = df_in.loc[df_in.bird_id == bird_id]
        for file_name, tstart, tend in zip(
                same_bird.file_name, same_bird.t_min, same_bird.t_max):
            print("full path: " + os.path.abspath(file_name))
            # Load the file
            signal, sr = librosa.load(file_name, sr=28000)
            # Cut the loaded waveform to the annotated start and end of the
            # call. It is `signal` that must be sliced here, not the
            # module-level label Series `y`.
            signal_cut = signal[int(round(tstart * sr)):int(round(tend * sr))]
            # Generate features from the cut clip & collect the numpy array
            features.append(generate_features(signal_cut))
            labels.append(bird_id)

    return np.stack(features, axis=0), labels
# Use get_features to calculate and store the features
test_features, test_labels = get_features(pd.concat([X_test, y_test], axis=1))
# The training split goes through get_features as well: no function named
# get_features_noOS is defined or imported anywhere in this file.
train_features, train_labels = get_features(pd.concat([X_train, y_train], axis=1))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement