# Untitled

import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pickle
import joblib
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers
import tensorflow as tf
from google.colab import drive

# INTEGRATE GOOGLE DRIVE WITH GOOGLE COLAB
# Mount the Drive so the runtime can reach the dataset under /content/gdrive.
drive.mount('/content/gdrive')

# ACCESSING THE DIRECTORY AND .WAV FILES
# Switch to the dataset's main directory so 'metadata.csv' resolves relative to it.
os.chdir(r'/content/gdrive/MyDrive/Personal/G12/Research/Audio files')
df = pd.read_csv('metadata.csv')

# A single recording used as the test file.
test_file = r'/content/gdrive/MyDrive/Personal/G12/Research/Audio files/Train/Philippine Eagle/PE (170).wav'
print(f"file path: {os.path.abspath(test_file)}")  # show the file path

df.head()  # show metadata (only rendered when it is the last expression of a notebook cell)

# NOTE(review): `ipd` is not imported anywhere in the visible code; this line presumably
# expects `import IPython.display as ipd` to have been run earlier -- confirm.
ipd.Audio(test_file)  # audio player

# PADDING THE SIGNALS
# Scale and pad the audio features so that every “channel” is the same size.
# Small random integer matrix (7 rows x 4 columns, values in [0, 10)).
array = np.random.randint(low=0, high=10, size=(7, 4))

def padding(array, xx, yy):
    """
    Zero-pad a 2-D array so that it is roughly centred in an (xx, yy) frame.

    The missing rows/columns are split between the two sides of each axis;
    when the amount is odd the extra row/column goes to the bottom/right.
    An axis that is already at least as large as requested is left untouched
    (nothing is cropped).

    :param array: 2-D numpy array
    :param xx: desired height (number of rows)
    :param yy: desired width (number of columns)
    :return: padded array
    """
    def _split(target, current):
        # Amount of padding before/after one axis, never negative.
        before = max((target - current) // 2, 0)
        after = max(target - before - current, 0)
        return before, after

    row_pad = _split(xx, array.shape[0])
    col_pad = _split(yy, array.shape[1])

    return np.pad(array, pad_width=(row_pad, col_pad), mode='constant')

#GENERATE FEATURES
def generate_features(signal_cut, sr=28000, n_fft=255, hop_length=512, max_size=1000):
    """
    Build a 3-channel feature "image" from a 1-D audio signal.

    Channel 0 stacks the normalised spectral bandwidth, spectral centroid and
    chroma rows (2 + 9 * (1 + 1 + 12) = 128 rows), channel 1 is the STFT
    magnitude and channel 2 the MFCCs. Each channel is zero-padded to
    128 x max_size by ``padding``; features wider than ``max_size`` frames are
    not cropped, so the caller is expected to pass a short enough clip.

    :param signal_cut: 1-D audio time series
    :param sr: sampling rate of ``signal_cut``; defaults to 28000
    :param n_fft: FFT window size used for the MFCCs
    :param hop_length: hop length used for the MFCCs
    :param max_size: width (number of frames) every feature is padded to
    :return: array stacking the three channels along the last axis,
        (128, max_size, 3) when no feature exceeds 128 x max_size
    """
    # n_fft=255 gives 1 + 255 // 2 = 128 frequency bins, matching the 128-row image height.
    stft = padding(np.abs(librosa.stft(signal_cut, n_fft=255, hop_length=512)), 128, max_size)
    # librosa's feature functions take the audio through the keyword `y`.
    MFCCs = padding(
        librosa.feature.mfcc(y=signal_cut, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=128),
        128,
        max_size,
    )
    spec_centroid = librosa.feature.spectral_centroid(y=signal_cut, sr=sr)
    chroma_stft = librosa.feature.chroma_stft(y=signal_cut, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=signal_cut, sr=sr)

    # Normalise and pad each spectral feature once; the results are reused below.
    bw_row = padding(normalize(spec_bw), 1, max_size)
    centroid_row = padding(normalize(spec_centroid), 1, max_size)
    chroma_rows = padding(normalize(chroma_stft), 12, max_size)

    # Repeat the padded spec_bw, spec_centroid and chroma_stft rows until the
    # channel is as tall as the STFT and MFCC channels.
    blocks = [bw_row, centroid_row] + [bw_row, centroid_row, chroma_rows] * 9
    image = np.concatenate(blocks, axis=0)

    return np.dstack((image, stft, MFCCs))

# Separate the target column (bird_id) from the remaining metadata columns.
y = df['bird_id']
X = df.drop(columns='bird_id')

# First split: hold out 25% of the rows as the test set, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=123,
    stratify=y,
)
print(X_train.shape, X_test.shape)

# Second split: take 25% of the remaining training rows as the validation set
# (this split is not stratified).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=123,
)
print(X_train.shape, X_test.shape, X_val.shape, len(y_train), len(y_test), len(y_val))

# Calculate the features for every audio file and store them as features and labels.

def get_features(df_in):
    """
    Compute the feature image and label for every audio file listed in df_in.

    :param df_in: DataFrame with the columns ``bird_id`` (label),
        ``file_name`` (path passed to ``librosa.load``) and ``t_min`` /
        ``t_max`` (start and end of the signal, multiplied by the sampling
        rate to get sample positions)
    :return: tuple ``(features, labels)``: ``features`` stacks one
        ``generate_features`` output per file along a new first axis (an empty
        array when df_in has no rows) and ``labels`` is the list of matching
        bird_id values
    """
    features = []
    labels = []  # one bird_id per processed file

    for bird_id in df_in.bird_id.unique():
        print('bird_id:', bird_id)
        # all the rows that share this bird_id
        species_rows = df_in.loc[df_in.bird_id == bird_id]

        for filename, tstart, tend in zip(
            species_rows.file_name, species_rows.t_min, species_rows.t_max
        ):
            print("full path: " + os.path.abspath(filename))
            # Load the file
            signal, sr = librosa.load(filename, sr=28000)
            # Cut the audio itself (not the label column) to the signal start and end
            signal_cut = signal[int(round(tstart * sr)):int(round(tend * sr))]
            # Generate features for the cut signal
            features.append(generate_features(signal_cut))
            labels.append(bird_id)

    # Return only after every species and file has been processed.
    if not features:
        return np.array([]), labels
    return np.stack(features, axis=0), labels
# Use get_features to calculate and store the features of the test and training splits.
# Each split's label column is joined back onto its metadata because get_features
# reads bird_id from the same frame.
test_features, test_labels = get_features(pd.concat([X_test, y_test], axis=1))
# get_features_noOS is not defined in this file, so the training split uses get_features too.
train_features, train_labels = get_features(pd.concat([X_train, y_train], axis=1))