Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import librosa
- import pandas as pd
- import numpy as np
- import time
- from keras.models import load_model
- from keras import Sequential
- from keras.layers import LSTM, Dense
- from keras.optimizers import Adam
- from main import load_song_features, read_labels_from_file
# Paths to the Mozilla-Common-Voice-style gender TSVs (tab-separated: path, gender, ...).
trainPath = "D:\\Audio - dataset\\trainGender.tsv"
# NOTE(review): this points at the *train* file; presumably it should be
# "testGender.tsv" — confirm against the dataset layout.
testPath = "D:\\Audio - dataset\\trainGender.tsv"

trainData = pd.read_csv(trainPath, sep='\t')
# Bug fix: this originally read trainPath again, so testData was a duplicate
# of trainData instead of the held-out set.
testData = pd.read_csv(testPath, sep='\t')
def windows(data, window_size):
    """Yield (start, end) index pairs of half-overlapping windows over *data*.

    Successive windows advance by ``window_size // 2`` samples, so each
    window overlaps the previous one by 50%.  The final pair(s) may extend
    past ``len(data)``; callers are expected to length-check the slice.
    """
    step = window_size // 2
    total = len(data)
    offset = 0
    while offset < total:
        yield offset, offset + window_size
        offset += step
def extract_features(parent_dir, bands=20, frames=41, n_samples=1):
    """Extract windowed MFCC feature tensors for the first *n_samples* clips.

    Reads clip paths and gender labels from the module-level ``trainData``
    DataFrame, loads each mp3 from *parent_dir*, slices the waveform into
    half-overlapping windows, and computes a (frames, bands) MFCC matrix
    per full-length window.

    Args:
        parent_dir: Directory containing the audio clips.
        bands: Number of MFCC coefficients per frame.
        frames: Frames per window; window length is ``512 * (frames - 1)``
            samples (512 is librosa's default hop length).
        n_samples: How many rows of ``trainData`` to process (was a
            hard-coded ``range(1)``; default preserves old behavior).

    Returns:
        Tuple ``(features, labels)`` where ``features`` has shape
        ``(num_windows, frames, bands)`` and ``labels`` is an int array
        (1 = male, 0 = anything else).
    """
    window_size = 512 * (frames - 1)
    mfccs = []
    labels = []
    for i in range(n_samples):
        clip_path = os.path.join(parent_dir, trainData.path[i] + ".mp3")
        sound_clip, sr = librosa.load(clip_path)
        # Binary-encode gender: 'male' -> 1, everything else -> 0.
        label = 1 if trainData.gender[i] == 'male' else 0
        for start, end in windows(sound_clip, window_size):
            segment = sound_clip[start:end]
            # Skip the ragged tail window that is shorter than window_size.
            if len(segment) != window_size:
                continue
            mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=bands).T.flatten()[:, np.newaxis].T
            mfccs.append(mfcc)
            labels.append(label)
    features = np.asarray(mfccs).reshape(len(mfccs), frames, bands)
    # Bug fix: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # plain int is the documented replacement.
    return np.array(features), np.array(labels, dtype=int)
def one_hot_encode(labels):
    """One-hot encode a sequence of non-negative integer labels.

    Args:
        labels: 1-D sequence/array of non-negative integer class ids.

    Returns:
        Float array of shape ``(len(labels), max(labels) + 1)`` with a 1
        in each row at the label's column.

    Note:
        The original sized the matrix by ``len(np.unique(labels))``, which
        raised IndexError for valid but non-contiguous labels (e.g.
        ``[0, 2]``).  Sizing by ``max + 1`` is identical for contiguous
        0..k-1 labels and correct otherwise.  The inner variable also
        shadowed the function name; renamed.
    """
    labels = np.asarray(labels, dtype=int)
    n_classes = int(labels.max()) + 1 if labels.size else 0
    encoded = np.zeros((labels.size, n_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded
# --- Disabled training pipeline (kept for reference) ---------------------
# The commented lines below show two alternative ways features/labels were
# produced (loaded from cached files via helpers from `main`, or extracted
# fresh with extract_features) before one-hot encoding.
#shape_features = [77839, 41, 20]
#tr_features = load_song_features('song_features/features_file.txt', shape_features)
#tr_labels = read_labels_from_file('song_features/labels_file.txt')
#tr_features, tr_labels = extract_features("D:\\Audio - dataset\\clips")
#tr_labels = one_hot_encode(tr_labels)
# NOTE(review): the block below is "commented out" with a bare string
# literal.  The odd quoting (4 quotes to open, 5 to close) does parse —
# the string simply starts and ends with an extra '"' — but a normal
# `"""` pair or `#` comments would be far less fragile.  It builds a
# 4-layer LSTM gender classifier, trains it, saves my_model.h5, and
# prints training accuracy computed by hand.
""""
input_shape = (tr_features.shape[1], tr_features.shape[2])
model = Sequential()
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True, input_shape=input_shape))
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True, input_shape=input_shape))
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True, input_shape=input_shape))
model.add(LSTM(units=32, dropout=0.05, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=tr_labels.shape[1], activation="softmax"))
opt = Adam()
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
model.summary()
batch_size = 70 # num of training examples per minibatch
num_epochs = 20
model.fit(
tr_features,
tr_labels,
batch_size=batch_size,
epochs=num_epochs,
)
ynew = model.predict_classes(tr_features)
model.save('my_model.h5')
corr_guess = 0
for i in range(len(tr_labels)):
anss = 1
if tr_labels[i][0] == 1:
anss = 0
corr_guess += anss == ynew[i]
print(corr_guess/len(tr_labels))
"""""
# --- Inference on a single test clip -------------------------------------
# Extract windowed MFCC features from one audio file exactly the way the
# training pipeline does, then classify each window with the saved model.
mfccs = []
# Must match training: 512 * (frames - 1) with frames == 41.
window_size = 512 * 40
sound_clip, sr = librosa.load("test_samples/diana2.aac")
for start, end in windows(sound_clip, window_size):
    segment = sound_clip[start:end]
    # Only full-length windows reshape cleanly to (41, 20).
    if len(segment) == window_size:
        mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20).T.flatten()[:, np.newaxis].T
        mfccs.append(mfcc)

tr_features = np.asarray(mfccs).reshape(len(mfccs), 41, 20)

model = load_model('my_model.h5')
# Bug fix: Sequential.predict_classes was deprecated and removed in
# Keras / TF >= 2.6; the documented replacement for a softmax output is
# argmax over the predicted probabilities.
ynew = np.argmax(model.predict(tr_features), axis=-1)
print(ynew)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement