import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
# Load the dataset into a DataFrame (column names in vehicle.dat carry a leading space)
df = pd.read_csv('vehicle.dat')
print(df.columns)
X = df.drop([' Class'], axis=1)
Y = to_categorical(df[' Class'].astype('category').cat.codes)  # one-hot encode the class labels
X = MinMaxScaler().fit_transform(X.to_numpy())  # scale every feature to [0, 1]
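# Illustration (not part of the original paste): to_categorical turns integer
# class codes into one-hot rows, which is what the network's output layer and
# the metrics below expect. For example:
print(to_categorical([0, 2, 1]))
# -> [[1. 0. 0.]
#     [0. 0. 1.]
#     [0. 1. 0.]]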
# Quality metrics computed on the Keras backend: recall, precision, F1 score
def recall_m(y_true, y_pred):  # recall
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def precision_m(y_true, y_pred):  # precision
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def f1_m(y_true, y_pred):  # F1 score: harmonic mean of precision and recall
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
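# Optional sanity check (a minimal sketch, not in the original script): the
# metrics above operate on backend tensors, so they can be evaluated directly
# on tiny hand-made inputs. With these values every prediction rounds to the
# correct class, so precision, recall and F1 all come out as roughly 1.0.
_yt = K.constant([[0., 1.], [1., 0.]])
_yp = K.constant([[0.2, 0.9], [0.8, 0.1]])
print(K.eval(precision_m(_yt, _yp)), K.eval(recall_m(_yt, _yp)), K.eval(f1_m(_yt, _yp)))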
# Build the neural network
def my_model(my_X, my_Y, activation_1='tanh', activation_2='relu',
             neurons_input=10, neurons_hidden_1=10, loss='mse',
             optimizer='adam'):
    model = Sequential()  # feed-forward network, signal propagates layer by layer
    # Fully connected input layer: 10 neurons, tanh activation by default,
    # one input per feature column. Available activation functions:
    # https://keras.io/api/layers/activations/
    model.add(Dense(neurons_input, activation=activation_1, input_shape=(my_X.shape[1],)))
    model.add(Dense(neurons_hidden_1, activation=activation_2))
    # Output layer: one neuron per class. Sigmoid is used here; softmax would
    # be the usual choice to normalize mutually exclusive class scores.
    model.add(Dense(my_Y.shape[1], activation='sigmoid'))
    #model.add(Dense(my_Y.shape[1], activation='selu'))
    model.compile(
        # Available loss functions:
        # https://keras.io/api/losses/probabilistic_losses/#binarycrossentropy-class
        #loss=tf.keras.losses.BinaryCrossentropy(),
        loss=loss,  # default loss: mean squared error
        optimizer=tf.optimizers.Adam(learning_rate=1e-3),  # Adam optimizer
        #optimizer=optimizer,
        metrics=['acc', f1_m, precision_m, recall_m]  # accuracy, F1, precision, recall
    )
    return model
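# Quick structural check (illustrative, not in the original paste): building
# the network once outside the loop prints the layer shapes and parameter
# counts that cross-validation will retrain from scratch in every fold.
my_model(X, Y).summary()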
# Define the K-fold cross-validator
num_folds = 3
batch_size = 18
acc_per_fold = []
loss_per_fold = []
f1_per_fold = []
prec_per_fold = []
recall_per_fold = []
res_f1_per_fold = []
res_loss_per_fold = []
kfold = KFold(n_splits=num_folds, shuffle=True)
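# Note (illustrative, not in the original paste): each split yields a pair of
# integer index arrays; with shuffle=True the rows are shuffled before being
# partitioned, e.g. for 6 rows and 3 folds:
#   for tr, te in KFold(n_splits=3, shuffle=True).split(np.arange(6)):
#       print(tr, te)  # e.g. [0 1 3 5] [2 4]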
# K-fold cross-validation model evaluation
fold_no = 1
for train, test in kfold.split(X, Y):
    model = my_model(X, Y)  # a fresh model is built (and compiled) for every fold
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    # Fit data to model: 50 epochs, with progress output
    results = model.fit(X[train], Y[train],
                        batch_size=batch_size,
                        validation_data=(X[test], Y[test]),
                        epochs=50, verbose=1)
    # Generate generalization metrics on the held-out fold
    scores = model.evaluate(X[test], Y[test], verbose=1)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]};')
    print(f'{model.metrics_names[1]} of {scores[1]*100}%; {model.metrics_names[2]} of {scores[2]*100}%;')
    print(f'{model.metrics_names[3]} of {scores[3]*100}%; {model.metrics_names[4]} of {scores[4]*100}%;')
    acc_per_fold.append(scores[1] * 100)
    f1_per_fold.append(scores[2] * 100)
    prec_per_fold.append(scores[3] * 100)
    recall_per_fold.append(scores[4] * 100)
    loss_per_fold.append(scores[0])
    res_f1_per_fold.append(results.history['val_f1_m'])  # per-epoch validation F1
    res_loss_per_fold.append(results.history['loss'])    # per-epoch training loss
    # Increase fold number
    fold_no = fold_no + 1
# == Provide average scores ==
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(acc_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
    print(f'- F1: {f1_per_fold[i]}%')
    print(f'- Precision: {prec_per_fold[i]}%')
    print(f'- Recall: {recall_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)} (+- {np.std(loss_per_fold)})')
print(f'> F1: {np.mean(f1_per_fold)} (+- {np.std(f1_per_fold)})')
print(f'> Precision: {np.mean(prec_per_fold)} (+- {np.std(prec_per_fold)})')
print(f'> Recall: {np.mean(recall_per_fold)} (+- {np.std(recall_per_fold)})')
print('------------------------------------------------------------------------')
# Average F1 score per epoch across folds
f1_line = [sum(x) / len(x) for x in zip(*res_f1_per_fold)]
# Average loss per epoch across folds
loss_line = [sum(x) / len(x) for x in zip(*res_loss_per_fold)]
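# Equivalent NumPy form (a sketch, assuming every fold ran the same number of
# epochs, which holds here since epochs=50 is fixed): stacking the per-fold
# histories and averaging over axis 0 gives the same per-epoch means.
#f1_line = np.mean(np.array(res_f1_per_fold), axis=0)
#loss_line = np.mean(np.array(res_loss_per_fold), axis=0)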
# Plot the learning curves
plt.plot(f1_line)    # mean validation F1 per epoch
plt.plot(loss_line)  # mean training loss per epoch
plt.title('Model F1')
plt.ylabel('F1|Loss')
plt.xlabel('Epoch')
plt.legend(['F1', 'Loss'], loc='upper left')
plt.show()