- """
- SECTION 1 : Load and setup data for training
- """
- import csv
- import random
- import math
- import operator
- import numpy as np
- import pandas as pd
- from sklearn.model_selection import train_test_split
- # Load dataset
- datatrain = pd.read_csv('iris.data')
- # Change string value to numeric
- datatrain.set_value(datatrain['species']=='Iris-setosa',['species'],0)
- datatrain.set_value(datatrain['species']=='Iris-versicolor',['species'],1)
- datatrain.set_value(datatrain['species']=='Iris-virginica',['species'],2)
- datatrain = datatrain.apply(pd.to_numeric)
- # Change dataframe to array
- datatrain_array = datatrain.as_matrix()
- # Split x and y (feature and target)
- X_train, X_test, y_train, y_test = train_test_split(datatrain_array[:,:4],
- datatrain_array[:,4],
- test_size=0.2)
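
# Note: without random_state, train_test_split gives a different split on
# every run, so the accuracies printed below will vary. For a reproducible,
# class-balanced split one could pass (illustrative values, not from the
# original paste):
#   train_test_split(datatrain_array[:, :4], datatrain_array[:, 4],
#                    test_size=0.2, random_state=42,
#                    stratify=datatrain_array[:, 4])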
- """
- SECTION 2 : Build and Train Model
- Multilayer perceptron model, with one hidden layer.
- input layer : 4 neuron, represents the feature of Iris
- hidden layer : 10 neuron, activation using ReLU
- output layer : 3 neuron, represents the class of Iris, Softmax Layer
- optimizer = stochastic gradient descent with no batch-size
- loss function = categorical cross entropy
- learning rate = 0.01
- epoch = 500
- """
- from sklearn.neural_network import MLPClassifier
- mlp = MLPClassifier(hidden_layer_sizes=(10),solver='sgd',learning_rate_init=0.01,max_iter=500)
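
# Note: MLPClassifier's default batch_size='auto' uses min(200, n_samples)
# samples per update, so the 'sgd' solver above is really mini-batch
# gradient descent. For true per-sample (online) SGD one could instead
# build the model like this (a sketch; mlp_online is illustrative and
# not used further below):
mlp_online = MLPClassifier(hidden_layer_sizes=(10,), solver='sgd',
                           learning_rate_init=0.01, max_iter=500,
                           batch_size=1)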

# KNN
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)
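
# Equivalent vectorized form, for reference (a sketch; the explicit loop
# above is kept as the version actually used):
def euclideanDistanceNp(instance1, instance2, length):
    a = np.asarray(instance1[:length], dtype=float)
    b = np.asarray(instance2[:length], dtype=float)
    return np.linalg.norm(a - b)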

def getNeighbors(trainingSet, testInstance, k):
    distances = []
    # The last column is the class label, so compare features only
    length = len(testInstance) - 1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

def getResponse(neighbors):
    # Majority vote over the neighbors' class labels
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    # dict.iteritems() is Python 2 only; items() works in Python 3
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1),
                         reverse=True)
    return sortedVotes[0][0]
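
# The same majority vote via the standard library, for reference (a sketch;
# getResponse above is the version actually used):
from collections import Counter
def getResponseCounter(neighbors):
    return Counter(neighbor[-1] for neighbor in neighbors).most_common(1)[0][0]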

def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    # prepare data: re-attach the labels as the last column
    trainingSet = np.c_[X_train, y_train]
    testSet = np.c_[X_test, y_test]
    # generate predictions
    predictions = []
    k = 7
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy KNN: ' + repr(accuracy) + '%')
    # Train the MLP model
    mlp.fit(X_train, y_train)
    # Test the MLP model
    print('Accuracy MLP: ' + repr(mlp.score(X_test, y_test) * 100.0) + '%')

main()
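
# Optional sanity check (not in the original paste): sklearn's built-in
# KNN with the same k should score comparably to the hand-rolled
# implementation above. A minimal sketch:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, y_train)
print('Accuracy sklearn KNN: ' + repr(knn.score(X_test, y_test) * 100.0) + '%')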