Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from io import StringIO
- import re
- import pandas as pd
- import csv
- import string
- import numpy as np
- import random
- import math
def preprocess(path='ecoli.data', train_size=70):
    """Load the data file and build a shuffled two-class dataset.

    Every line is split on whitespace. A row is kept only when its ninth
    field (index 8) is ``'cp'`` or ``'im'``; that label is encoded as 1.0
    or 0.0 respectively. The first field of each kept row is discarded and
    the remaining fields are converted to ``float``. Lines with fewer than
    nine fields (for example a trailing blank line) are skipped instead of
    raising ``IndexError``.

    Args:
        path: File to read. Defaults to ``'ecoli.data'`` in the current
            working directory.
        train_size: Number of shuffled rows placed in the training split.
            All remaining rows form the test split, so the two splits
            never share a row.

    Returns:
        A tuple ``(train_data, test_data, preprocessed_dataset)`` where
        ``preprocessed_dataset`` is the full shuffled list of rows and each
        row is a list of floats whose last element is the 0.0/1.0 label.

    Raises:
        OSError: If ``path`` cannot be opened.
        ValueError: If a kept row contains a non-numeric attribute.
    """
    label_codes = {'cp': '1', 'im': '0'}
    preprocessed_dataset = []

    # The context manager guarantees the file is closed even on error.
    with open(path, 'r') as data_file:
        for line in data_file:
            fields = line.split()
            # Ignore blank/truncated lines and every class other than cp/im.
            if len(fields) < 9 or fields[8] not in label_codes:
                continue
            fields[8] = label_codes[fields[8]]
            # fields[0] is dropped; everything after it becomes a float.
            preprocessed_dataset.append([float(value) for value in fields[1:]])

    random.shuffle(preprocessed_dataset)

    # Complementary slices: the test split starts where the training split
    # ends, so no row is used for both training and evaluation.
    train_data = preprocessed_dataset[:train_size]
    test_data = preprocessed_dataset[train_size:]
    return train_data, test_data, preprocessed_dataset
# Shared parameter vector of the network: twelve values drawn uniformly
# from [-1, 1]. firstLayer reads indices 0-8 and secondLayer reads 9-11.
weights = [random.uniform(-1, 1) for _ in range(12)]
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z) of ``z``.

    Inputs below -100 or above 100 short-circuit to the saturated values
    0 and 1, which keeps ``math.exp`` from being called with extreme
    arguments.
    """
    below_range = z < -100
    above_range = z > 100
    if below_range:
        return 0
    if above_range:
        return 1
    denominator = 1 + math.exp(-z)
    return 1.0 / denominator
def firstLayer(row, weights):
    """Compute the two hidden-unit outputs for one input row.

    The first unit uses bias ``weights[0]`` and ``weights[1..3]`` applied
    to ``row[0..2]``; the second uses bias ``weights[4]`` and
    ``weights[5..8]`` applied to ``row[3..6]``.

    Returns:
        A 2-tuple with the sigmoid of each unit's weighted sum.
    """
    # Unit 1: weight index is the input index shifted by one (past the bias).
    net_first = weights[0]
    for idx in range(3):
        net_first += weights[idx + 1] * row[idx]

    # Unit 2: weight index is the input index shifted by two (both biases).
    net_second = weights[4]
    for idx in range(3, 7):
        net_second += weights[idx + 2] * row[idx]

    return sigmoid(net_first), sigmoid(net_second)
def secondLayer(row, weights):
    """Compute the output-unit activation from two incoming values.

    ``row`` supplies the two inputs (``row[0]`` and ``row[1]``); the unit
    uses bias ``weights[9]`` and coefficients ``weights[10]`` and
    ``weights[11]``.

    Returns:
        The sigmoid of the weighted sum.
    """
    net_input = weights[9] + weights[10] * row[0] + weights[11] * row[1]
    return sigmoid(net_input)
- #return 1.0 if activation_3 >= 0.0 else 0.0
def predict(row, weights):
    """Run one input row through both layers.

    Returns:
        A tuple ``(output, hidden)`` where ``hidden`` is the pair produced
        by ``firstLayer`` and ``output`` is ``secondLayer`` applied to it.
    """
    hidden = firstLayer(row, weights)
    output = secondLayer(hidden, weights)
    return output, hidden
def train_weights(train, learningrate, epochs):
    """Train the module-level ``weights`` list in place, one row at a time.

    For every row the network output is computed with ``predict`` and the
    error ``target - output`` drives a gradient step on all twelve weights.
    The output-layer step uses the error directly. The hidden-layer steps
    propagate that error back through the corresponding output weight
    (``weights[10]`` / ``weights[11]``) and the hidden unit's sigmoid
    derivative ``h * (1 - h)``. Without that factor the hidden weights
    would move in the wrong direction whenever an output weight is
    negative.

    Args:
        train: Iterable of rows; the last element of each row is the
            target and the preceding elements are the inputs.
        learningrate: Step size applied to every update.
        epochs: Number of full passes over ``train``.

    Returns:
        The same module-level ``weights`` list that was updated in place.
    """
    last_error = 0.0
    for epoch in range(epochs):
        sum_error = 0.0
        for row in train:
            prediction, first_layer = predict(row[:-1], weights)
            error = row[-1] - prediction
            sum_error += error ** 2

            # Hidden-unit error terms. They must be computed from the
            # output weights *before* those weights are updated below.
            hidden_1, hidden_2 = first_layer
            delta_1 = error * weights[10] * hidden_1 * (1.0 - hidden_1)
            delta_2 = error * weights[11] * hidden_2 * (1.0 - hidden_2)

            # First layer, unit 1: bias weights[0], inputs row[0..2].
            step_1 = learningrate * delta_1
            weights[0] = weights[0] + step_1
            for idx in range(3):
                weights[idx + 1] = weights[idx + 1] + step_1 * row[idx]

            # First layer, unit 2: bias weights[4], inputs row[3..6].
            step_2 = learningrate * delta_2
            weights[4] = weights[4] + step_2
            for idx in range(3, 7):
                weights[idx + 2] = weights[idx + 2] + step_2 * row[idx]

            # Second layer: driven by the output error and hidden outputs.
            weights[9] = weights[9] + learningrate * error
            weights[10] = weights[10] + learningrate * error * hidden_1
            weights[11] = weights[11] + learningrate * error * hidden_2

        # Report every 100th epoch and whenever the summed squared error
        # differs from the previous epoch's.
        if (epoch % 100 == 0) or (last_error != sum_error):
            print("Epoch "+str(epoch) + " Learning rate " + str(learningrate) + " Error " + str(sum_error))
        last_error = sum_error
    return weights
# preprocess() returns (train, test, full dataset) in that order; unpack
# with matching names so each variable holds what it says.
train_data, test_data, preprocessed_dataset = preprocess()

# Show the untrained network's output next to the true label for every row.
for row in preprocessed_dataset:
    print(predict(row[:-1], weights)[0], row[-1])

learningrate = 0.01
epochs = 1000

# Train on the training split only. The result is bound to a new name so
# the train_weights function itself is not overwritten.
trained_weights = train_weights(train_data, learningrate, epochs)
print(trained_weights)

# Evaluate on the test split: a prediction counts as correct when the
# rounded network output equals the 0.0/1.0 label.
accuracy = 0.0
for row in test_data:
    prediction = predict(row[:-1], trained_weights)
    print("Prediction:", prediction[0], " Real value:", row[-1])
    print("Error:", prediction[0] - row[-1])
    if round(prediction[0]) == row[-1]:
        accuracy += 1
# Avoid dividing by zero when the test split is empty.
if test_data:
    accuracy = accuracy / len(test_data)
print("Accurary", accuracy)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement