# Untitled

from io import StringIO
import re
import pandas as pd
import csv
import string
import numpy as np
import random
import math


def preprocess():
    """Load ``ecoli.data`` and build a shuffled binary (cp vs. im) dataset.

    Every whitespace-separated line is expected to carry at least nine
    fields: a leading name column (dropped), numeric columns, and a class
    label in the ninth field.  Only rows labelled ``cp`` (encoded as 1.0) or
    ``im`` (encoded as 0.0) are kept; every remaining field of a kept row is
    converted to ``float``.  Lines with fewer than nine fields (for example
    blank lines) are skipped.

    Returns:
        A tuple ``(train_data, test_data, preprocessed_dataset)``.
        ``preprocessed_dataset`` is the full shuffled list of rows,
        ``train_data`` is its first 70 rows and ``test_data`` is every row
        after those, so the two splits never share a row.

    Raises:
        OSError: if ``ecoli.data`` cannot be opened.
        ValueError: if a kept row contains a field that is not numeric.
    """
    label_codes = {'cp': '1', 'im': '0'}
    train_size = 70

    preprocessed_dataset = []
    # The context manager closes the file even when parsing raises.
    with open('ecoli.data', 'r') as f:
        for line in f:
            row = line.split()
            # Skip blank/truncated lines instead of failing on row[8].
            if len(row) < 9:
                continue
            code = label_codes.get(row[8])
            if code is None:
                # Any other class label is outside the binary task.
                continue
            row[8] = code
            # row[0] is the name column; it is not part of the numeric data.
            preprocessed_dataset.append([float(x) for x in row[1:]])

    random.shuffle(preprocessed_dataset)

    # The test split starts where the training split ends; slicing both from
    # overlapping ranges would leak training rows into the evaluation set.
    train_data = preprocessed_dataset[:train_size]
    test_data = preprocessed_dataset[train_size:]

    return train_data, test_data, preprocessed_dataset

# Twelve network parameters, each drawn uniformly from [-1, 1].
weights = [random.uniform(-1, 1) for _ in range(12)]

def sigmoid(z):
    """Return the logistic function of ``z``, saturating for large ``|z|``.

    Inputs below -100 yield 0 and inputs above 100 yield 1 without
    evaluating the exponential; everything else is ``1 / (1 + e**-z)``.
    """
    below_range = z < -100
    above_range = z > 100
    if below_range or above_range:
        return 1 if above_range else 0
    return 1.0 / (1 + math.exp(-z))

def firstLayer(row, weights):
    """Evaluate the two hidden units for one feature row.

    The first unit combines ``row[0..2]`` with ``weights[1..3]`` and bias
    ``weights[0]``; the second combines ``row[3..6]`` with ``weights[5..8]``
    and bias ``weights[4]``.

    Returns:
        A 2-tuple with the sigmoid output of each unit.
    """
    # Hidden unit 1: bias plus three weighted inputs.
    pre_1 = weights[0]
    for k in range(3):
        pre_1 += weights[1 + k] * row[k]

    # Hidden unit 2: bias plus four weighted inputs.
    pre_2 = weights[4]
    for k in range(4):
        pre_2 += weights[5 + k] * row[3 + k]

    return sigmoid(pre_1), sigmoid(pre_2)


def secondLayer(row, weights):
    """Evaluate the output unit from the two hidden activations in ``row``.

    Uses ``weights[9]`` as the bias and ``weights[10]`` / ``weights[11]`` as
    the weights of ``row[0]`` / ``row[1]``; returns the sigmoid of that sum.
    """
    pre_activation = weights[9] + weights[10] * row[0] + weights[11] * row[1]
    return sigmoid(pre_activation)

def predict(row, weights):
    """Run one forward pass through both layers.

    Returns:
        ``(output, hidden)`` where ``hidden`` is the tuple produced by
        ``firstLayer`` and ``output`` is ``secondLayer`` applied to it.
    """
    hidden = firstLayer(row, weights)
    return secondLayer(hidden, weights), hidden

def train_weights(train, learningrate, epochs):
    """Train the network with per-sample (online) weight updates.

    NOTE: this function has no weights parameter; it reads and mutates the
    module-level ``weights`` list in place and returns that same list.

    Args:
        train: sequence of rows where ``row[0]``..``row[6]`` are the input
            features and ``row[-1]`` is the target value.
        learningrate: step size applied to every update.
        epochs: number of full passes over ``train``.

    Returns:
        The module-level ``weights`` list after training.
    """
    last_error = 0.0
    for epoch in range(epochs):
        sum_error = 0.0
        for row in train:
            prediction, first_layer = predict(row[:-1], weights)
            error = row[-1] - prediction
            # Squared error is accumulated for progress reporting only.
            sum_error += error ** 2

            # Back-propagate the output error to each hidden unit: scale it
            # by the output weight that connects the unit and by the slope of
            # the unit's sigmoid, h * (1 - h).  Applying the raw output error
            # to the hidden weights steps in the wrong direction whenever
            # that output weight is negative.  The deltas are computed before
            # weights[10] / weights[11] are changed below.
            hidden_1, hidden_2 = first_layer
            delta_1 = error * weights[10] * hidden_1 * (1.0 - hidden_1)
            delta_2 = error * weights[11] * hidden_2 * (1.0 - hidden_2)

            # First layer, hidden unit 1: bias weights[0], inputs row[0..2].
            weights[0] += learningrate * delta_1
            for k in range(3):
                weights[1 + k] += learningrate * delta_1 * row[k]

            # First layer, hidden unit 2: bias weights[4], inputs row[3..6].
            weights[4] += learningrate * delta_2
            for k in range(4):
                weights[5 + k] += learningrate * delta_2 * row[3 + k]

            # Second layer: the output error drives the output unit directly.
            weights[9] += learningrate * error
            weights[10] += learningrate * error * hidden_1
            weights[11] += learningrate * error * hidden_2

        # Report every 100th epoch and any epoch whose total error changed.
        if epoch % 100 == 0 or last_error != sum_error:
            print(f"Epoch {epoch} Learning rate {learningrate} Error {sum_error}")
        last_error = sum_error
    return weights

# preprocess() returns (train split, test split, full dataset) in that order;
# unpacking in any other order silently swaps the splits.
train_data, test_data, preprocessed_dataset = preprocess()

# Show the untrained network's output next to the true label of every row.
for row in preprocessed_dataset:
    print(predict(row[:-1], weights)[0], row[-1])


learningrate = 0.01
epochs = 1000
# Fit on the training split only.  The result gets its own name so that the
# train_weights function is not overwritten by its return value.
trained_weights = train_weights(train_data, learningrate, epochs)
print(trained_weights)


# Evaluate on the test split: a prediction counts as correct when it rounds
# to the row's label.
accuracy = 0.0
for row in test_data:
    prediction = predict(row[:-1], trained_weights)
    print("Prediction:", prediction[0], " Real value:", row[-1])
    print("Error:", prediction[0] - row[-1])
    if round(prediction[0]) == row[-1]:
        accuracy += 1


# An empty test split would otherwise raise ZeroDivisionError.
accuracy = accuracy / len(test_data) if test_data else 0.0
print("Accuracy", accuracy)