Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import numpy as np
- from numpy import array
- import math
# Load the raw iris dataset: one comma-separated record per line.
# NOTE(review): opening in 'rb' and splitting on '\n' assumes Python 2,
# where file iteration yields str — under Python 3 this would yield bytes.
corpus = []
with open('iris.data.txt','rb') as f:
    for s in f:
        # Drop the trailing newline, then split the record into fields.
        s = s.split('\n')[0]
        corpus = corpus + [s.split(',')]
def findDatatype(dataTypeName):
    """Map an iris species label to its numeric class index.

    Returns 1.0 for 'Iris-setosa', 2.0 for 'Iris-versicolor',
    3.0 for 'Iris-virginica', and 0.0 for any other label.
    """
    species_index = {
        'Iris-setosa': 1.0,
        'Iris-versicolor': 2.0,
        'Iris-virginica': 3.0,
    }
    return species_index.get(dataTypeName, 0.0)
# Convert the four measurement columns to floats and the species label
# (column 4) to a numeric class index.
for i in range(len(corpus)):
    corpus[i][0] = float(corpus[i][0])
    corpus[i][1] = float(corpus[i][1])
    corpus[i][2] = float(corpus[i][2])
    corpus[i][3] = float(corpus[i][3])
    corpus[i][4] = findDatatype(corpus[i][4])
data = np.array(corpus)
# Standardize each feature column to zero mean / unit variance.
# The last column (class index) is deliberately left untouched.
for i in range(data.shape[1]-1):
    data[:,i] = (data[:,i] - np.mean(data[:,i]))/np.std(data[:,i])
# Partition the samples by class index so each class can contribute
# equally to the train/test split below.
data1 = []
data2 = []
data3 = []
for x in data:
    if x[4] == 1.0:
        data1 = data1 + [x]
    elif x[4] == 2.0:
        data2 = data2 + [x]
    elif x[4] == 3.0:
        data3 = data3 + [x]
data1 = np.array(data1)
data2 = np.array(data2)
data3 = np.array(data3)
# Take the first 25 samples of every class for training and the remainder
# for testing, then shuffle each set in place so classes are interleaved.
data_size_training = 25
train_data = data1[:data_size_training]
train_data = np.append(train_data,data2[:data_size_training],axis=0)
train_data = np.append(train_data,data3[:data_size_training],axis=0)
test_data = data1[data_size_training:]
test_data = np.append(test_data,data2[data_size_training:],axis=0)
test_data = np.append(test_data,data3[data_size_training:],axis=0)
np.random.shuffle(train_data)
np.random.shuffle(test_data)
# Build one-hot target vectors from the class-index column: class k
# (1..3) becomes row k-1 of the 3x3 identity matrix.
train_output = []
I = np.identity(3)
for x in train_data:
    # BUGFIX: x[4] is a numpy float; modern NumPy rejects non-integer
    # indices, so cast explicitly before indexing.
    train_output = train_output + [I[int(x[4])-1]]
test_output = []
for x in test_data:
    test_output = test_output + [I[int(x[4])-1]]
train_output = np.array(train_output)
test_output = np.array(test_output)
# Keep only the four standardized features; drop the label column.
train_data = train_data[:,:4]
test_data = test_data[:,:4]
#train_data = np.append(np.ones((train_data.shape[0],1)),train_data[:,:4],axis=1)
#test_data = np.append(np.ones((test_data.shape[0],1)),test_data[:,:4],axis=1)
- K = train_data.shape[0]/10
def kmean(data, k=None):
    """Choose k cluster centers with one competitive-learning pass.

    Centers are seeded with k randomly chosen rows of ``data``; each
    sample then pulls its nearest center halfway toward itself.

    Parameters
    ----------
    data : 2-D array, one sample per row.
    k : number of centers; defaults to the module-level K for backward
        compatibility with the original zero-argument call sites.

    Returns
    -------
    (k, data.shape[1]) array of cluster centers.
    """
    if k is None:
        k = K
    C = np.zeros((k, data.shape[1]))
    for i in range(k):
        # int() because math.floor returns a float under Python 2 and
        # NumPy requires integer indices.
        C[i] = data[int(math.floor(np.random.rand() * data.shape[0]))]
    for i in range(data.shape[0]):
        # BUGFIX: the winner is the NEAREST center (argmin). The original
        # used argmax, which drags the farthest center toward every sample
        # and contradicts the k-means/competitive-learning update.
        index = np.argmin([np.linalg.norm(C[j] - data[i]) for j in range(k)])
        C[index] = C[index] + .5 * (data[i] - C[index])
    return C
def phi_func(c):
    """Return a Gaussian radial basis function centered at ``c``.

    The returned callable maps x to exp(-0.5 * ||x - c||^2 / len(c)),
    i.e. the bandwidth scales with the dimensionality of the center.
    """
    def rbf(x):
        dist_sq = np.linalg.norm(x - c) ** 2
        return np.exp(-0.5 * dist_sq / c.shape[0])
    return rbf
def Phi(C):
    """Build one radial basis function per row of ``C``.

    Returns a list of callables, one Gaussian RBF per cluster center.
    """
    return [phi_func(center) for center in C]
def sigmoid(x):
    """Elementwise logistic function 1 / (1 + e^-x).

    BUGFIX: this was originally defined as 'sigmoeid', but every caller
    in the file (feedForward, backPropogation) invokes 'sigmoid', which
    would raise NameError at runtime. The correct spelling is now the
    primary definition.
    """
    return 1/(1+np.exp(-x))

# Backward-compatible alias for the original misspelled name.
sigmoeid = sigmoid
# Randomly initialize the output weights: one row per class, one column
# per RBF plus one for the bias term.
theta = np.random.rand(train_output.shape[1],K+1)
# Pick the RBF centers from the training data and build the basis functions.
C = kmean(train_data)
phi = Phi(C)
#'''
# Project every training sample through the RBF layer and prepend a bias
# column of ones, yielding the (n_samples, K+1) design matrix.
phi_train_data = []
for t in train_data:
    phi_train_data = phi_train_data + [[phi[i](t) for i in range(len(phi))]]
phi_train_data = np.array(phi_train_data)
phi_train_data = np.append(np.ones((phi_train_data.shape[0],1)),phi_train_data,axis=1)
#'''
def feedForward(fvec):
    """Run one feature vector through the RBF network.

    Evaluates every basis function in the module-level ``phi`` list,
    prepends the bias term, applies the weight matrix ``theta`` and
    squashes with the logistic function. Returns the vector of output
    activations (one entry per class).
    """
    features = np.array([[1] + [basis(fvec) for basis in phi]])
    activations = sigmoid(np.dot(theta, features.T))
    return activations.T[0]
def backPropogation(flag):
    """Compute one batch of gradient-descent updates.

    Returns (Delta0, Delta): the batch-averaged update for the RBF
    centers C and for the output weights theta, respectively. When
    ``flag`` is False the center update Delta0 is returned as all zeros.
    Reads the module-level train_data, train_output, theta, phi and C.
    """
    #global C
    #global phi
    # Rebuild the design matrix: RBF activations plus a bias column of ones.
    phi_train_data = []
    for t in train_data:
        phi_train_data = phi_train_data + [[phi[i](t) for i in range(len(phi))]]
    phi_train_data = np.array(phi_train_data)
    phi_train_data = np.append(np.ones((phi_train_data.shape[0],1)),phi_train_data,axis=1)
    #grad_phi = np.zeros((phi_train_data.shape[0],phi_train_data.shape[1],train_data.shape[1]))
    # Forward pass over the whole batch.
    # NOTE(review): 'sigmoid' is spelled 'sigmoeid' where it is defined
    # above — confirm the intended name, this is a NameError as written.
    U = np.dot(theta,phi_train_data.T)
    Y = sigmoid(U)
    D = train_output.T
    # Delta rule for a sigmoid output layer: dE/dU = (Y - D) * Y * (1 - Y).
    gradY = ((Y-D)*Y*(1-Y))
    # Weight gradient, averaged over the batch.
    Delta = np.dot(gradY,phi_train_data)/phi_train_data.shape[0]
    Delta0 = np.zeros(C.shape)
    if flag:
        # Back-propagate through the RBF layer to also adapt the centers;
        # the bias row of theta is dropped ([1:,:]) since it has no center.
        gradC = np.dot(theta.T,gradY)[1:,:]
        for i in range(phi_train_data.shape[0]):
            Delta0 = Delta0 + 2*np.array([gradC[k][i]*phi_train_data[i][1:][k]*(C[k]-train_data[i]) for k in range(C.shape[0])])
        Delta0 = Delta0/phi_train_data.shape[0]
    return Delta0,Delta
#'''
# Training loop: plain batch gradient descent with a momentum term.
# (Python 2 script — print statements below.)
maxIter = 8000
Delta = 0.0
Delta0 = 0.0
# Setting flag=True would additionally adapt the RBF centers each step.
flag = False
for i in range(maxIter):
    newDelta0,newDelta = backPropogation(flag)
    Delta = 0.3*Delta + .2*newDelta #momentum factor
    theta = theta - Delta
    if flag:
        Delta0 = 0.3*Delta0 + .2*newDelta0
        C = C - Delta0
        # Centers moved, so the basis functions must be rebuilt.
        phi = Phi(C)
    #print i
print 'Delta:(after',maxIter,'iterations)\n',Delta
print '\nDelta_Cluster_Centers:(after',maxIter,'iterations)\n',Delta0
# Evaluation: predict by setting a 1 at the argmax output unit, then
# compare one-hot predictions against the one-hot targets — the
# elementwise product sums to the number of correctly classified samples.
est_test_output = np.zeros(test_output.shape)
est_train_output = np.zeros(train_output.shape)
for i in range(est_test_output.shape[0]):
    est_test_output[i][np.argmax(feedForward(test_data[i]))] = 1
for i in range(est_train_output.shape[0]):
    est_train_output[i][np.argmax(feedForward(train_data[i]))] = 1
print 'misclassification rate on training set is',1.0-(np.sum(train_output*est_train_output)/train_output.shape[0]),'after',maxIter,'iterations'
print 'misclassification rate on test set is',1.0-(np.sum(test_output*est_test_output)/test_output.shape[0]),'after',maxIter,'iterations'
#'''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement