Advertisement
sau003

Iris Data processing

Mar 20th, 2014
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.90 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. from numpy import array
  4. import math
  5.  
  6. corpus = []
  7. with open('iris.data.txt','rb') as f:
  8.     for s in f:
  9.         s = s.split('\n')[0]
  10.         corpus = corpus + [s.split(',')]
  11.  
  12. def findDatatype(dataTypeName):
  13.     dataTypeIndex = 0.0
  14.     if(dataTypeName == 'Iris-setosa'):
  15.         dataTypeIndex = 1.0
  16.     elif(dataTypeName == 'Iris-versicolor'):
  17.         dataTypeIndex = 2.0
  18.     elif(dataTypeName == 'Iris-virginica'):
  19.         dataTypeIndex = 3.0
  20.    
  21.     return dataTypeIndex
  22.    
  23. for i in range(len(corpus)):
  24.     corpus[i][0] = float(corpus[i][0])
  25.     corpus[i][1] = float(corpus[i][1])
  26.     corpus[i][2] = float(corpus[i][2])
  27.     corpus[i][3] = float(corpus[i][3])
  28.     corpus[i][4] = findDatatype(corpus[i][4])
  29.  
  30.  
  31. data = np.array(corpus)
  32. for i in range(data.shape[1]-1):
  33.     data[:,i] = (data[:,i] - np.mean(data[:,i]))/np.std(data[:,i])
  34.  
# Partition the normalised rows by class label (column 4).
data1 = []
data2 = []
data3 = []

for x in data:
    if x[4] == 1.0:
        data1 = data1 + [x]
    elif x[4] == 2.0:
        data2 = data2 + [x]
    elif x[4] == 3.0:
        data3 = data3 + [x]
data1 = np.array(data1)
data2 = np.array(data2)
data3 = np.array(data3)
# The first 25 samples of each class form the training set (75 rows,
# class-balanced); the remainder of each class forms the test set.
data_size_training = 25
train_data = data1[:data_size_training]
train_data = np.append(train_data,data2[:data_size_training],axis=0)
train_data = np.append(train_data,data3[:data_size_training],axis=0)

test_data = data1[data_size_training:]
test_data = np.append(test_data,data2[data_size_training:],axis=0)
test_data = np.append(test_data,data3[data_size_training:],axis=0)

# Shuffle both sets in place so sample order does not follow class order.
np.random.shuffle(train_data)
np.random.shuffle(test_data)
  60.  
# One-hot encode the class labels: row (label-1) of the 3x3 identity
# matrix is the target vector for that class.
train_output = []
I = np.identity(3)
for x in train_data:
    # NOTE(review): x[4] is a float label (1.0/2.0/3.0), so I[x[4]-1]
    # indexes with a float -- accepted by the old NumPy this was written
    # for; modern NumPy requires an int index.
    train_output = train_output + [I[x[4]-1]]

test_output = []
for x in test_data:
    test_output = test_output + [I[x[4]-1]]

train_output = np.array(train_output)
test_output = np.array(test_output)
# Keep only the four feature columns; the label now lives in *_output.
train_data = train_data[:,:4]
test_data = test_data[:,:4]
#train_data = np.append(np.ones((train_data.shape[0],1)),train_data[:,:4],axis=1)
#test_data = np.append(np.ones((test_data.shape[0],1)),test_data[:,:4],axis=1)

# Number of RBF centres: one tenth of the training set. Python 2 integer
# division -> 7 for the 75 training rows.
K = train_data.shape[0]/10
  78.  
  79. def kmean(data):
  80.     C = np.zeros((K,data.shape[1]))
  81.     for i in range(K):
  82.         C[i] = data[math.floor(np.random.rand()*data.shape[0])]
  83.    
  84.     for i in range(data.shape[0]):
  85.         index = np.argmax([np.linalg.norm(C[j]-data[i]) for j in range(K)])
  86.         C[index] = C[index] + .5*(data[i] - C[index])
  87.    
  88.     return C
  89.  
  90. def phi_func(c):
  91.     return lambda x:np.exp(-.5*(np.linalg.norm(x-c)**2)/c.shape[0])
  92.  
  93. def Phi(C):
  94.     return [phi_func(C[i]) for i in range(C.shape[0])]
  95.  
  96. def sigmoeid(x):
  97.     return 1/(1+np.exp(-x))
  98.  
# Random initial output weights: one row per class, one column per RBF
# plus a bias column (hence K+1).
theta = np.random.rand(train_output.shape[1],K+1)
C = kmean(train_data)
phi = Phi(C)
#'''
# Project every training sample through the K RBFs and prepend a bias 1.
# NOTE(review): this matrix is recomputed inside backPropogation(), so
# the result built here is never used afterwards.
phi_train_data = []
for t in train_data:
    phi_train_data = phi_train_data + [[phi[i](t) for i in range(len(phi))]]

phi_train_data = np.array(phi_train_data)
phi_train_data = np.append(np.ones((phi_train_data.shape[0],1)),phi_train_data,axis=1)
#'''
  110. def feedForward(fvec):
  111.     phi_x = np.append([[1]],np.array([[phi[i](fvec) for i in range(len(phi))]]),axis=1)
  112.     u = np.dot(theta,phi_x.T)
  113.     v = sigmoid(u)
  114.     return v.T[0]
  115.  
def backPropogation(flag):
    """One batch gradient step for the RBF network.

    Computes the gradient of the squared error w.r.t. the output
    weights (Delta) and, when `flag` is truthy, w.r.t. the RBF centres
    (Delta0). Reads the module-level train_data, train_output, theta,
    phi and C.

    Args:
        flag: when truthy, also compute the centre gradient Delta0;
            otherwise Delta0 is returned as zeros.

    Returns:
        (Delta0, Delta): centre gradient (shape of C) and weight
        gradient (shape of theta), both averaged over the training set.
    """
    #global C
    #global phi
    # Design matrix: each row is [1, phi_1(t), ..., phi_K(t)].
    phi_train_data = []
    for t in train_data:
        phi_train_data = phi_train_data + [[phi[i](t) for i in range(len(phi))]]

    phi_train_data = np.array(phi_train_data)
    phi_train_data = np.append(np.ones((phi_train_data.shape[0],1)),phi_train_data,axis=1)
    #grad_phi = np.zeros((phi_train_data.shape[0],phi_train_data.shape[1],train_data.shape[1]))
    U = np.dot(theta,phi_train_data.T)
    # NOTE(review): `sigmoid` is never defined in this file (only the
    # misspelled `sigmoeid` is) -- this line raises NameError as written.
    Y = sigmoid(U)
    D = train_output.T
    # Derivative of squared error through the logistic: (Y-D)*Y*(1-Y).
    gradY = ((Y-D)*Y*(1-Y))
    Delta = np.dot(gradY,phi_train_data)/phi_train_data.shape[0]
    Delta0 = np.zeros(C.shape)
    if flag:
        # Back-propagate through the RBF layer to the centres; [1:,:]
        # drops the bias row of theta, which has no centre.
        gradC = np.dot(theta.T,gradY)[1:,:]
        for i in range(phi_train_data.shape[0]):
            Delta0 = Delta0 + 2*np.array([gradC[k][i]*phi_train_data[i][1:][k]*(C[k]-train_data[i]) for k in range(C.shape[0])])
        Delta0 = Delta0/phi_train_data.shape[0]
    return Delta0,Delta
#'''
# Batch gradient descent with momentum-style smoothing: each step keeps
# 30% of the previous update plus 20% of the newly computed gradient.
maxIter = 8000
Delta = 0.0
Delta0 = 0.0
flag = False  # centre updates disabled: only theta is trained
for i in range(maxIter):
    newDelta0,newDelta = backPropogation(flag)
    Delta = 0.3*Delta + .2*newDelta #momentum factor
    theta = theta - Delta
    if flag:
        Delta0 = 0.3*Delta0 + .2*newDelta0
        C = C - Delta0
        phi = Phi(C)
    #print i
  152.  
print 'Delta:(after',maxIter,'iterations)\n',Delta
print '\nDelta_Cluster_Centers:(after',maxIter,'iterations)\n',Delta0

# Predict by one-hot encoding the argmax activation for every sample.
est_test_output = np.zeros(test_output.shape)
est_train_output = np.zeros(train_output.shape)

for i in range(est_test_output.shape[0]):
    est_test_output[i][np.argmax(feedForward(test_data[i]))] = 1

for i in range(est_train_output.shape[0]):
    est_train_output[i][np.argmax(feedForward(train_data[i]))] = 1

# A sample is correct iff its one-hot prediction matches the target, so
# sum(target * prediction) counts the correct samples.
print 'misclassification rate on training set is',1.0-(np.sum(train_output*est_train_output)/train_output.shape[0]),'after',maxIter,'iterations'
print 'misclassification rate on test set is',1.0-(np.sum(test_output*est_test_output)/test_output.shape[0]),'after',maxIter,'iterations'
#'''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement