# Source: Pastebin paste "Untitled", posted by a guest on Aug 29th, 2015
# (plain text, 4.81 KB).
import os

import matplotlib.pyplot as plt
import numpy as np

# Import of support vector machine (svm)
from sklearn import svm

  8. """--------------------------------SETTINGS---------------------------------"""
  9.  
  10. # Load data from .txt file
  11. os.chdir("C:\\")
  12. file = open("semeion.txt","r")
  13. data = file.read()
  14. file.close()
  15.  
  16. # Get index max (number of rows in the data)
  17. def getIndexMax(data):
  18. dataSplitted = data.split("\n")
  19. return len(dataSplitted)
  20.  
  21.  
  22. # Prepare data for fitting function
  23. # Answer: if True, returns answers (labels)
  24. # Training: if True, returns training samples (with no answers, only samples)
  25. # Last: if True, returns only last training sample/answer. Useful for testing
  26. def returnDataToUse(data,index,answers=False,training=False,last=False):
  27. dataSplitted = data.split("\n")
  28.  
  29. # Check that the index is not bigger than our dataset
  30. if index > len(dataSplitted):
  31. print("Index out of bounds, index max:",len(dataSplitted))
  32. return 0
  33.  
  34. # This bit of code returns answers
  35. if answers and not training:
  36. firstLine = dataSplitted[0][:-1].split(" ")
  37. firstAnsw = firstLine[256:]
  38. firstAnsw2 = [int(i) for i in firstAnsw]
  39.  
  40. firstAnswInt = firstAnsw2.index(1)
  41. correctAnswers = [firstAnswInt]
  42.  
  43. i = 1
  44. while i < index:
  45. temp = dataSplitted[i][:-1].split(" ")
  46. temp2 = temp[256:]
  47. temp3 = [int(k) for k in temp2]
  48. temp4 = temp3.index(1)
  49. correctAnswers.append(temp4)
  50. i += 1
  51.  
  52. completeAnswers = np.array(correctAnswers)
  53.  
  54. if last:
  55. return completeAnswers[-1]
  56. else:
  57. return completeAnswers
  58.  
  59. # This bit of code returns pure samples
  60. if training and not answers:
  61. firstLine = dataSplitted[0][:-1].split(" ")
  62. firstTraining = firstLine[:256]
  63. trainingArray = np.array([float(i) for i in firstTraining])
  64.  
  65. i = 1
  66. while i < index:
  67. temp = dataSplitted[i][:-1].split(" ")
  68. temp2 = temp[:256]
  69. temp3 = np.array([float(k) for k in temp2])
  70. trainingArray = np.vstack((trainingArray,temp3))
  71. i += 1
  72.  
  73. if last:
  74. return trainingArray[-1]
  75. else:
  76. return trainingArray
  77.  
  78. # This function displays the image of the number (sample at row x)
  79. # and prints the answer the predictor should give us back
  80. def displayImage(data,row):
  81.  
  82. # Split each row
  83. dataSplitted = data.split("\n")
  84.  
  85. # Get the 'rowth' row
  86. strings = dataSplitted[row]
  87.  
  88. # Split row into numbers(string), and avoid blank at the end
  89. stringsSplitted = (strings[:-1]).split(" ")
  90.  
  91. # Get target and convert it into numbers, then in a numpy array
  92. risp = stringsSplitted[256:]
  93. rispInt = [int(i) for i in risp]
  94. rispNp = np.array(rispInt)
  95.  
  96. # Print original data and number to guess in readable format
  97. print(rispInt)
  98. print("Number to guess:",rispInt.index(1),"\n")
  99.  
  100. # Training array converted into float numbers
  101. training = stringsSplitted[:256]
  102. trainingFloat = [float(i) for i in training]
  103.  
  104. # Building 16x16 (image) array
  105. #.
  106. i = 16
  107. k = 0
  108. img = np.array(trainingFloat[:16])
  109. while i <= len(trainingFloat):
  110.  
  111. #print(i)
  112. #print(k)
  113. temp = np.array(trainingFloat[k:i])
  114. img = np.vstack((img,temp))
  115.  
  116. k = i
  117. i += 16
  118.  
  119. # Plot image
  120. plt.imshow(img,cmap=plt.cm.gray_r,interpolation="nearest")
  121. plt.show()
  122.  
  123.  
  124. """------------------------------TRAINING------------------------------------"""
  125. # FIX THE NUMBER OF TRAINING SAMPLES
  126. trainingSamples = 1500
  127.  
  128. # Gamma: gradient descent parameter
  129. clf = svm.SVC(gamma=0.01, C=100)
  130.  
  131. # Index max
  132. print("Maximum index:",getIndexMax(data),"\n")
  133.  
  134. answerArray = returnDataToUse(data,trainingSamples,answers=True)
  135. trainingAr = returnDataToUse(data,trainingSamples,training=True)
  136.  
  137. x,y = trainingAr,answerArray
  138.  
  139. #Fit the data
  140. print("Training...")
  141. clf.fit(x,y)
  142.  
  143.  
  144. """------------------------------Sample prediction--------------------------"""
  145. # CHOOSE AN EXAMPLE TO PREDICT
  146. example = 1555
  147.  
  148. predictQ = returnDataToUse(data,example,training=True,last=True)
  149. predictA = returnDataToUse(data,example,answers=True,last=True)
  150.  
  151. print("Prediction:",clf.predict(predictQ))
  152. print("Actual answer",predictA,"\n")
  153.  
  154. # Display the actual image
  155. displayImage(data,example)
  156.  
  157.  
  158. """------------------------------Testing Function----------------------------"""
  159. # Actual testing on residual samples (=samples not used for training)
  160. print("Testing...")
  161. correct = 0
  162. wrong = 0
  163. j = example+1
  164. while j < getIndexMax(data):
  165. q = returnDataToUse(data,j,training=True,last=True)
  166. a = returnDataToUse(data,j,answers=True,last=True)
  167. p = clf.predict(q)
  168. if a == p[0]:
  169. correct += 1
  170. else:
  171. wrong += 1
  172. j += 1
  173.  
  174. print("Statistics, correct answers:",correct/(correct+wrong))
# End of the Pastebin paste.