Advertisement
Guest User

Untitled

a guest
Jul 20th, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.02 KB | None | 0 0
  1. import numpy as np
  2. import time
  3.  
  4. class Node():
  5. def __init__(self, point):
  6. self.point = point
  7. self.left = None
  8. self.right = None
  9.  
  10. class Search():
  11. def __init__(self, best_point, best_distance):
  12. self.best_point = best_point
  13. self.best_distance = best_distance
  14.  
  15. def data_mit_index(data):
  16. #np.random.shuffle(data)
  17. A = np.copy(data)
  18. Indexvektor = np.arange(data.shape[0])
  19. A[:, 0] = Indexvektor
  20. return A
  21. def median(list):
  22. if len(list) % 2 == 0:
  23. return np.median(np.delete(list,0,0))
  24. return np.median(list)
  25. def kD_tree(data, level):
  26. num_points = data.shape[0]
  27. if num_points > 1:
  28. dim = data.shape[1] - 1
  29. r = level % dim
  30. auswahl = data[0:101, r + 1]
  31. point_index = auswahl.tolist().index(median(auswahl))
  32. point = auswahl[point_index]
  33. condlist1 = data[1:, r + 1] <= point
  34. P1 = data[1:,:][condlist1]
  35.  
  36. condlist2 = data[1:, r + 1] > point
  37. P2 = data[1:,:][condlist2]
  38.  
  39. v = Node(data[point_index,:])
  40. v.left = kD_tree(P1, level + 1)
  41. #if v.left != None:
  42. # v.left.parent = v
  43. v.right = kD_tree(P2, level + 1)
  44. #if v.right != None:
  45. # v.right.parent = v
  46.  
  47. return v
  48. elif num_points == 1:
  49. end = Node(data[0,:])
  50. return end
  51.  
  52.  
  53. def sucher_setup(x, k, data):
  54. e = np.ones((k, 1))
  55. X = e * x
  56. NORM_squared = np.sum((X - data[0:k, 1:]) * (X - data[0:k, 1:]), axis=1)
  57. NORM = np.sqrt(NORM_squared)
  58. sorted_index = np.argsort(NORM)
  59. NORM_sorted = np.zeros(k)
  60. BEST_POINT = np.zeros((k, data.shape[1]))
  61. for i in range(0, k):
  62. NORM_sorted[i] = NORM[sorted_index[k - i - 1]]
  63. BEST_POINT[i, :] = data[sorted_index[k - i - 1], :]
  64. return Search(BEST_POINT, NORM_sorted)
  65.  
  66.  
def kNN_search(x, k, node, level, sucher):
    # Recursive k-nearest-neighbour search over the kD-tree rooted at `node`.
    # `sucher` holds the k current best candidates sorted by DESCENDING
    # distance (slot 0 = worst kept candidate, slot k-1 = best); it is
    # updated in place, nothing is returned.
    dim = x.shape[0]
    r = level % dim  # split coordinate at this tree level
    distance = np.linalg.norm(x - node.point[1:])

    # Rows with index < k were already pre-seeded into `sucher` by
    # sucher_setup (which uses data[0:k]), so only rows with index >= k are
    # candidates for insertion here — this avoids duplicate entries.
    if node.point[0] >= k:
        for j in range(0, k):
            if distance >= sucher.best_distance[j]:
                if j == 0:
                    # Not closer than the worst kept candidate: discard.
                    break
                else:
                    # Closer than slots 0..j-1 but not slot j: drop the current
                    # worst (slot 0), shift slots 1..j-1 down one, and place the
                    # new candidate at slot j-1.  Keeps descending order.
                    sucher.best_point[0:(j - 1), :] = sucher.best_point[1:j, :]
                    sucher.best_point[j - 1, :] = node.point

                    sucher.best_distance[0:(j - 1)] = sucher.best_distance[1:j]
                    sucher.best_distance[j - 1] = distance
                    break
        # Closer than every kept candidate (the loop fell through without a
        # break): the new point becomes the overall best in the last slot.
        # After an insertion above this condition is False, so no double insert.
        if distance < sucher.best_distance[k - 1]:
            sucher.best_point[0:(k - 1), :] = sucher.best_point[1:k, :]
            sucher.best_point[k - 1, :] = node.point

            sucher.best_distance[0:(k - 1)] = sucher.best_distance[1:k]
            sucher.best_distance[k - 1] = distance
    # best_distance[0] is the largest kept distance and serves as the pruning
    # radius for descending into subtrees.
    if x[r] <= node.point[r + 1]:  # the side x lies on is searched first
        if x[r] - sucher.best_distance[0] <= node.point[r + 1] and node.left != None:  # but only if better points can lie in that box at all
            kNN_search(x, k, node.left, level + 1, sucher)
        if x[r] + sucher.best_distance[0] > node.point[r + 1] and node.right != None:
            kNN_search(x, k, node.right, level + 1, sucher)
    else:
        if x[r] + sucher.best_distance[0] > node.point[r + 1] and node.right != None:
            kNN_search(x, k, node.right, level + 1, sucher)
        if x[r] - sucher.best_distance[0] <= node.point[r + 1] and node.left != None:
            kNN_search(x, k, node.left, level + 1, sucher)
  101.  
  102. def kNN(x, k, data, baum):
  103. sucher = sucher_setup(x, k, data)
  104. kNN_search(x, k, baum, 0, sucher)
  105. return sucher.best_point[:, 0].reshape(k)
  106.  
def classify(name, KSET, l):
    """Train and evaluate a kNN classifier with l-fold cross-validation.

    Reads "<name>.train.csv" (column 0 = class label, remaining columns =
    features — presumably labels are +/-1, since the prediction is the sign
    of the neighbour-label sum; TODO confirm against the data files), picks
    the best k from KSET via l-fold cross-validation, classifies
    "<name>.test.csv" with each fold's tree using that k, writes the
    aggregated vote to "<name>.result.csv" and returns the mean test error.

    name: basename of the csv files (without extension).
    KSET: list of candidate neighbour counts.
    l:    number of cross-validation folds.
    """
    filename = name + ".train.csv"
    data = np.genfromtxt(filename, delimiter=',')
    data_size = data.shape[0]
    elements = data.shape[0] // l  # fold size (last fold takes the remainder)
    np.random.shuffle(data)
    # Copy of data whose column 0 is the row index instead of the label;
    # kNN returns these indices, labels are then looked up in `data`.
    index_data = data_mit_index(data)
    KSET_size = len(KSET)
    k_max = max(KSET)
    KSET_new = KSET.copy()
    KSET_new.remove(k_max)
    R = np.zeros((KSET_size, l))  # R[j, i] = error of k-candidate j on fold i
    KARRAY = np.zeros(KSET_size)  # k values in the row order used by R
    KARRAY[0] = k_max
    tree = []    # one kD-tree per fold, built on that fold's training part
    D_rest = []  # the training part (all rows outside the fold)
    # Folds 0 .. l-2 (fold l-1 is handled separately below because its size
    # may differ when data_size is not divisible by l).
    for i in range(0, l - 1):
        D = index_data[i * elements:(i + 1) * elements, :]  # validation part
        D_rest.append(np.delete(index_data, slice(i * elements, (i + 1) * elements), 0))
        tree.append(kD_tree(D_rest[i], 0))
        classification = np.zeros((KSET_size, elements))
        for j in range(0, elements):
            # Neighbours for the largest k; smaller k reuse a suffix of this
            # result (the suffix holds the nearest ones, see kNN ordering).
            Indizes_kmax = kNN(D[j, 1:], k_max, D_rest[i], tree[i])
            summe = sum(data[Indizes_kmax.astype(int), 0])

            if summe == 0:
                # Tie-break: an even vote is classified as +1.
                classification[0, j] = 1
            else:
                classification[0, j] = np.sign(summe)
            b = 1
            for k in KSET_new:
                KARRAY[b] = k
                Indizes = Indizes_kmax[(k_max - k):]  # the k nearest of the k_max found
                summe = sum(data[Indizes.astype(int), 0])
                if summe == 0:
                    classification[b, j] = 1
                else:
                    classification[b, j] = np.sign(summe)
                b += 1

        for j in range(0, KSET_size):
            # Mean 0/1 error: |true - predicted| / 2 is 1 per misclassification
            # (assuming +/-1 labels — TODO confirm).
            R[j, i] = np.mean(abs(data[D[:, 0].astype(int), 0] - classification[j, :]) / 2)

    # Last fold: from (l-1)*elements to data_size-1.
    # NOTE(review): the slice end data_size-1 excludes the very last row from
    # both the validation part AND this fold's deletion — the last training
    # row appears to be dropped from validation; looks like an off-by-one,
    # verify whether data_size was intended.
    D = index_data[(l - 1) * elements:data_size - 1, :]
    m = D.shape[0]
    D_rest.append(np.delete(index_data, slice((l - 1) * elements, data_size - 1), 0))
    tree.append(kD_tree(D_rest[l - 1], 0))
    classification = np.zeros((KSET_size, m))
    for j in range(0, m):
        Indizes_kmax = kNN(D[j, 1:], k_max, D_rest[l - 1], tree[l - 1])
        summe = sum(data[Indizes_kmax.astype(int), 0])

        if summe == 0:
            classification[0, j] = 1
        else:
            classification[0, j] = np.sign(summe)
        b = 1
        for k in KSET_new:
            Indizes = Indizes_kmax[(k_max - k):]
            summe = sum(data[Indizes.astype(int), 0])
            if summe == 0:
                classification[b, j] = 1
            else:
                classification[b, j] = np.sign(summe)
            b += 1

    for j in range(0, KSET_size):
        R[j, l - 1] = np.mean(abs(data[D[:, 0].astype(int), 0] - classification[j, :]) / 2)

    # Best k = smallest mean cross-validation error over all folds.
    fehler_R = np.mean(R, axis=1).tolist()
    k_best_index = fehler_R.index(min(fehler_R))
    k_best = KARRAY[k_best_index]

    # Evaluate every fold's tree on the test set with the selected k.
    filename_test = name + ".test.csv"
    data_test = np.genfromtxt(filename_test, delimiter=',')
    data_test_index = data_mit_index(data_test)
    elements_test = data_test.shape[0]
    classification_test = np.zeros((l, elements_test))

    R_test = np.zeros(l)
    for i in range(0, l):
        for j in range(0, elements_test):
            Indizes_test = (kNN(data_test_index[j, 1:], int(k_best), D_rest[i], tree[i]))
            summe = sum(data[Indizes_test.astype(int), 0])
            if summe == 0:
                classification_test[i, j] = 1
            else:
                classification_test[i, j] = np.sign(summe)
        R_test[i] = np.mean(abs(data_test[:, 0] - classification_test[i, :]) / 2)
    f = np.mean(R_test)
    # Aggregate vote over all l trees; NOTE(review): an exact tie yields
    # np.sign(0) == 0 here, i.e. a 0 label in the result file — verify.
    data_test_index[:, 0] = np.sign(np.sum(classification_test, axis=0))
    resultname = name + ".result.csv"
    np.savetxt(resultname, data_test_index, delimiter=',')

    return f
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement