SHARE
TWEET

Untitled

a guest Jul 20th, 2019 66 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. import time
  3.  
  4. class Node():
  5.     def __init__(self, point):
  6.         self.point = point
  7.         self.left = None
  8.         self.right = None
  9.  
  10. class Search():
  11.     def __init__(self, best_point, best_distance):
  12.         self.best_point = best_point
  13.         self.best_distance = best_distance
  14.  
  15. def data_mit_index(data):
  16.     #np.random.shuffle(data)
  17.     A = np.copy(data)
  18.     Indexvektor = np.arange(data.shape[0])
  19.     A[:, 0] = Indexvektor
  20.     return A
  21. def median(list):
  22.     if  len(list) % 2 == 0:
  23.         return np.median(np.delete(list,0,0))
  24.     return np.median(list)
  25. def kD_tree(data, level):
  26.     num_points = data.shape[0]
  27.     if num_points > 1:
  28.         dim = data.shape[1] - 1
  29.         r = level % dim
  30.         auswahl = data[0:101, r + 1]
  31.         point_index = auswahl.tolist().index(median(auswahl))
  32.         point = auswahl[point_index]
  33.         condlist1 = data[1:, r + 1] <= point
  34.         P1 = data[1:,:][condlist1]
  35.  
  36.         condlist2 = data[1:, r + 1] > point
  37.         P2 = data[1:,:][condlist2]
  38.  
  39.         v = Node(data[point_index,:])
  40.         v.left = kD_tree(P1, level + 1)
  41.         #if v.left != None:
  42.          #   v.left.parent = v
  43.         v.right = kD_tree(P2, level + 1)
  44.         #if v.right != None:
  45.          #   v.right.parent = v
  46.  
  47.         return v
  48.     elif num_points == 1:
  49.         end = Node(data[0,:])
  50.         return end
  51.  
  52.  
  53. def sucher_setup(x, k, data):
  54.     e = np.ones((k, 1))
  55.     X = e * x
  56.     NORM_squared = np.sum((X - data[0:k, 1:]) * (X - data[0:k, 1:]), axis=1)
  57.     NORM = np.sqrt(NORM_squared)
  58.     sorted_index = np.argsort(NORM)
  59.     NORM_sorted = np.zeros(k)
  60.     BEST_POINT = np.zeros((k, data.shape[1]))
  61.     for i in range(0, k):
  62.         NORM_sorted[i] = NORM[sorted_index[k - i - 1]]
  63.         BEST_POINT[i, :] = data[sorted_index[k - i - 1], :]
  64.     return Search(BEST_POINT, NORM_sorted)
  65.  
  66.  
  67. def kNN_search(x, k, node, level, sucher):
  68.     dim = x.shape[0]
  69.     r = level % dim
  70.     distance = np.linalg.norm(x - node.point[1:])
  71.  
  72.     if node.point[0] >= k:
  73.         for j in range(0, k):
  74.             if distance >= sucher.best_distance[j]:
  75.                 if j == 0:
  76.                     break
  77.                 else:
  78.                     sucher.best_point[0:(j - 1), :] = sucher.best_point[1:j, :]
  79.                     sucher.best_point[j - 1, :] = node.point
  80.  
  81.                     sucher.best_distance[0:(j - 1)] = sucher.best_distance[1:j]
  82.                     sucher.best_distance[j - 1] = distance
  83.                     break
  84.         if distance < sucher.best_distance[k - 1]:
  85.             sucher.best_point[0:(k - 1), :] = sucher.best_point[1:k, :]
  86.             sucher.best_point[k - 1, :] = node.point
  87.  
  88.             sucher.best_distance[0:(k - 1)] = sucher.best_distance[1:k]
  89.             sucher.best_distance[k - 1] = distance
  90.     if x[r] <= node.point[r + 1]:  # Zuerst wird die Seite auf der x liegt untersucht
  91.         if x[r] - sucher.best_distance[0] <= node.point[
  92.                     r + 1] and node.left != None:  # Aber nur falls überhaupt bessere Punkte in den jeweiligen Kasten liegen können
  93.             kNN_search(x, k, node.left, level + 1, sucher)
  94.         if x[r] + sucher.best_distance[0] > node.point[r + 1] and node.right != None:
  95.             kNN_search(x, k, node.right, level + 1, sucher)
  96.     else:
  97.         if x[r] + sucher.best_distance[0] > node.point[r + 1] and node.right != None:
  98.             kNN_search(x, k, node.right, level + 1, sucher)
  99.         if x[r] - sucher.best_distance[0] <= node.point[r + 1] and node.left != None:
  100.             kNN_search(x, k, node.left, level + 1, sucher)
  101.  
  102. def kNN(x, k, data, baum):
  103.     sucher = sucher_setup(x, k, data)
  104.     kNN_search(x, k, baum, 0, sucher)
  105.     return sucher.best_point[:, 0].reshape(k)
  106.  
  107. def classify(name,KSET,l):
  108.     filename = name+".train.csv"
  109.     data = np.genfromtxt(filename, delimiter=',')
  110.     data_size = data.shape[0]
  111.     elements = data.shape[0]//l
  112.     np.random.shuffle(data)
  113.     index_data = data_mit_index(data)
  114.     KSET_size = len(KSET)
  115.     k_max = max(KSET)
  116.     KSET_new = KSET.copy()
  117.     KSET_new.remove(k_max)
  118.     R = np.zeros((KSET_size,l))
  119.     KARRAY = np.zeros(KSET_size)
  120.     KARRAY[0 ] = k_max
  121.     tree = []
  122.     D_rest = []
  123.     for i in range(0,l-1):
  124.         D = index_data[i*elements:(i+1)*elements,:]
  125.         D_rest.append(np.delete(index_data,slice(i*elements,(i+1)*elements),0))
  126.         tree.append(kD_tree(D_rest[i], 0))
  127.         classification = np.zeros((KSET_size,elements))
  128.         for j in range(0,elements):
  129.             Indizes_kmax = kNN(D[j, 1:], k_max, D_rest[i], tree[i])
  130.             summe = sum(data[Indizes_kmax.astype(int),0])
  131.  
  132.             if summe == 0:
  133.                 classification[0,j] = 1
  134.             else:
  135.                 classification[0,j] = np.sign(summe)
  136.             b = 1
  137.             for k in KSET_new:
  138.                 KARRAY[b] = k
  139.                 Indizes = Indizes_kmax[(k_max-k):]
  140.                 summe = sum(data[Indizes.astype(int), 0])
  141.                 if summe == 0:
  142.                     classification[b,j] = 1
  143.                 else:
  144.                     classification[b,j] = np.sign(summe)
  145.                 b += 1
  146.  
  147.  
  148.         for j in range(0,KSET_size):
  149.             R[j,i] = np.mean(abs(data[D[:,0].astype(int),0] - classification[j,:])/2)
  150.  
  151.     D = index_data[(l-1)*elements:data_size-1,:]
  152.     m = D.shape[0]
  153.     D_rest.append(np.delete(index_data, slice((l-1) * elements, data_size-1), 0))
  154.     tree.append(kD_tree(D_rest[l-1], 0))
  155.     classification = np.zeros((KSET_size, m))
  156.     for j in range(0, m):
  157.         Indizes_kmax = kNN(D[j, 1:], k_max, D_rest[l-1], tree[l-1])
  158.         summe = sum(data[Indizes_kmax.astype(int), 0])
  159.  
  160.         if summe == 0:
  161.             classification[0, j] = 1
  162.         else:
  163.             classification[0, j] = np.sign(summe)
  164.         b = 1
  165.         for k in KSET_new:
  166.             Indizes = Indizes_kmax[(k_max - k):]
  167.             summe = sum(data[Indizes.astype(int), 0])
  168.             if summe == 0:
  169.                 classification[b, j] = 1
  170.             else:
  171.                 classification[b, j] = np.sign(summe)
  172.             b += 1
  173.  
  174.     for j in range(0, KSET_size):
  175.         R[j, l-1] = np.mean(abs(data[D[:, 0].astype(int), 0] - classification[j, :]) / 2)
  176.  
  177.     fehler_R= np.mean(R, axis=1).tolist()
  178.     k_best_index = fehler_R.index(min(fehler_R))
  179.     k_best = KARRAY[k_best_index]
  180.  
  181.     filename_test = name+".test.csv"
  182.     data_test = np.genfromtxt(filename_test, delimiter=',')
  183.     data_test_index = data_mit_index(data_test)
  184.     elements_test = data_test.shape[0]
  185.     classification_test = np.zeros((l, elements_test))
  186.  
  187.     R_test = np.zeros(l)
  188.     for i in range(0, l):
  189.         for j in range(0, elements_test):
  190.             Indizes_test = (kNN(data_test_index[j, 1:], int(k_best), D_rest[i], tree[i]))
  191.             summe = sum(data[Indizes_test.astype(int), 0])
  192.             if summe == 0:
  193.                 classification_test[i, j] = 1
  194.             else:
  195.                 classification_test[i, j] = np.sign(summe)
  196.         R_test[i]= np.mean(abs(data_test[:,0] - classification_test[i, :]) / 2)
  197.     f = np.mean(R_test)
  198.     data_test_index[:,0] = np.sign(np.sum(classification_test, axis=0))
  199.     resultname = name+".result.csv"
  200.     np.savetxt(resultname,data_test_index,delimiter=',')
  201.  
  202.  
  203.  
  204.  
  205.     return f```
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top