• API
• FAQ
• Tools
• Archive
SHARE
TWEET

# Untitled

a guest Jul 20th, 2019 66 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import numpy as np
2. import time
3.
class Node:
    """One node of the kd-tree: a stored data row plus two child links."""

    def __init__(self, point):
        # Both subtrees start out empty; they are attached after construction.
        self.left = self.right = None
        self.point = point
9.
class Search:
    """Holds the current best candidate rows together with their distances."""

    def __init__(self, best_point, best_distance):
        # The two arguments are stored as given (no copy is made).
        self.best_distance = best_distance
        self.best_point = best_point
14.
def data_mit_index(data):
    """Return a copy of ``data`` whose first column is replaced by 0..n-1.

    The input array itself is left untouched; only the copy's column 0 is
    overwritten with the row numbers.
    """
    indexed = np.copy(data)
    row_numbers = np.arange(data.shape[0])
    indexed[:, 0] = row_numbers
    return indexed
def median(list):
    """Return a median of ``list`` that is always computed over an odd count.

    When the number of entries is even, the first entry is dropped before
    the median is taken; otherwise the median of all entries is returned.
    """
    has_even_length = len(list) % 2 == 0
    values = np.delete(list, 0, 0) if has_even_length else list
    return np.median(values)
def kD_tree(data, level):
    """Recursively build a kd-tree from ``data`` and return its root node.

    Column 0 of ``data`` is carried along untouched (the callers store a row
    index there); columns 1.. are the coordinates.  The splitting axis cycles
    with ``level``.  The pivot is the median of that coordinate over the first
    101 rows (see ``median``), so it is always an actual row of ``data``.

    Rows whose coordinate is <= the pivot value go to the left subtree, rows
    with a larger coordinate go to the right subtree.  The pivot row itself is
    stored in the node and removed before partitioning, so every row ends up
    in the tree exactly once.

    Returns ``None`` for an empty ``data``.
    """
    num_points = data.shape[0]
    if num_points == 0:
        return None
    if num_points == 1:
        return Node(data[0, :])

    dim = data.shape[1] - 1
    column = level % dim + 1

    # Only a prefix of the rows is sampled to pick the pivot.
    sample = data[0:101, column]
    pivot_index = sample.tolist().index(median(sample))
    pivot_value = sample[pivot_index]

    # Remove exactly the pivot row.  Dropping row 0 instead would lose row 0
    # and duplicate the pivot whenever pivot_index != 0.
    rest = np.delete(data, pivot_index, 0)
    left_rows = rest[rest[:, column] <= pivot_value]
    right_rows = rest[rest[:, column] > pivot_value]

    v = Node(data[pivot_index, :])
    v.left = kD_tree(left_rows, level + 1)
    v.right = kD_tree(right_rows, level + 1)
    return v
51.
52.
def sucher_setup(x, k, data):
    """Seed a ``Search`` with the first ``k`` rows of ``data``.

    The Euclidean distance from ``x`` to the coordinates (columns 1..) of
    each of the first ``k`` rows is computed, and the rows are stored ordered
    from the largest distance (position 0) to the smallest (position k-1).
    """
    offsets = np.ones((k, 1)) * x - data[0:k, 1:]
    distances = np.sqrt(np.sum(offsets * offsets, axis=1))

    # argsort is ascending; reverse it to get farthest-first order.
    farthest_first = np.argsort(distances)[::-1]

    ordered_distances = np.zeros(k)
    ordered_distances[:] = distances[farthest_first]
    ordered_rows = np.zeros((k, data.shape[1]))
    ordered_rows[:, :] = data[farthest_first, :]
    return Search(ordered_rows, ordered_distances)
65.
66.
def kNN_search(x, k, node, level, sucher):
    """Update ``sucher`` in place with the nearest rows found below ``node``.

    ``sucher.best_distance`` / ``sucher.best_point`` are expected to hold
    ``k`` entries ordered from the farthest (position 0) to the nearest
    (position k-1); this function keeps that order.  ``node.point[0]`` is
    treated as the row's identifier and ``node.point[1:]`` as coordinates.

    A node is skipped when a row with the same identifier is already in the
    best list, so a seeded row that is met again in the tree is never
    inserted twice, whatever identifiers the seed rows carry.

    Parameters
    ----------
    x : 1-D array of query coordinates.
    k : number of neighbours kept in ``sucher``.
    node : current tree node (must not be ``None``).
    level : depth of ``node``; selects the splitting axis ``level % len(x)``.
    sucher : object with ``best_point`` (k x cols) and ``best_distance`` (k).
    """
    axis = level % x.shape[0]
    split = node.point[axis + 1]
    distance = np.linalg.norm(x - node.point[1:])

    best_point = sucher.best_point
    best_distance = sucher.best_distance

    beats_worst = distance < best_distance[0]
    if beats_worst and not np.any(best_point[:, 0] == node.point[0]):
        # First position whose entry is not farther than the candidate; the
        # candidate is placed just before it and the farthest entry drops out.
        not_farther = distance >= best_distance[:k]
        slot = int(np.argmax(not_farther)) if not_farther.any() else k
        best_point[:slot - 1, :] = best_point[1:slot, :]
        best_point[slot - 1, :] = node.point
        best_distance[:slot - 1] = best_distance[1:slot]
        best_distance[slot - 1] = distance

    # Visit the side containing x first, then the other side; each side is
    # entered only if the current worst distance still reaches into it.
    if x[axis] <= split:
        visit_order = ("left", "right")
    else:
        visit_order = ("right", "left")

    for side in visit_order:
        child = getattr(node, side)
        if child is None:
            continue
        # Re-read the radius: the previous recursion may have shrunk it.
        radius = sucher.best_distance[0]
        if side == "left":
            reachable = x[axis] - radius <= split
        else:
            reachable = x[axis] + radius > split
        if reachable:
            kNN_search(x, k, child, level + 1, sucher)
101.
def kNN(x, k, data, baum):
    """Return column 0 of the ``k`` best rows found for query ``x``.

    The search state is seeded from ``data`` via ``sucher_setup`` and then
    refined by walking the tree ``baum`` from its root (level 0).
    """
    state = sucher_setup(x, k, data)
    kNN_search(x, k, baum, 0, state)
    first_column = state.best_point[:, 0]
    return first_column.reshape(k)
106.
def _majority_label(labels):
    """Return the sign of the sum of ``labels``; a zero sum counts as +1."""
    total = np.sum(labels)
    return 1 if total == 0 else np.sign(total)


def _fold_error_rates(data, fold, rest, tree, k_values):
    """Return, for each k in ``k_values``, the error rate on one fold.

    ``k_values[0]`` must be the largest k.  One search with that k is done per
    validation row; the smaller k reuse the tail of its result, which holds
    the nearest rows because the search result is ordered farthest-first.
    """
    k_max = k_values[0]
    predictions = np.zeros((len(k_values), fold.shape[0]))
    for j in range(fold.shape[0]):
        neighbours = kNN(fold[j, 1:], k_max, rest, tree).astype(int)
        for b, k in enumerate(k_values):
            predictions[b, j] = _majority_label(data[neighbours[k_max - k:], 0])
    truth = data[fold[:, 0].astype(int), 0]
    return np.mean(np.abs(truth - predictions) / 2, axis=1)


def classify(name, KSET, l):
    """Pick the best k by l-fold cross-validation, then classify the test set.

    Reads ``<name>.train.csv`` and ``<name>.test.csv`` (comma separated,
    label in column 0, coordinates in the remaining columns), and writes
    ``<name>.result.csv`` containing the test rows with column 0 replaced by
    the predicted label.  The error formula ``|y - prediction| / 2`` assumes
    labels are +1 / -1 -- TODO confirm against the data files.

    Parameters
    ----------
    name : common prefix of the train/test/result file names.
    KSET : collection of candidate k values.
    l : number of folds; must satisfy ``2 <= l <= number of training rows``.

    Returns
    -------
    Mean test error over the ``l`` per-fold classifiers using the best k.

    Raises
    ------
    ValueError
        If ``l`` is outside the valid range, or ``KSET`` is empty (from
        ``max``).
    """
    data = np.genfromtxt(name + ".train.csv", delimiter=',')
    data_size = data.shape[0]
    if l < 2 or l > data_size:
        raise ValueError(
            "l must be between 2 and the number of training rows")
    elements = data_size // l

    # The training rows are shuffled in place before the folds are cut.
    np.random.shuffle(data)
    index_data = data_mit_index(data)

    # Largest k first: its neighbour list is reused for every smaller k.
    k_max = max(KSET)
    smaller_ks = list(KSET)
    smaller_ks.remove(k_max)
    k_values = [k_max] + smaller_ks

    R = np.zeros((len(k_values), l))
    tree = []
    D_rest = []
    for i in range(l):
        start = i * elements
        # The last fold takes every remaining row, including the final one.
        stop = data_size if i == l - 1 else start + elements
        fold = index_data[start:stop, :]
        D_rest.append(np.delete(index_data, slice(start, stop), 0))
        tree.append(kD_tree(D_rest[i], 0))
        R[:, i] = _fold_error_rates(data, fold, D_rest[i], tree[i], k_values)

    fehler_R = np.mean(R, axis=1).tolist()
    k_best = int(k_values[fehler_R.index(min(fehler_R))])

    data_test = np.genfromtxt(name + ".test.csv", delimiter=',')
    data_test_index = data_mit_index(data_test)
    elements_test = data_test.shape[0]

    # One prediction per (fold classifier, test row).
    classification_test = np.zeros((l, elements_test))
    for i in range(l):
        for j in range(elements_test):
            neighbours = kNN(data_test_index[j, 1:], k_best, D_rest[i], tree[i])
            classification_test[i, j] = _majority_label(
                data[neighbours.astype(int), 0])

    R_test = np.mean(np.abs(data_test[:, 0] - classification_test) / 2, axis=1)
    f = np.mean(R_test)

    # Ensemble vote over the fold classifiers; ties resolve to +1, the same
    # rule used for every individual neighbour vote.
    votes = np.sum(classification_test, axis=0)
    data_test_index[:, 0] = np.where(votes == 0, 1, np.sign(votes))
    np.savetxt(name + ".result.csv", data_test_index, delimiter=',')

    return f
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top