Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from time import perf_counter
- from sklearn.neighbors import KDTree
- # from sklearn.preprocessing import normalize
- # from scipy import spatial
def true_closest(X_train, X_test, k):
    """Brute-force reference for k-nearest-neighbour search.

    For each point in ``X_test`` the Euclidean distance to every point in
    ``X_train`` is computed, and the indices of the closest training points
    are returned, nearest first.

    Args:
        X_train: Iterable of training points; each point must support
            subtraction with a test point and be accepted by
            ``np.linalg.norm``.
        X_test: Iterable of query points.
        k: Number of neighbours to keep per query point. If ``k`` exceeds
            the number of training points, all of them are returned.

    Returns:
        A list with one entry per query point; each entry is a list of
        integer indices into ``X_train`` ordered by increasing distance.
        Equidistant points appear in ascending index order.
    """
    result = []
    for x0 in X_test:
        distances = [np.linalg.norm(x - x0) for x in X_train]
        # sorted() is stable, so ties keep their original (ascending) index
        # order without having to carry (index, distance) pairs around.
        order = sorted(range(len(distances)), key=distances.__getitem__)
        # Slicing already clamps to the list length, so no min() is needed.
        result.append(order[:k])
    return result
# Benchmark and sanity check: compare sklearn's KDTree neighbour query with
# the brute-force true_closest() reference on the same train/test split.

# X, y = read_cancer_dataset('cancer.csv')
X, y = read_spam_dataset('spam.csv')
# X = normalize(X, axis=0, norm='l2')
# NOTE(review): this train_test_split returns (X_train, y_train, X_test,
# y_test), so it is not sklearn's function of the same name; 0.9 is
# presumably the train fraction -- confirm against the helper's definition.
X_train, y_train, X_test, y_test = train_test_split(X, y, 0.9)
# X_train = np.random.randn(100, 3)
# X_test = np.random.randn(10, 3)

tree = KDTree(X_train, leaf_size=40)

# Only the query is timed; tree construction above is excluded.
time1 = perf_counter()
# query() returns (distances, indices); only the indices are compared.
_, predicted = tree.query(X_test, k=30)
time1 = perf_counter() - time1

time2 = perf_counter()
true = true_closest(X_train, X_test, k=30)
time2 = perf_counter() - time2

# KDTree query time first, brute-force time second (seconds).
print(time1, time2)

# Compare the shape tuples directly. Subtracting the shape arrays broadcasts
# when the ranks differ, which can report different shapes as equal or raise
# instead of printing the message.
if np.shape(predicted) != np.shape(true):
    print("Wrong shape")
else:
    # Count positions where the two neighbour lists disagree.
    errors = sum(
        1
        for row1, row2 in zip(predicted, true)
        for i1, i2 in zip(row1, row2)
        if i1 != i2
    )
    if errors > 0:
        print("Encounted", errors, "errors")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement