Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import pickle
- import matplotlib.pyplot as plt
- import matplotlib.cm as cm
- import numpy as np
print("HELLO WORLDDDD")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load "assignment2.pkl" when it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() call left it open).
with open("assignment2.pkl", "rb") as pickle_file:
    data = pickle.load(pickle_file)

# Unpack the entries of the loaded dictionary into module-level names.
train_data = data['train_data']
train_labels = data['train_labels']
test1 = data['test1']
test2 = data['test2']
words = data['words']

# Preview one training sample: row index 18 (zero-based) is reshaped into a
# 30x30 column-major image and drawn in greyscale.
n = 18
pixels = train_data[n, :]
plt.matshow(np.reshape(pixels, (30, 30), order='F'), cmap=cm.gray)
def classify(train, train_labels, test):
    """Nearest neighbour classification using cosine similarity.

    train - matrix of training data (one sample per row)
    train_labels - corresponding training data labels
    test - matrix of samples to classify (one sample per row)
    returns: labels - vector of test data labels

    Each test sample receives the label of the training sample whose
    direction is closest to its own (largest cosine similarity).  An
    all-zero sample has no direction; its similarity to everything is
    taken to be 0, so an all-zero test sample ties across all training
    samples and receives the first training label.
    """
    # Work in floating point: integer pixel types (e.g. uint8) would wrap
    # around silently in the products and sums below.
    train = np.asarray(train, dtype=float)
    test = np.asarray(test, dtype=float)

    dots = np.dot(test, train.T)
    modtest = np.sqrt(np.sum(test * test, axis=1))
    modtrain = np.sqrt(np.sum(train * train, axis=1))

    # A zero norm would give 0/0 = NaN, and np.argmax returns the position
    # of a NaN, so a single blank training row would win every comparison.
    # Dividing by 1 instead leaves the similarity at 0 (the dot product of
    # a zero vector with anything is already 0).
    modtest[modtest == 0] = 1.0
    modtrain[modtrain == 0] = 1.0

    similarity = dots / np.outer(modtest, modtrain)  # cosine similarity
    nearest = np.argmax(similarity, axis=1)
    return np.asarray(train_labels)[nearest]
- # for x in range(0, 225):
- # for x in range(0, 899):
- # line_of_image = (np.reshape((current_thirty_by_thirty[n, :]), (1, 1), order='F'), cm.gray)
- # n = x # Select the xth sample
- # pixels = train_data[n, :]
- # current_thirty_by_thirty = (np.reshape(pixels, (30, 30), order='F'), cm.gray)
- import matplotlib.pyplot as plt
- import matplotlib.cm as cm
- import numpy as np
- #%matplotlib inline
- # for x in range(0, 1):
- # n = x # Select the x-th sample
- # pixels = train_data[n, :]
- # plt.matshow(np.reshape(pixels, (30, 30), order='F'), cmap=cm.gray)
- # print (train_data[n, :])
def getCharacterImage(n, data=None):
    """Return sample n as a 30x30 image array.

    n - row index of the sample to fetch
    data - matrix with one flattened 900-pixel sample per row; when omitted
           the module-level train_data is used
    returns: 30x30 array holding the sample's pixels in column-major
             (order='F') layout, ready to pass to plt.matshow
    """
    if data is None:
        data = train_data
    pixels = data[n, :]
    # Previously the pixels were fetched and then dropped, so every call
    # returned None; hand the reshaped image back to the caller instead.
    return np.reshape(pixels, (30, 30), order='F')
# Gather the result of getCharacterImage for the first 225 samples.
# The results are collected in a list and converted in one step: a slot of
# a float np.zeros(225) vector can only hold a scalar number, so assigning
# each result into such a vector raised an exception on the first pass.
characterArray = np.array([getCharacterImage(i) for i in range(225)])
print(characterArray)
- '''
- zero_array = [[]]
- width = 15
- height = 15
- zero_array = [[0 for x in range(width)] for y in range(height)]
- for i in range(0,15):
- for j in range(0,15):
- zero_array[i][j] = "1"
- print(zero_array)
- '''
# Hold-out evaluation: the leading 599 rows of train_data form the training
# split and every remaining row forms the test split.
train1_data, test1_data = train_data[:599], train_data[599:]
train1_labels, test1_labels = train_labels[:599], train_labels[599:]

# Size of the held-out split
n_test = test1_labels.shape[0]

# Predict a label for each held-out sample
test1_guessed = classify(train1_data, train1_labels, test1_data)

# Percentage of predictions that agree with the true labels
n_correct = np.sum(test1_guessed == test1_labels)
pcor = n_correct * 100.0 / n_test
print(pcor)  # This should give 92% correct
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement