# Untitled

import os
import pickle
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

print("HELLO WORLDDDD")


# Load the assignment data set. The context manager closes the file handle
# as soon as unpickling is done instead of leaving it open.
# NOTE: pickle can execute arbitrary code while loading; only open
# assignment2.pkl when it comes from a trusted source.
with open("assignment2.pkl", "rb") as data_file:
    data = pickle.load(data_file)
train_data = data['train_data']
train_labels = data['train_labels']
test1 = data['test1']
test2 = data['test2']
words = data['words']

n = 18  # Row index 18, i.e. the 19th sample (indexing is 0-based)
pixels = train_data[n, :]
# Each row holds a 30x30 image stored column by column (order='F')
plt.matshow(np.reshape(pixels, (30, 30), order='F'), cmap=cm.gray)


def classify(train, train_labels, test):
    """Nearest neighbour classification using cosine similarity

    train - matrix of training data (one sample per row)
    train_labels - corresponding training data labels
    test - matrix of samples to classify

    returns: labels - vector of test data labels

    Each test sample receives the label of the training sample with the
    largest cosine similarity. A training sample whose norm is zero has no
    defined cosine and is never chosen ahead of a sample with a defined
    similarity.
    """
    # Work in floating point so integer pixel dtypes (e.g. uint8) cannot
    # overflow in the products below.
    train = np.asarray(train, dtype=float)
    test = np.asarray(test, dtype=float)

    dots = np.dot(test, train.transpose())
    modtest = np.sqrt(np.sum(test * test, axis=1))
    modtrain = np.sqrt(np.sum(train * train, axis=1))
    norm_products = np.outer(modtest, modtrain)

    # Cosine similarity (larger means closer). Pairs involving a zero-norm
    # vector would be 0/0 = NaN, and argmax would then pick the NaN entry;
    # leave those entries at -inf so they lose against any real similarity.
    similarity = np.full(dots.shape, -np.inf)
    np.divide(dots, norm_products, out=similarity, where=norm_products > 0)

    nearest = np.argmax(similarity, axis=1)
    labels = train_labels[nearest]
    return labels


# for x in range(0, 225):
#     for x in range(0, 899):
#         line_of_image = (np.reshape((current_thirty_by_thirty[n, :]), (1, 1), order='F'), cm.gray)
#     n = x # Select the xth sample
#     pixels = train_data[n, :]
#     current_thirty_by_thirty = (np.reshape(pixels, (30, 30), order='F'), cm.gray)


import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
#%matplotlib inline

# for x in range(0, 1):
#     n = x # Select the xth sample
#     pixels = train_data[n, :]
#     plt.matshow(np.reshape(pixels, (30, 30), order='F'), cmap=cm.gray)
#     print (train_data[n, :])

def getCharacterImage(n):
    """Return training sample n as a 30x30 image

    n - row index into the module-level train_data matrix

    returns: 30x30 array of the sample's pixel values, unpacked column by
    column (order='F')
    """
    pixels = train_data[n, :]
    return np.reshape(pixels, (30, 30), order='F')


# One 30x30 image per entry for the first 225 training samples. The array
# must be three-dimensional: a flat array of 225 scalars cannot hold images.
characterArray = np.zeros((225, 30, 30))

for i in range(225):
    characterArray[i] = getCharacterImage(i)

print(characterArray)


'''
zero_array = [[]]
width = 15
height = 15
zero_array = [[0 for x in range(width)] for y in range(height)]

for i in range(0,15):
    for j in range(0,15):
        zero_array[i][j] = "1"

print(zero_array)

'''


# Hold-out evaluation: rows before index 599 train the classifier and the
# remaining rows are kept back to score it.
train1_data, test1_data = train_data[:599, :], train_data[599:, :]
train1_labels, test1_labels = train_labels[:599], train_labels[599:]

# Size of the held-out set
n_test = test1_labels.shape[0]

# Predict a label for every held-out sample
test1_guessed = classify(train1_data, train1_labels, test1_data)

# Percentage of held-out samples whose predicted label matches the true one
pcor = 100.0 * np.sum(test1_guessed == test1_labels) / n_test

print(pcor)  # This should give 92% correct