Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
def power_iteration(A, max_iter):
    """Estimate the dominant eigenvalue of A by power iteration.

    Args:
        A: square (n, n) array.
        max_iter: number of multiply/normalize iterations.

    Returns:
        (1, 1) ndarray holding the Rayleigh-quotient estimate of the
        largest-magnitude eigenvalue of A.
    """
    # Random start vector: with probability 1 it has a nonzero component
    # along the dominant eigenvector.  (The original bound this to `d`
    # and then aliased it as x_i; the extra name did nothing.)
    x_i = np.random.randn(A.shape[1], 1)
    for _ in range(max_iter):
        # Normalize the *current* iterate before multiplying.  The
        # original divided A.dot(x_i) by the norm of the PREVIOUS
        # iterate, leaving x_i scaled by roughly lambda_max instead of
        # staying at unit norm.  The Rayleigh quotient below is scale
        # invariant, so results agree, but this is the standard and
        # numerically safer formulation.
        x_i = x_i / np.linalg.norm(x_i)
        x_i = A.dot(x_i)
    # Rayleigh quotient: (x^T A x) / (x^T x); shape (1, 1).
    lambda_max = x_i.T.dot(A.dot(x_i)) / x_i.T.dot(x_i)
    return lambda_max
def load_mnist(label1, label2):
    """Load MNIST CSVs and build a shuffled binary-classification subset.

    Rows whose digit equals `label1` are relabeled 1, rows equal to
    `label2` are relabeled 0; all other digits are dropped.  The training
    split is capped at 30000 samples.

    Returns:
        X_train: (784, n_train) pixel values scaled to [0, 1], one sample
            per COLUMN.
        Y_train: (n_train, 1) array of 0/1 labels.
        X_test, Y_test: same layout for the test split.
    """
    raw_train_data = pd.read_csv('data/mnist_train.csv')
    raw_test_data = pd.read_csv('data/mnist_test.csv')
    # DataFrame.as_matrix() was deprecated and then removed from pandas;
    # to_numpy() is the supported replacement.
    numpy_train_data = raw_train_data.to_numpy()
    numpy_test_data = raw_test_data.to_numpy()
    # Column 0 is the digit label; the remaining 784 columns are pixels.
    X_train = numpy_train_data[:, 1:]
    Y_train = numpy_train_data[:, 0].reshape((numpy_train_data.shape[0],))
    X_test = numpy_test_data[:, 1:]
    Y_test = numpy_test_data[:, 0].reshape((numpy_test_data.shape[0],))
    X_train_label1 = X_train[Y_train == label1, :]
    X_train_label2 = X_train[Y_train == label2, :]
    del X_train
    Y_train_label1 = Y_train[Y_train == label1]
    Y_train_label2 = Y_train[Y_train == label2]
    del Y_train
    X_train = np.vstack((X_train_label1, X_train_label2))
    Y_train = np.vstack((Y_train_label1.reshape((-1, 1)),
                         Y_train_label2.reshape((-1, 1))))
    # Compute BOTH masks before assigning.  The original did
    # Y[Y == label1] = 1 followed by Y[Y == label2] = 0 in place: for the
    # actual call load_mnist(0, 1) the first step turned every 0 into 1,
    # and the second step then zeroed ALL labels (original 1s and the
    # freshly relabeled ones alike).
    train_mask1 = Y_train == label1
    train_mask2 = Y_train == label2
    Y_train[train_mask1] = 1
    Y_train[train_mask2] = 0
    perm = np.random.permutation(np.arange(X_train.shape[0]))
    X_train = X_train[perm, :]
    Y_train = Y_train[perm, :]
    if X_train.shape[0] >= 30000:
        X_train = X_train[:30000, :]
        Y_train = Y_train[:30000, :]
    # Same construction for the test split.
    X_test_label1 = X_test[Y_test == label1, :]
    X_test_label2 = X_test[Y_test == label2, :]
    del X_test
    Y_test_label1 = Y_test[Y_test == label1]
    Y_test_label2 = Y_test[Y_test == label2]
    del Y_test
    X_test = np.vstack((X_test_label1, X_test_label2))
    Y_test = np.vstack((Y_test_label1.reshape((-1, 1)),
                        Y_test_label2.reshape((-1, 1))))
    test_mask1 = Y_test == label1
    test_mask2 = Y_test == label2
    Y_test[test_mask1] = 1
    Y_test[test_mask2] = 0
    perm = np.random.permutation(np.arange(X_test.shape[0]))
    X_test = X_test[perm, :]
    Y_test = Y_test[perm, :]
    # Scale pixels to [0, 1] and transpose so each column is one sample.
    return X_train.T / 255, Y_train, X_test.T / 255, Y_test
def sigmoind(V):
    """Logistic sigmoid with output clipped away from exactly 0 and 1.

    The original computed 1 / (1 + np.exp(-1 * V + 1e-5)): operator
    precedence put the 1e-5 INSIDE the exponent, which merely shifts the
    sigmoid's input by a constant and does nothing to stop np.log(0) in
    Cost when the activation saturates.  Clipping the output keeps
    log(A) and log(1 - A) finite downstream.

    Args:
        V: array of logits.

    Returns:
        Array of the same shape with values in (0, 1), clipped to
        [1e-15, 1 - 1e-15].
    """
    activations = 1.0 / (1.0 + np.exp(-V))
    return np.clip(activations, 1e-15, 1 - 1e-15)
def Cost(X, C, W):
    """Mean logistic (cross-entropy) cost of weights W.

    Args:
        X: (n_features, n_samples) data matrix, one sample per column.
        C: (n_samples, 1) array of 0/1 labels.
        W: (n_features, 1) weight vector.

    Returns:
        (1, 1) ndarray holding the averaged negative log-likelihood.
    """
    n_samples = X.shape[1]
    predictions = sigmoind(np.matmul(X.T, W))
    log_likelihood = (np.dot(C.T, np.log(predictions))
                      + np.dot((1 - C).T, np.log(1 - predictions)))
    return (-1 / n_samples) * log_likelihood
def Gradient(X, C, W):
    """Unit-norm gradient of the logistic cost with respect to W.

    Note: the raw gradient is deliberately rescaled to unit Euclidean
    norm, so callers control step size entirely through the line search.

    Args:
        X: (n_features, n_samples) data matrix.
        C: (n_samples, 1) label vector.
        W: (n_features, 1) weight vector.

    Returns:
        (n_features, 1) gradient direction with norm 1.
    """
    n_samples = X.shape[1]
    residual = sigmoind(np.matmul(X.T, W)) - C
    raw_gradient = (1 / n_samples) * np.dot(X, residual)
    return raw_gradient / np.linalg.norm(raw_gradient)
def Hassian(X, C, W):
    """Hessian of the logistic cost: (1/m) * X D X^T.

    D is diagonal with entries a*(1-a) where a are the sigmoid
    activations.  `C` is unused but kept so the signature matches
    Cost/Gradient (and the F_A dispatcher).

    Args:
        X: (n_features, n_samples) data matrix.
        C: (n_samples, 1) label vector (ignored).
        W: (n_features, 1) weight vector.

    Returns:
        (n_features, n_features) Hessian matrix.
    """
    n_samples = X.shape[1]
    activations = sigmoind(np.matmul(X.T, W))
    curvature = np.diag((activations * (1 - activations)).reshape((-1)))
    return (1 / n_samples) * np.matmul(X, np.matmul(curvature, X.T))
def F_A(X, C, W):
    """Evaluate cost, gradient and Hessian at W in a single call."""
    cost = Cost(X, C, W)
    gradient = Gradient(X, C, W)
    hessian = Hassian(X, C, W)
    return cost, gradient, hessian
def test(d, X, C, W, func, abs_type, title, epsilon, max_iter):
    """Taylor-remainder verification test for a derivative operator.

    Perturbs X along direction d with shrinking step sizes epsilon**i and
    plots |f(x+eps*d) - f(x)| (should decay linearly in eps) against
    |f(x+eps*d) - f(x) - eps*d^T*func(x)| (should decay quadratically
    when `func` is the correct derivative).

    Args:
        d: perturbation direction, same shape as a column stack of X rows.
        X, C, W: arguments forwarded to Cost.
        func: derivative operator under test (Gradient or Hassian).
        abs_type: reduction mapping a difference to a scalar
            (np.abs for the gradient test, np.linalg.norm for the
            Hessian test, where the difference is a vector).
        title: plot title / saved-file prefix.
        epsilon: base step size (0 < epsilon < 1).
        max_iter: number of shrinking steps.
    """
    f_x = Cost(X, C, W)
    epsilons = [epsilon ** i for i in range(1, max_iter + 1)]
    tests = np.zeros((max_iter, 2))
    for i in range(max_iter):
        f_x_d = Cost(X + epsilons[i] * d, C, W)
        f_x_d_g = f_x + np.dot((epsilons[i] * d.T), func(X, C, W))
        # Use the caller-supplied reduction for BOTH columns.  The
        # original hard-coded np.abs here (its own comment flagged the
        # mistake), which is wrong when the difference is not scalar.
        tests[i, 0] = abs_type(f_x_d - f_x)
        tests[i, 1] = abs_type(f_x_d - f_x_d_g)
    plt.semilogy(range(1, max_iter + 1), tests[:, 0], 'r')
    plt.title(title + ': linear result')
    plt.legend(['linear result'])
    plt.xlabel('iterations')
    plt.ylabel('results')
    plt.savefig('imgs/{} linear result.png'.format(title))
    plt.show()
    plt.plot(range(1, max_iter + 1), tests[:, 1], 'b')
    plt.title(title + ': quadratic result')
    plt.legend(['quadratic result'])
    plt.xlabel('iterations')
    plt.ylabel('results')
    plt.savefig('imgs/{} quadratic result.png'.format(title))
    plt.show()
def Armijo_linesearch(X, C, W, gradient, direction, alpha=1, beta=0.5, c=1e-4, max_iter=4):
    """Backtracking line search under the Armijo sufficient-decrease rule.

    Shrinks alpha by factor beta until
        Cost(W + alpha*direction) <= Cost(W) + c*alpha*gradient^T*direction
    or max_iter backtracks have been tried; returns the last alpha either
    way.

    Args:
        X, C: data and labels forwarded to Cost.
        W: current weight vector.
        gradient: gradient of the cost at W.
        direction: proposed step direction.
        alpha: initial step size.
        beta: backtracking shrink factor in (0, 1).
        c: sufficient-decrease constant.
        max_iter: maximum number of backtracking steps.

    Returns:
        The accepted (or final shrunken) step size alpha.
    """
    for _ in range(max_iter):
        decrease_bound = Cost(X, C, W) + c * alpha * gradient.T.dot(direction)
        if Cost(X, C, W + alpha * direction) <= decrease_bound:
            break
        alpha = beta * alpha
    return alpha
def Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
    """Train logistic-regression weights by steepest descent with Armijo.

    Logs train/test cost each iteration, then plots and saves the
    convergence history (|cost_i - final cost| on a log scale).

    Args:
        X_train, X_test: (n_features, n_samples) data matrices.
        Y_train, Y_test: (n_samples, 1) 0/1 label vectors.
        W: (n_features, 1) initial weights.
        labels: pair (label1, label2) used only for the plot filename.
        max_iter: maximum number of descent iterations.

    Returns:
        The trained weight vector W.
    """
    label1, label2 = labels
    costs_train = []
    costs_test = []
    train_cost = 0.0
    test_cost = 0.0
    for i in range(max_iter):
        train_cost, grad = Cost(X_train, Y_train, W), Gradient(X_train, Y_train, W)
        train_cost = train_cost.reshape((-1))
        test_cost = Cost(X_test, Y_test, W).reshape((-1))
        print('Training cost after {} iterations is {}'.format(i, train_cost))
        print('Test cost after {} iterations is {}'.format(i, test_cost))
        print()
        if train_cost < 1e-10:
            break
        costs_train.append(train_cost)
        costs_test.append(test_cost)
        # Descent direction is MINUS the gradient.  The original passed
        # +grad as the search direction, so Armijo evaluated the ascent
        # point W + alpha*grad while the update actually stepped to
        # W - alpha*grad -- the accepted alpha never corresponded to the
        # point being tested.
        direction = -grad
        alpha = Armijo_linesearch(X_train, Y_train, W, grad, direction)
        W = W + alpha * direction
    # Plot distance of each iterate's cost from the final cost.
    costs_train = np.abs(np.array(costs_train) - train_cost)
    costs_test = np.abs(np.array(costs_test) - test_cost)
    plt.semilogy(costs_train, 'r')
    plt.semilogy(costs_test, 'b')
    plt.title('Steepest Descent: Training and Testing Convergence History')
    plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
    plt.xlabel('iterations')
    plt.ylabel('Cost')
    plt.savefig('imgs/Steepest Descent-Training and Testing {} vs {}.png'.format(label1, label2))
    plt.show()
    return W
def is_pos_def(x):
    """Return True iff every eigenvalue of x is strictly positive."""
    eigenvalues = np.linalg.eigvals(x)
    return (eigenvalues > 0).all()
def Newton_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
    """Train logistic-regression weights by damped Newton steps + Armijo.

    Uses the pseudo-inverse of (H + 0.009*I) -- the small diagonal shift
    keeps the Hessian invertible when it is near-singular.  Logs costs,
    then plots/saves the convergence history.

    Args:
        X_train, X_test: (n_features, n_samples) data matrices.
        Y_train, Y_test: (n_samples, 1) 0/1 label vectors.
        W: (n_features, 1) initial weights.
        labels: pair (label1, label2) used only for the plot filename.
        max_iter: number of Newton iterations.

    Returns:
        The trained weight vector W.
    """
    label1, label2 = labels
    costs_train = []
    costs_test = []
    for i in range(max_iter):
        train_cost, grad, hassian = F_A(X_train, Y_train, W)
        train_cost = train_cost.reshape((-1))
        test_cost = Cost(X_test, Y_test, W).reshape((-1))
        print('Newton: Training cost after {} iterations is {}'.format(i, train_cost))
        print('Test cost after {} iterations is {}'.format(i, test_cost))
        print()
        costs_train.append(train_cost)
        costs_test.append(test_cost)
        # The actual step is the damped-Newton direction.  The original
        # handed the line search direction=+grad, so Armijo tested
        # W + alpha*grad rather than the Newton step being taken; pass
        # the true direction so the accepted alpha matches the update.
        direction = -np.linalg.pinv(hassian + 0.009 * np.eye(hassian.shape[0])).dot(grad)
        alpha = Armijo_linesearch(X_train, Y_train, W, grad, direction)
        W = W + alpha * direction
    # Plot distance of each iterate's cost from the final cost.
    costs_train = np.abs(np.array(costs_train) - train_cost)
    costs_test = np.abs(np.array(costs_test) - test_cost)
    plt.semilogy(costs_train, 'r')
    plt.semilogy(costs_test, 'b')
    plt.title('Newton: Training and Testing Convergence History')
    plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
    plt.xlabel('iterations')
    plt.ylabel('Cost')
    plt.savefig('imgs/Newton Training and Testing {} vs {}.png'.format(label1, label2))
    plt.show()
    return W
def predict(X, W):
    """Binary prediction: True where the predicted probability >= 0.5.

    Args:
        X: (n_features, n_samples) data matrix, one sample per column.
        W: (n_features, 1) weight vector.

    Returns:
        (n_samples, 1) boolean array of class predictions.
    """
    probabilities = sigmoind(np.matmul(X.T, W))
    return probabilities >= 0.5
if __name__ == '__main__':
    # Binary task: digit 0 (relabeled 1) vs digit 1 (relabeled 0).
    X_train, Y_train, X_test, Y_test = load_mnist(0, 1)
    # Small nonzero initialization, shape (n_features, 1).
    W = np.zeros((X_train.shape[0], 1)) + 1e-5
    # Derivative-verification runs (disabled):
    # F_A(X_train, Y_train, W)
    # d = np.random.randn(X_train.shape[0], 1)
    # test(d, X_train, Y_train, W, Gradient, np.abs, 'Gradient Test', 0.5, 20)
    # test(d, X_train, Y_train, W, Hassian, np.linalg.norm, 'Hassian Test', 0.5, 20)
    # Alternative solver (disabled):
    # W = Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 100)
    W = Newton_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 45)
    # Train/test accuracy reporting (disabled):
    # print(np.mean(Y_train == predict(X_train, W)))
    # print(np.mean(Y_test == predict(X_test, W)))
    # exit()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement