Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
def power_iteration(A, max_iter):
    """Estimate the dominant eigenvalue of A by power iteration.

    Args:
        A: square (n, n) array.
        max_iter: number of multiply/normalize iterations.

    Returns:
        (1, 1) ndarray holding the Rayleigh-quotient estimate of the
        largest-magnitude eigenvalue of A.
    """
    # Random start vector: with probability 1 it has a nonzero component
    # along the dominant eigenvector.  (The original bound this to `d`
    # and then aliased it as x_i; the extra name did nothing.)
    x_i = np.random.randn(A.shape[1], 1)
    for _ in range(max_iter):
        # Normalize the *current* iterate before multiplying.  The
        # original divided A.dot(x_i) by the norm of the PREVIOUS
        # iterate, leaving x_i scaled by roughly lambda_max instead of
        # staying at unit norm.  The Rayleigh quotient below is scale
        # invariant, so results agree, but this is the standard and
        # numerically safer formulation.
        x_i = x_i / np.linalg.norm(x_i)
        x_i = A.dot(x_i)
    # Rayleigh quotient: (x^T A x) / (x^T x); shape (1, 1).
    lambda_max = x_i.T.dot(A.dot(x_i)) / x_i.T.dot(x_i)
    return lambda_max
def load_mnist(label1, label2):
    """Load MNIST CSVs and build a shuffled binary-classification subset.

    Rows whose digit equals `label1` are relabeled 1, rows equal to
    `label2` are relabeled 0; all other digits are dropped.  The training
    split is capped at 30000 samples.

    Returns:
        X_train: (784, n_train) pixel values scaled to [0, 1], one sample
            per COLUMN.
        Y_train: (n_train, 1) array of 0/1 labels.
        X_test, Y_test: same layout for the test split.
    """
    raw_train_data = pd.read_csv('data/mnist_train.csv')
    raw_test_data = pd.read_csv('data/mnist_test.csv')
    # DataFrame.as_matrix() was deprecated and then removed from pandas;
    # to_numpy() is the supported replacement.
    numpy_train_data = raw_train_data.to_numpy()
    numpy_test_data = raw_test_data.to_numpy()
    # Column 0 is the digit label; the remaining 784 columns are pixels.
    X_train = numpy_train_data[:, 1:]
    Y_train = numpy_train_data[:, 0].reshape((numpy_train_data.shape[0],))
    X_test = numpy_test_data[:, 1:]
    Y_test = numpy_test_data[:, 0].reshape((numpy_test_data.shape[0],))
    X_train_label1 = X_train[Y_train == label1, :]
    X_train_label2 = X_train[Y_train == label2, :]
    del X_train
    Y_train_label1 = Y_train[Y_train == label1]
    Y_train_label2 = Y_train[Y_train == label2]
    del Y_train
    X_train = np.vstack((X_train_label1, X_train_label2))
    Y_train = np.vstack((Y_train_label1.reshape((-1, 1)),
                         Y_train_label2.reshape((-1, 1))))
    # Compute BOTH masks before assigning.  The original did
    # Y[Y == label1] = 1 followed by Y[Y == label2] = 0 in place: for the
    # actual call load_mnist(0, 1) the first step turned every 0 into 1,
    # and the second step then zeroed ALL labels (original 1s and the
    # freshly relabeled ones alike).
    train_mask1 = Y_train == label1
    train_mask2 = Y_train == label2
    Y_train[train_mask1] = 1
    Y_train[train_mask2] = 0
    perm = np.random.permutation(np.arange(X_train.shape[0]))
    X_train = X_train[perm, :]
    Y_train = Y_train[perm, :]
    if X_train.shape[0] >= 30000:
        X_train = X_train[:30000, :]
        Y_train = Y_train[:30000, :]
    # Same construction for the test split.
    X_test_label1 = X_test[Y_test == label1, :]
    X_test_label2 = X_test[Y_test == label2, :]
    del X_test
    Y_test_label1 = Y_test[Y_test == label1]
    Y_test_label2 = Y_test[Y_test == label2]
    del Y_test
    X_test = np.vstack((X_test_label1, X_test_label2))
    Y_test = np.vstack((Y_test_label1.reshape((-1, 1)),
                        Y_test_label2.reshape((-1, 1))))
    test_mask1 = Y_test == label1
    test_mask2 = Y_test == label2
    Y_test[test_mask1] = 1
    Y_test[test_mask2] = 0
    perm = np.random.permutation(np.arange(X_test.shape[0]))
    X_test = X_test[perm, :]
    Y_test = Y_test[perm, :]
    # Scale pixels to [0, 1] and transpose so each column is one sample.
    return X_train.T / 255, Y_train, X_test.T / 255, Y_test
def sigmoind(V):
    """Logistic sigmoid with output clipped away from exactly 0 and 1.

    The original computed 1 / (1 + np.exp(-1 * V + 1e-5)): operator
    precedence put the 1e-5 INSIDE the exponent, which merely shifts the
    sigmoid's input by a constant and does nothing to stop np.log(0) in
    Cost when the activation saturates.  Clipping the output keeps
    log(A) and log(1 - A) finite downstream.

    Args:
        V: array of logits.

    Returns:
        Array of the same shape with values in (0, 1), clipped to
        [1e-15, 1 - 1e-15].
    """
    activations = 1.0 / (1.0 + np.exp(-V))
    return np.clip(activations, 1e-15, 1 - 1e-15)
def Cost(X, C, W):
    """Mean logistic (cross-entropy) cost of weights W.

    Args:
        X: (n_features, n_samples) data matrix, one sample per column.
        C: (n_samples, 1) array of 0/1 labels.
        W: (n_features, 1) weight vector.

    Returns:
        (1, 1) ndarray holding the averaged negative log-likelihood.
    """
    n_samples = X.shape[1]
    predictions = sigmoind(np.matmul(X.T, W))
    log_likelihood = (np.dot(C.T, np.log(predictions))
                      + np.dot((1 - C).T, np.log(1 - predictions)))
    return (-1 / n_samples) * log_likelihood
def Gradient(X, C, W):
    """Unit-norm gradient of the logistic cost with respect to W.

    Note: the raw gradient is deliberately rescaled to unit Euclidean
    norm, so callers control step size entirely through the line search.

    Args:
        X: (n_features, n_samples) data matrix.
        C: (n_samples, 1) label vector.
        W: (n_features, 1) weight vector.

    Returns:
        (n_features, 1) gradient direction with norm 1.
    """
    n_samples = X.shape[1]
    residual = sigmoind(np.matmul(X.T, W)) - C
    raw_gradient = (1 / n_samples) * np.dot(X, residual)
    return raw_gradient / np.linalg.norm(raw_gradient)
def Hassian(X, C, W):
    """Hessian of the logistic cost: (1/m) * X D X^T.

    D is diagonal with entries a*(1-a) where a are the sigmoid
    activations.  `C` is unused but kept so the signature matches
    Cost/Gradient (and the F_A dispatcher).

    Args:
        X: (n_features, n_samples) data matrix.
        C: (n_samples, 1) label vector (ignored).
        W: (n_features, 1) weight vector.

    Returns:
        (n_features, n_features) Hessian matrix.
    """
    n_samples = X.shape[1]
    activations = sigmoind(np.matmul(X.T, W))
    curvature = np.diag((activations * (1 - activations)).reshape((-1)))
    return (1 / n_samples) * np.matmul(X, np.matmul(curvature, X.T))
def F_A(X, C, W):
    """Evaluate cost, gradient and Hessian at W in a single call."""
    cost = Cost(X, C, W)
    gradient = Gradient(X, C, W)
    hessian = Hassian(X, C, W)
    return cost, gradient, hessian
def test(d, X, C, W, func, abs_type, title, epsilon, max_iter):
    """Taylor-remainder verification test for a derivative operator.

    Perturbs X along direction d with shrinking step sizes epsilon**i and
    plots |f(x+eps*d) - f(x)| (should decay linearly in eps) against
    |f(x+eps*d) - f(x) - eps*d^T*func(x)| (should decay quadratically
    when `func` is the correct derivative).

    Args:
        d: perturbation direction, same shape as a column stack of X rows.
        X, C, W: arguments forwarded to Cost.
        func: derivative operator under test (Gradient or Hassian).
        abs_type: reduction mapping a difference to a scalar
            (np.abs for the gradient test, np.linalg.norm for the
            Hessian test, where the difference is a vector).
        title: plot title / saved-file prefix.
        epsilon: base step size (0 < epsilon < 1).
        max_iter: number of shrinking steps.
    """
    f_x = Cost(X, C, W)
    epsilons = [epsilon ** i for i in range(1, max_iter + 1)]
    tests = np.zeros((max_iter, 2))
    for i in range(max_iter):
        f_x_d = Cost(X + epsilons[i] * d, C, W)
        f_x_d_g = f_x + np.dot((epsilons[i] * d.T), func(X, C, W))
        # Use the caller-supplied reduction for BOTH columns.  The
        # original hard-coded np.abs here (its own comment flagged the
        # mistake), which is wrong when the difference is not scalar.
        tests[i, 0] = abs_type(f_x_d - f_x)
        tests[i, 1] = abs_type(f_x_d - f_x_d_g)
    plt.semilogy(range(1, max_iter + 1), tests[:, 0], 'r')
    plt.title(title + ': linear result')
    plt.legend(['linear result'])
    plt.xlabel('iterations')
    plt.ylabel('results')
    plt.savefig('imgs/{} linear result.png'.format(title))
    plt.show()
    plt.plot(range(1, max_iter + 1), tests[:, 1], 'b')
    plt.title(title + ': quadratic result')
    plt.legend(['quadratic result'])
    plt.xlabel('iterations')
    plt.ylabel('results')
    plt.savefig('imgs/{} quadratic result.png'.format(title))
    plt.show()
def Armijo_linesearch(X, C, W, gradient, direction, alpha=1, beta=0.5, c=1e-4, max_iter=4):
    """Backtracking line search under the Armijo sufficient-decrease rule.

    Shrinks alpha by factor beta until
        Cost(W + alpha*direction) <= Cost(W) + c*alpha*gradient^T*direction
    or max_iter backtracks have been tried; returns the last alpha either
    way.

    Args:
        X, C: data and labels forwarded to Cost.
        W: current weight vector.
        gradient: gradient of the cost at W.
        direction: proposed step direction.
        alpha: initial step size.
        beta: backtracking shrink factor in (0, 1).
        c: sufficient-decrease constant.
        max_iter: maximum number of backtracking steps.

    Returns:
        The accepted (or final shrunken) step size alpha.
    """
    for _ in range(max_iter):
        decrease_bound = Cost(X, C, W) + c * alpha * gradient.T.dot(direction)
        if Cost(X, C, W + alpha * direction) <= decrease_bound:
            break
        alpha = beta * alpha
    return alpha
def Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
    """Train logistic-regression weights by steepest descent with Armijo.

    Logs train/test cost each iteration, then plots and saves the
    convergence history (|cost_i - final cost| on a log scale).

    Args:
        X_train, X_test: (n_features, n_samples) data matrices.
        Y_train, Y_test: (n_samples, 1) 0/1 label vectors.
        W: (n_features, 1) initial weights.
        labels: pair (label1, label2) used only for the plot filename.
        max_iter: maximum number of descent iterations.

    Returns:
        The trained weight vector W.
    """
    label1, label2 = labels
    costs_train = []
    costs_test = []
    train_cost = 0.0
    test_cost = 0.0
    for i in range(max_iter):
        train_cost, grad = Cost(X_train, Y_train, W), Gradient(X_train, Y_train, W)
        train_cost = train_cost.reshape((-1))
        test_cost = Cost(X_test, Y_test, W).reshape((-1))
        print('Training cost after {} iterations is {}'.format(i, train_cost))
        print('Test cost after {} iterations is {}'.format(i, test_cost))
        print()
        if train_cost < 1e-10:
            break
        costs_train.append(train_cost)
        costs_test.append(test_cost)
        # Descent direction is MINUS the gradient.  The original passed
        # +grad as the search direction, so Armijo evaluated the ascent
        # point W + alpha*grad while the update actually stepped to
        # W - alpha*grad -- the accepted alpha never corresponded to the
        # point being tested.
        direction = -grad
        alpha = Armijo_linesearch(X_train, Y_train, W, grad, direction)
        W = W + alpha * direction
    # Plot distance of each iterate's cost from the final cost.
    costs_train = np.abs(np.array(costs_train) - train_cost)
    costs_test = np.abs(np.array(costs_test) - test_cost)
    plt.semilogy(costs_train, 'r')
    plt.semilogy(costs_test, 'b')
    plt.title('Steepest Descent: Training and Testing Convergence History')
    plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
    plt.xlabel('iterations')
    plt.ylabel('Cost')
    plt.savefig('imgs/Steepest Descent-Training and Testing {} vs {}.png'.format(label1, label2))
    plt.show()
    return W
def is_pos_def(x):
    """Return True iff every eigenvalue of x is strictly positive."""
    eigenvalues = np.linalg.eigvals(x)
    return (eigenvalues > 0).all()
def Newton_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
    """Train logistic-regression weights by damped Newton steps + Armijo.

    Uses the pseudo-inverse of (H + 0.009*I) -- the small diagonal shift
    keeps the Hessian invertible when it is near-singular.  Logs costs,
    then plots/saves the convergence history.

    Args:
        X_train, X_test: (n_features, n_samples) data matrices.
        Y_train, Y_test: (n_samples, 1) 0/1 label vectors.
        W: (n_features, 1) initial weights.
        labels: pair (label1, label2) used only for the plot filename.
        max_iter: number of Newton iterations.

    Returns:
        The trained weight vector W.
    """
    label1, label2 = labels
    costs_train = []
    costs_test = []
    for i in range(max_iter):
        train_cost, grad, hassian = F_A(X_train, Y_train, W)
        train_cost = train_cost.reshape((-1))
        test_cost = Cost(X_test, Y_test, W).reshape((-1))
        print('Newton: Training cost after {} iterations is {}'.format(i, train_cost))
        print('Test cost after {} iterations is {}'.format(i, test_cost))
        print()
        costs_train.append(train_cost)
        costs_test.append(test_cost)
        # The actual step is the damped-Newton direction.  The original
        # handed the line search direction=+grad, so Armijo tested
        # W + alpha*grad rather than the Newton step being taken; pass
        # the true direction so the accepted alpha matches the update.
        direction = -np.linalg.pinv(hassian + 0.009 * np.eye(hassian.shape[0])).dot(grad)
        alpha = Armijo_linesearch(X_train, Y_train, W, grad, direction)
        W = W + alpha * direction
    # Plot distance of each iterate's cost from the final cost.
    costs_train = np.abs(np.array(costs_train) - train_cost)
    costs_test = np.abs(np.array(costs_test) - test_cost)
    plt.semilogy(costs_train, 'r')
    plt.semilogy(costs_test, 'b')
    plt.title('Newton: Training and Testing Convergence History')
    plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
    plt.xlabel('iterations')
    plt.ylabel('Cost')
    plt.savefig('imgs/Newton Training and Testing {} vs {}.png'.format(label1, label2))
    plt.show()
    return W
def predict(X, W):
    """Binary prediction: True where the predicted probability >= 0.5.

    Args:
        X: (n_features, n_samples) data matrix, one sample per column.
        W: (n_features, 1) weight vector.

    Returns:
        (n_samples, 1) boolean array of class predictions.
    """
    probabilities = sigmoind(np.matmul(X.T, W))
    return probabilities >= 0.5
if __name__ == '__main__':
    # Binary task: digit 0 (relabeled 1) vs digit 1 (relabeled 0).
    X_train, Y_train, X_test, Y_test = load_mnist(0, 1)
    # Small nonzero initialization, shape (n_features, 1).
    W = np.zeros((X_train.shape[0], 1)) + 1e-5
    # Derivative-verification runs (disabled):
    # F_A(X_train, Y_train, W)
    # d = np.random.randn(X_train.shape[0], 1)
    # test(d, X_train, Y_train, W, Gradient, np.abs, 'Gradient Test', 0.5, 20)
    # test(d, X_train, Y_train, W, Hassian, np.linalg.norm, 'Hassian Test', 0.5, 20)
    # Alternative solver (disabled):
    # W = Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 100)
    W = Newton_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 45)
    # Train/test accuracy reporting (disabled):
    # print(np.mean(Y_train == predict(X_train, W)))
    # print(np.mean(Y_test == predict(X_test, W)))
    # exit()
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement