import numpy as np
import random
import math


def l2_loss(Y, predictions):
    '''
    Computes L2 loss (sum squared loss) between true values, Y, and predictions.

    :param Y A 1D Numpy array with real values (float64)
    :param predictions A 1D Numpy array of the same size of Y
    :return L2 loss using predictions for Y.
    '''
    return np.linalg.norm(Y - predictions) ** 2
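# Illustrative check (values made up for this comment, not from any dataset):
# l2_loss(np.array([1.0, 2.0]), np.array([1.0, 0.0])) returns 4.0, i.e. (1-1)^2 + (2-0)^2.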
def sigmoid(x):
    '''
    Sigmoid function f(x) = 1/(1 + exp(-x))

    :param x A scalar or Numpy array
    :return Sigmoid function evaluated at x (applied element-wise if it is an array)
    '''
    # Use the exp(x) form for negative x so the selected branch never overflows.
    return np.where(x > 0, 1 / (1 + np.exp(-x)), np.exp(x) / (np.exp(x) + 1))


def sigmoid_derivative(x):
    '''
    First derivative of the sigmoid function with respect to x.

    :param x A scalar or Numpy array
    :return Derivative of sigmoid evaluated at x (applied element-wise if it is an array)
    '''
    return sigmoid(x) * (1 - sigmoid(x))
    # Equivalent piecewise form:
    # return np.where(x > 0, np.exp(-x) / ((1 + np.exp(-x)) ** 2), np.exp(x) / ((np.exp(x) + 1) ** 2))
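# Illustrative check: sigmoid(0.0) = 0.5, and since f'(x) = f(x) * (1 - f(x)),
# sigmoid_derivative(0.0) = 0.25.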
class LinearRegression:
    '''
    LinearRegression model that minimizes squared error using matrix inversion.
    '''
    def __init__(self):
        '''
        @attrs:
            weights The weights of the linear regression model.
        '''
        self.weights = None

    def train(self, X, Y):
        '''
        Trains the LinearRegression model by finding the optimal set of weights
        using matrix inversion.

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return None
        '''
        # Normal equations: w = (X^T X)^{-1} X^T Y.
        A = sum(np.outer(x, x) for x in X)
        b = sum(x * y for x, y in zip(X, Y))
        self.weights = np.matmul(np.linalg.inv(A), b)

    def predict(self, X):
        '''
        Returns predictions of the model on a set of examples X.

        :param X 2D Numpy array where each row contains an example.
        :return A 1D Numpy array with one element for each row in X containing the predicted value.
        '''
        return np.array([np.dot(self.weights, x) for x in X])

    def loss(self, X, Y):
        '''
        Returns the total squared error on some dataset (X, Y).

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the squared error of the model on the dataset
        '''
        predictions = self.predict(X)
        return l2_loss(Y, predictions)

    def average_loss(self, X, Y):
        '''
        Returns the mean squared error on some dataset (X, Y).
        MSE = Total squared error / # of examples

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the mean squared error of the model on the dataset
        '''
        return self.loss(X, Y) / X.shape[0]
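def _demo_linear_regression():
    '''
    Minimal usage sketch for LinearRegression on synthetic data. The data and this
    helper are illustrative assumptions only; they are not part of the original paste.
    '''
    rng = np.random.RandomState(0)
    X_demo = rng.randn(50, 3)
    true_w = np.array([2.0, -1.0, 0.5])
    Y_demo = X_demo.dot(true_w)
    model = LinearRegression()
    model.train(X_demo, Y_demo)
    # With noiseless linear data the MSE should be near zero.
    print('LinearRegression demo MSE:', model.average_loss(X_demo, Y_demo))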
class OneLayerNN:
    '''
    One layer neural network trained with Stochastic Gradient Descent (SGD)
    '''
    def __init__(self):
        '''
        @attrs:
            weights The weights of the neural network model.
        '''
        self.weights = None

    def train(self, X, Y, learning_rate=0.001, epochs=250, print_loss=True):
        '''
        Trains the OneLayerNN model using SGD.

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :param learning_rate The learning rate to use for SGD
        :param epochs The number of times to pass through the dataset
        :param print_loss If True, print the loss after each epoch.
        :return None
        '''
        m = len(X)
        n = len(X[0])
        batch_size = 1
        num_batches = int(math.ceil(m / batch_size))
        zipped = list(zip(X, Y))
        # Shapes used below:
        #   X_      -> (batch_size, n)
        #   weights -> (n, 1)
        #   scores  -> (batch_size, 1)
        self.weights = np.zeros((n, 1))
        for e in range(epochs):
            random.shuffle(zipped)
            batched_X = np.array_split(np.array([i[0] for i in zipped]), num_batches)
            batched_Y = np.array_split(np.array([i[1] for i in zipped]), num_batches)
            for i in range(len(batched_X)):
                X_ = batched_X[i]
                Y_ = batched_Y[i]
                scores = np.matmul(X_, self.weights)               # (bs, 1)
                labels = np.array(Y_, dtype=float).reshape(-1, 1)  # (bs, 1)
                errors = scores - labels                           # (bs, 1)
                dW = np.matmul(X_.T, errors)                       # (n, 1)
                self.weights += -learning_rate * dW
            if print_loss:
                print('Epoch {}: average loss {}'.format(e, self.average_loss(X, Y)))

    def predict(self, X):
        '''
        Returns predictions of the model on a set of examples X.

        :param X 2D Numpy array where each row contains an example.
        :return A 1D Numpy array with one element for each row in X containing the predicted value.
        '''
        return np.matmul(X, self.weights).flatten()

    def loss(self, X, Y):
        '''
        Returns the total squared error on some dataset (X, Y).

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the squared error of the model on the dataset
        '''
        predictions = self.predict(X)
        return l2_loss(Y, predictions)

    def average_loss(self, X, Y):
        '''
        Returns the mean squared error on some dataset (X, Y).
        MSE = Total squared error / # of examples

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the mean squared error of the model on the dataset
        '''
        return self.loss(X, Y) / X.shape[0]

    def accuracy(self, X, Y):
        '''
        Returns the fraction of examples whose prediction exactly matches the label.
        Note: exact equality is only meaningful for discrete targets; for real-valued
        regression outputs this will usually be 0.
        '''
        pred = self.predict(X)
        cnt = 0.0
        for i in range(len(Y)):
            if Y[i] == pred[i]:
                cnt += 1
        return float(cnt / len(X))
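def _demo_one_layer_nn():
    '''
    Minimal usage sketch for OneLayerNN on synthetic data. The data and the
    hyperparameters below are illustrative assumptions, not from the original paste.
    '''
    rng = np.random.RandomState(1)
    X_demo = rng.randn(100, 4)
    true_w = np.array([1.0, -2.0, 0.5, 3.0])
    Y_demo = X_demo.dot(true_w)
    model = OneLayerNN()
    model.train(X_demo, Y_demo, learning_rate=0.01, epochs=50, print_loss=False)
    print('OneLayerNN demo MSE:', model.average_loss(X_demo, Y_demo))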
class TwoLayerNN:
    '''
    Two layer neural network trained with Stochastic Gradient Descent (SGD)
    and backpropagation.
    '''
    def __init__(self, hidden_size, activation=sigmoid, activation_derivative=sigmoid_derivative):
        '''
        @attrs:
            activation: the activation function applied after the first layer
            activation_derivative: the derivative of the activation function. Used for training.
            hidden_size: The hidden size of the network (an integer)
            output_neurons: The number of outputs of the network
        '''
        self.activation = activation
        self.activation_derivative = activation_derivative
        self.hidden_size = hidden_size
        # In this assignment, we will only use output_neurons = 1.
        self.output_neurons = 1
        # These are the learned parameters for the 2-Layer NN you will implement
        self.hidden_weights = None
        self.hidden_bias = None
        self.output_weights = None
        self.output_bias = None

    def train(self, X, Y, learning_rate=0.01, epochs=1000, print_loss=True):
        '''
        Trains the TwoLayerNN with SGD using Backpropagation.

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :param learning_rate The learning rate to use for SGD
        :param epochs The number of times to pass through the dataset
        :param print_loss If True, print the loss after each epoch.
        :return None
        '''
        m = len(X)
        n = len(X[0])
        batch_size = 1
        num_batches = int(math.ceil(m / batch_size))
        zipped = list(zip(X, Y))
        # Small random initialization of all parameters.
        self.hidden_weights = .1 * np.random.randn(n, self.hidden_size)                    # (n, hs)
        self.hidden_bias = .1 * np.random.randn(1, self.hidden_size)                       # (1, hs)
        self.output_weights = .1 * np.random.randn(self.hidden_size, self.output_neurons)  # (hs, 1)
        self.output_bias = .1 * np.random.randn(1, self.output_neurons)                    # (1, 1)
        for e in range(epochs):
            random.shuffle(zipped)
            batched_X = np.array_split(np.array([i[0] for i in zipped]), num_batches)
            batched_Y = np.array_split(np.array([i[1] for i in zipped]), num_batches)
            for i in range(len(batched_X)):
                X_ = batched_X[i]  # (bs, n)
                Y_ = batched_Y[i]  # (bs,)
                # Forward pass.
                hidden_outputs = np.matmul(X_, self.hidden_weights) + self.hidden_bias         # (bs, hs)
                hidden_activation = self.activation(hidden_outputs)                            # (bs, hs)
                scores = np.matmul(hidden_activation, self.output_weights) + self.output_bias  # (bs, 1)
                labels = np.array(Y_, dtype=float).reshape(-1, 1)                              # (bs, 1)
                errors = scores - labels                                                       # (bs, 1)
                # Backward pass: gradients of the (un-averaged) L2 loss.
                dWo = np.matmul(hidden_activation.T, errors)                  # (hs, bs) x (bs, 1) => (hs, 1)
                dbo = np.sum(errors, axis=0, keepdims=True)                   # (1, 1)
                hidden_dsigmoid = self.activation_derivative(hidden_outputs)  # (bs, hs)
                hidden_errors = np.multiply(np.matmul(errors, self.output_weights.T), hidden_dsigmoid)  # (bs, hs)
                dWh = np.matmul(X_.T, hidden_errors)                          # (n, hs)
                dbh = np.sum(hidden_errors, axis=0, keepdims=True)            # (1, hs)
                # The factor of 2 comes from d/ds (s - y)^2 = 2 (s - y).
                self.hidden_weights += 2 * (-learning_rate * dWh)
                self.hidden_bias += 2 * (-learning_rate * dbh)
                self.output_weights += 2 * (-learning_rate * dWo)
                self.output_bias += 2 * (-learning_rate * dbo)
            if print_loss:
                print('Epoch {}: average loss {}'.format(e, self.average_loss(X, Y)))

    def predict(self, X):
        '''
        Returns predictions of the model on a set of examples X.

        :param X 2D Numpy array where each row contains an example.
        :return A 1D Numpy array with one element for each row in X containing the predicted value.
        '''
        hidden_outputs = np.matmul(X, self.hidden_weights) + self.hidden_bias
        hidden_activation = self.activation(hidden_outputs)
        scores = np.matmul(hidden_activation, self.output_weights) + self.output_bias
        return scores.flatten()

    def loss(self, X, Y):
        '''
        Returns the total squared error on some dataset (X, Y).

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the squared error of the model on the dataset
        '''
        predictions = self.predict(X)
        return l2_loss(Y, predictions)

    def average_loss(self, X, Y):
        '''
        Returns the mean squared error on some dataset (X, Y).
        MSE = Total squared error / # of examples

        :param X 2D Numpy array where each row contains an example
        :param Y 1D Numpy array containing the corresponding values for each example
        :return A float which is the mean squared error of the model on the dataset
        '''
        return self.loss(X, Y) / X.shape[0]
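def _demo_two_layer_nn():
    '''
    Minimal usage sketch for TwoLayerNN on a small nonlinear regression problem.
    The data, hidden size, and hyperparameters are illustrative assumptions,
    not from the original paste.
    '''
    rng = np.random.RandomState(2)
    X_demo = rng.uniform(-1, 1, size=(200, 2))
    Y_demo = np.sin(X_demo[:, 0]) + X_demo[:, 1] ** 2
    model = TwoLayerNN(hidden_size=10)
    model.train(X_demo, Y_demo, learning_rate=0.01, epochs=100, print_loss=False)
    print('TwoLayerNN demo MSE:', model.average_loss(X_demo, Y_demo))


if __name__ == '__main__':
    _demo_linear_regression()
    _demo_one_layer_nn()
    _demo_two_layer_nn()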