import numpy as np


def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: binary training labels, a N dimensional numpy array where
    N is the number of training points, indicating the labels of
    training data
    - loss: loss type, either perceptron or logistic
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: D-dimensional vector, a numpy array which is the weight
    vector of logistic or perceptron regression
    - b: scalar, which is the bias of logistic or perceptron regression
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2

    w = np.zeros(D)
    if w0 is not None:
        w = w0

    b = 0
    if b0 is not None:
        b = b0

    if loss == "perceptron":
        ############################################
        # TODO 1 : Edit this if part              #
        # Compute w and b here                    #

        # map the labels from {0, 1} to {-1, +1}, so that
        # y * (w @ x + b) > 0 means "correctly classified"
        y = 2 * y - 1

        for i in range(max_iterations):
            count = 0
            acc_grad_wrt_w, acc_grad_wrt_b = np.zeros(w.shape), 0

            for ex_x, ex_y in zip(X, y):
                if ex_y * (w @ ex_x + b) <= 0:
                    # accumulate the gradient only for misclassified examples
                    acc_grad_wrt_w += -ex_y * ex_x
                    acc_grad_wrt_b += -ex_y
                    count += 1

            if count == 0:
                # every example is classified correctly, so stop early
                break

            # averaged gradient-descent update
            w = w - step_size * (1 / len(y)) * acc_grad_wrt_w
            b = b - step_size * (1 / len(y)) * acc_grad_wrt_b

        ############################################

    elif loss == "logistic":
        ############################################
        # TODO 2 : Edit this if part              #
        # Compute w and b here                    #

        # map the labels from {0, 1} to {-1, +1}
        y = 2 * y - 1

        for i in range(max_iterations):
            hypothesis = X @ w + b
            y_m_hypothesis = y * hypothesis
            # note: sigmoid(y * z) * exp(-y * z) equals sigmoid(-y * z)
            grad_wrt_w = np.transpose(X) @ (-y * sigmoid(y_m_hypothesis) * np.exp(-y_m_hypothesis))
            grad_wrt_b = np.sum(-y * sigmoid(y_m_hypothesis) * np.exp(-y_m_hypothesis))

            w = w - step_size * (1 / len(y)) * grad_wrt_w
            b = b - step_size * (1 / len(y)) * grad_wrt_b

        ############################################

    else:
        raise ValueError("Loss Function is undefined.")

    assert w.shape == (D,)
    return w, b

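
# Added note (not part of the original paste): with labels mapped to
# y in {-1, +1} and score z = w @ x + b, the per-example losses behind the
# two branches of binary_train above are
#
#   perceptron: max(0, -y * z)        -> gradient  -y * x  (w)  and  -y  (b)
#                                        whenever y * z <= 0, else 0
#   logistic:   log(1 + exp(-y * z))  -> gradient  -y * sigmoid(-y * z) * x  (w)
#                                        and       -y * sigmoid(-y * z)      (b)
#
# and sigmoid(y * z) * exp(-y * z) == sigmoid(-y * z), which is why the code
# multiplies sigmoid(y_m_hypothesis) by np.exp(-y_m_hypothesis).
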
def sigmoid(z):
    """
    Inputs:
    - z: a numpy array or a float number

    Returns:
    - value: a numpy array or a float number after computing sigmoid function value = 1/(1+exp(-z)).
    """

    ############################################
    # TODO 3 : Edit this part to              #
    # Compute value                           #
    value = 1 / (1 + np.exp(-z))
    ############################################

    return value

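
# Optional aside (added, not in the original paste): np.exp(-z) overflows for
# large negative z and triggers runtime warnings. A sketch of a numerically
# safer variant, assuming z is passed as a numpy array; the name
# stable_sigmoid is illustrative and nothing below depends on it.
def stable_sigmoid(z):
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))   # exponent argument is <= 0 here
    exp_z = np.exp(z[~pos])                    # z < 0, so exp(z) is in (0, 1)
    out[~pos] = exp_z / (1.0 + exp_z)
    return out
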
def binary_predict(X, w, b, loss="perceptron"):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - w: D-dimensional vector, a numpy array which is the weight
    vector of your learned model
    - b: scalar, which is the bias of your model
    - loss: loss type, either perceptron or logistic

    Returns:
    - preds: N dimensional vector of binary predictions: {0, 1}
    """
    N, D = X.shape

    if loss == "perceptron":
        ############################################
        # TODO 4 : Edit this if part              #
        # Compute preds                           #
        # predict 1 when the raw score is positive, 0 otherwise
        preds = X @ w + b
        preds = np.array([1. if p > 0 else 0. for p in preds])
        ############################################

    elif loss == "logistic":
        ############################################
        # TODO 5 : Edit this if part              #
        # Compute preds                           #
        # threshold the sigmoid probability at 0.5
        preds = sigmoid(X @ w + b)
        preds = np.array([1. if p > 0.5 else 0. for p in preds])
        ############################################

    else:
        raise ValueError("Loss Function is undefined.")

    assert preds.shape == (N,)
    return preds

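
# Minimal usage sketch (added, not part of the original assignment): fit both
# binary models on a tiny, linearly separable toy set (logical AND) and report
# training accuracy. The toy data and the helper name are illustrative only.
def _binary_demo():
    toy_X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
    toy_y = np.array([0, 0, 0, 1])
    for loss in ("perceptron", "logistic"):
        w, b = binary_train(toy_X, toy_y, loss=loss)
        preds = binary_predict(toy_X, w, b, loss=loss)
        print(loss, "training accuracy:", np.mean(preds == toy_y))
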
def multiclass_train(X, y, C,
                     w0=None,
                     b0=None,
                     gd_type="sgd",
                     step_size=0.5,
                     max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: multiclass training labels, a N dimensional numpy array where
    N is the number of training points, indicating the labels of
    training data
    - C: number of classes in the data
    - gd_type: gradient descent type, either GD or SGD
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: C-by-D weight matrix of multinomial logistic regression, where
    C is the number of classes and D is the dimensionality of features.
    - b: bias vector of length C, where C is the number of classes
    """

    N, D = X.shape

    w = np.zeros((C, D))
    if w0 is not None:
        w = w0

    b = np.zeros(C)
    if b0 is not None:
        b = b0

    np.random.seed(42)
    if gd_type == "sgd":
        ############################################
        # TODO 6 : Edit this if part              #
        # Compute w and b                         #

        for i in range(max_iterations):
            # pick one training example uniformly at random
            ex_idx = np.random.randint(0, X.shape[0])
            x_ex, y_ex = X[ex_idx, :], y[ex_idx]

            # one-hot encode its label
            one_hot = np.zeros((C,))
            one_hot[y_ex] = 1

            logits = np.transpose(w @ np.transpose(x_ex)) + b
            probs = softmax(logits)

            # cross-entropy gradient: softmax probabilities minus one-hot target
            b_grad = probs - one_hot
            w_grad = np.outer(b_grad, x_ex)

            w = w - step_size * w_grad
            b = b - step_size * b_grad

        ############################################

    elif gd_type == "gd":
        ############################################
        # TODO 7 : Edit this if part              #
        # Compute w and b                         #
        ############################################

        # convert y to one-hot
        one_hot = np.zeros((len(y), C))
        one_hot[np.arange(len(y)), y] = 1

        for i in range(max_iterations):
            logits = np.transpose(w @ np.transpose(X)) + b
            probs = softmax(logits)

            probs_minus_one_hot = probs - one_hot
            b_grad = np.sum(probs_minus_one_hot, axis=0)
            w_grad = np.transpose(probs_minus_one_hot) @ X

            # averaged gradient-descent update
            w = w - step_size * (1 / len(y)) * w_grad
            b = b - step_size * (1 / len(y)) * b_grad

            # stop early once the gradient norms are small
            if np.linalg.norm(w_grad) + np.linalg.norm(b_grad) < 0.1:
                break

    else:
        raise ValueError("Type of Gradient Descent is undefined.")

    assert w.shape == (C, D)
    assert b.shape == (C,)

    return w, b

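
# Added note (not part of the original paste): multiclass_train minimizes the
# softmax cross-entropy. For one example x with one-hot target t and
# probabilities p = softmax(w @ x + b), the gradient of the loss with respect
# to the logits is (p - t), hence
#
#   grad_b = p - t    and    grad_w = outer(p - t, x),
#
# which is exactly what the SGD branch computes; the GD branch averages the
# same quantities over all N training examples.
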
def softmax(weights, verbose=False):
    weights = np.transpose(np.transpose(weights) - np.amax(weights, axis=-1))
    numerator = np.exp(weights)
    denominator = np.sum(numerator, axis=-1)
    return np.transpose(np.transpose(numerator) / denominator)  # element wise

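
# Small sanity check (added, not part of the original paste): the max shift in
# softmax keeps np.exp from overflowing without changing the result, and each
# row of the output sums to 1. The helper name is illustrative only.
def _softmax_check():
    p = softmax(np.array([1000.0, 1001.0]))   # would overflow without the shift
    assert np.allclose(p, [1.0 / (1.0 + np.e), np.e / (1.0 + np.e)])
    batch = softmax(np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]))
    assert np.allclose(batch.sum(axis=-1), 1.0)
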
def multiclass_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - w: weights of the trained multinomial classifier, C-by-D
    - b: bias terms of the trained multinomial classifier, length of C

    Returns:
    - preds: N dimensional vector of multiclass predictions.
    Outputted predictions should be from {0, C - 1}, where
    C is the number of classes
    """
    N, D = X.shape
    ############################################
    # TODO 8 : Edit this part to              #
    # Compute preds                           #
    logits = X @ np.transpose(w) + b
    preds = np.argmax(logits, axis=1).astype(float)
    ############################################

    assert preds.shape == (N,)
    return preds

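
# Minimal usage sketch (added, not part of the original assignment): fit the
# multinomial model on three well-separated 2-D clusters and report training
# accuracy for both gradient-descent variants. Data and names are illustrative.
def _multiclass_demo():
    toy_X = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1],
                      [5.0, 0.0], [5.1, 0.0], [5.0, 0.1],
                      [0.0, 5.0], [0.1, 5.0], [0.0, 5.1]])
    toy_y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
    for gd_type in ("sgd", "gd"):
        w, b = multiclass_train(toy_X, toy_y, 3, gd_type=gd_type)
        preds = multiclass_predict(toy_X, w, b)
        print(gd_type, "training accuracy:", np.mean(preds == toy_y))


if __name__ == "__main__":
    _binary_demo()
    _softmax_check()
    _multiclass_demo()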