Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from random import shuffle
- def svm_loss_naive(W, X, y, reg, delta=1):
- """
- Structured SVM loss function, naive implementation (with loops)
- Inputs:
- - W: K x D array of weights
- - X: D x N array of data. Data are D-dimensional columns
- - y: 1-dimensional array of length N with labels 0...K-1, for K classes
- - reg: (float) regularization strength
- Returns:
- a tuple of:
- - loss as single float
- - gradient with respect to weights W; an array of same shape as W
- """
- K, D = W.shape
- N = y.shape[0]
- dW = np.zeros(W.shape) # initialize the gradient as zero
- loss = 0.
- #############################################################################
- # TODO: #
- # Compute the gradient of the loss function and store it dW. #
- # Rather that first computing the loss and then computing the derivative, #
- # it may be simpler to compute the derivative at the same time that the #
- # loss is being computed. As a result you may need to modify some of the #
- # code above to compute the gradient. #
- #############################################################################
- for i in range(N):
- sample = X[:, i]
- current = np.dot(W, sample)
- gradient_coeff = 0
- for j in range(K):
- if j == y[i]:
- continue
- loss += max(0, current[j] - current[y[i]] + delta)
- current_indicator = (current[j] - current[y[i]] + delta > 0)
- gradient_coeff -= current_indicator
- dW[j] += current_indicator * sample
- dW[y[i]] += gradient_coeff * sample
- # Right now the loss is a sum over all training examples, but we want it
- # to be an average instead so we divide by num_train.
- loss /= N
- # Add regularization to the loss.
- loss += reg * np.sum(W[:,:-1] ** 2)
- dW /= N;
- # Add regularization to the gradient
- for i in range(K):
- for j in range(D - 1):
- dW[i,j] += reg * 2 * W[i][j]
- return loss, dW
- def svm_loss_vectorized(W, X, y, reg, delta=1):
- """
- Structured SVM loss function, vectorized implementation.
- Inputs:
- - W: K x D array of weights
- - X: D x N array of data. Data are D-dimensional columns
- - y: 1-dimensional array of length N with labels 0...K-1, for K classes
- - reg: (float) regularization strength
- Returns:
- a tuple of:
- - loss as single float
- - gradient with respect to weights W; an array of same shape as W
- """
- K, D = W.shape
- N = y.shape[0]
- dW = np.zeros(W.shape) # initialize the gradient as zero
- f = np.dot(W, X)
- f += delta - f[y, range(N)]
- f[y, range(N)] -= delta
- #############################################################################
- # TODO: #
- # Implement a vectorized version of the structured SVM loss, storing the #
- # result in loss. #
- #############################################################################
- loss = np.maximum(f, np.zeros((K, N)))
- loss = loss.sum() / N
- loss += np.sum(W[:,:-1] ** 2) * reg
- #############################################################################
- # END OF YOUR CODE #
- #############################################################################
- #############################################################################
- # TODO: #
- # Implement a vectorized version of the gradient for the structured SVM #
- # loss, storing the result in dW. #
- # #
- # Hint: Instead of computing the gradient from scratch, it may be easier #
- # to reuse some of the intermediate values that you used to compute the #
- # loss. #
- #############################################################################
- f = (f > 0)
- indicator = np.sum(f, axis=0) * X
- for i in range(K):
- dW[i] -= (indicator[:, y == i]).sum(axis=1)
- dW[i] += (f[i] * X).sum(axis=1)
- dW /= N
- # Add regularization to gradient
- dW += 2 * reg * W
- dW[:, -1] -= 2 * reg * W[:, -1]
- #############################################################################
- # END OF YOUR CODE #
- #############################################################################
- return loss, dW
- import numpy as np
- from random import shuffle
- from math import log, exp
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs:
    - W: K x D array of weights
    - X: D x N array of data. Data are D-dimensional columns
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W, an array of same size as W

    NOTE: regularization is applied to every column of W except the last,
    which this code treats as the bias column.
    """
    K, D = W.shape
    N = y.shape[0]
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    loss = 0.

    for i in range(N):
        scores = np.dot(W, X[:, i])
        # Shift scores by their max before exponentiating: softmax is
        # invariant under a constant shift, and without it exp() of large
        # raw scores overflows to inf (numeric instability).
        scores = scores - scores.max()
        exp_scores = np.exp(scores)
        exp_sum = np.sum(exp_scores)
        # Cross-entropy: -log(exp(f_y) / sum_j exp(f_j))
        loss += np.log(exp_sum) - scores[y[i]]
        for j in range(K):
            dW[j] += (exp_scores[j] / exp_sum) * X[:, i]
        dW[y[i]] -= X[:, i]

    # Average over the training set instead of summing.
    loss /= N
    dW /= N

    # Regularization; the bias column (last column) is excluded.
    loss += reg * np.sum(W[:, :-1] ** 2)
    dW[:, :-1] += 2 * reg * W[:, :-1]
    return loss, dW
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, fully vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.

    NOTE: regularization is applied to every column of W except the last,
    which this code treats as the bias column.
    """
    K, D = W.shape
    N = y.shape[0]
    cols = np.arange(N)

    scores = np.dot(W, X)
    # Per-column max-shift for numerical stability: softmax is invariant
    # under a constant shift, and exp() of large raw scores overflows.
    scores = scores - scores.max(axis=0)
    exp_scores = np.exp(scores)
    exp_sum = exp_scores.sum(axis=0)

    # loss_i = log(sum_j exp f_j) - f_{y_i}, averaged over the batch.
    loss = np.sum(np.log(exp_sum) - scores[y, cols]) / N
    # Regularization; the bias column (last column) is excluded.
    loss += reg * np.sum(W[:, :-1] ** 2)

    # Gradient: dW = (P - Y) X^T, with P the softmax probabilities and
    # Y the one-hot label matrix (subtract 1 at each correct-class entry).
    probs = exp_scores / exp_sum
    probs[y, cols] -= 1
    dW = np.dot(probs, X.T) / N
    dW[:, :-1] += 2 * reg * W[:, :-1]
    return loss, dW
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement