import numpy as np
from random import shuffle

def svm_loss_naive(W, X, y, reg, delta=1):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs:
    - W: K x D array of weights
    - X: D x N array of data. Data are D-dimensional columns
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as a single float
    - gradient with respect to weights W; an array of the same shape as W
    """
    K, D = W.shape
    N = y.shape[0]
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    loss = 0.
    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it in dW.             #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################
    for i in range(N):
        sample = X[:, i]
        current = np.dot(W, sample)
        gradient_coeff = 0
        for j in range(K):
            if j == y[i]:
                continue
            margin = current[j] - current[y[i]] + delta
            loss += max(0, margin)
            current_indicator = (margin > 0)
            gradient_coeff -= current_indicator
            dW[j] += current_indicator * sample
        dW[y[i]] += gradient_coeff * sample

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead, so we divide by N.
    loss /= N
    # Add regularization to the loss (the last column of W is left unregularized).
    loss += reg * np.sum(W[:, :-1] ** 2)
    dW /= N
    # Add regularization to the gradient
    for i in range(K):
        for j in range(D - 1):
            dW[i, j] += reg * 2 * W[i, j]
    return loss, dW
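
# A minimal sanity-check sketch (an addition, not part of the original paste):
# compare the analytic gradient from svm_loss_naive against a centered
# numerical gradient on a tiny random problem. The shapes, the bias row of
# ones, and the helper name are illustrative assumptions; small mismatches can
# appear at the hinge's kinks, where the loss is not differentiable.
def _check_svm_naive_gradient(K=5, D=10, N=7, reg=0.1, h=1e-5, seed=0):
    rng = np.random.RandomState(seed)
    W = rng.randn(K, D) * 0.01
    X = rng.randn(D, N)
    X[-1, :] = 1.0                      # assumed bias row of ones
    y = rng.randint(K, size=N)
    _, dW = svm_loss_naive(W, X, y, reg)
    for _ in range(5):                  # spot-check a few random entries of W
        i, j = rng.randint(K), rng.randint(D)
        W[i, j] += h
        loss_plus, _ = svm_loss_naive(W, X, y, reg)
        W[i, j] -= 2 * h
        loss_minus, _ = svm_loss_naive(W, X, y, reg)
        W[i, j] += h                    # restore the original weight
        numeric = (loss_plus - loss_minus) / (2 * h)
        print('analytic %f, numeric %f' % (dW[i, j], numeric))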


def svm_loss_vectorized(W, X, y, reg, delta=1):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs:
    - W: K x D array of weights
    - X: D x N array of data. Data are D-dimensional columns
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as a single float
    - gradient with respect to weights W; an array of the same shape as W
    """
    K, D = W.shape
    N = y.shape[0]
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # Scores for all samples, then margins relative to each sample's correct class.
    f = np.dot(W, X)
    f += delta - f[y, np.arange(N)]
    f[y, np.arange(N)] -= delta  # the correct classes contribute no margin
    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the structured SVM loss, storing the    #
    # result in loss.                                                           #
    #############################################################################
    loss = np.maximum(f, 0)
    loss = loss.sum() / N
    loss += np.sum(W[:, :-1] ** 2) * reg
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the gradient for the structured SVM     #
    # loss, storing the result in dW.                                           #
    #                                                                           #
    # Hint: Instead of computing the gradient from scratch, it may be easier    #
    # to reuse some of the intermediate values that you used to compute the     #
    # loss.                                                                     #
    #############################################################################
    f = (f > 0)
    # For each sample, the number of classes with a positive margin determines
    # how strongly its correct-class row of dW is pushed down.
    indicator = np.sum(f, axis=0) * X
    for i in range(K):
        dW[i] -= (indicator[:, y == i]).sum(axis=1)
        dW[i] += (f[i] * X).sum(axis=1)
    dW /= N
    # Add regularization to the gradient (the last column of W is left unregularized)
    dW += 2 * reg * W
    dW[:, -1] -= 2 * reg * W[:, -1]
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return loss, dW
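
# A minimal consistency-check sketch (an addition, not part of the original
# paste): the naive and vectorized SVM implementations should agree on the
# same random inputs. The shapes and the helper name are illustrative
# assumptions.
def _check_svm_vectorized(K=5, D=10, N=20, reg=0.1, seed=0):
    rng = np.random.RandomState(seed)
    W = rng.randn(K, D) * 0.01
    X = rng.randn(D, N)
    X[-1, :] = 1.0                      # assumed bias row of ones
    y = rng.randint(K, size=N)
    loss_naive, dW_naive = svm_loss_naive(W, X, y, reg)
    loss_vec, dW_vec = svm_loss_vectorized(W, X, y, reg)
    print('loss difference: %e' % abs(loss_naive - loss_vec))
    print('gradient difference: %e' % np.linalg.norm(dW_naive - dW_vec))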


import numpy as np
from random import shuffle
from math import log, exp

def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs:
    - W: K x D array of weights
    - X: D x N array of data. Data are D-dimensional columns
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as a single float
    - gradient with respect to weights W, an array of the same size as W
    """
    # Initialize the loss and gradient to zero.
    K, D = W.shape
    N = y.shape[0]
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    loss = 0.
    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.     #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    for i in range(N):
        scores = np.dot(W, X[:, i])
        scores -= np.max(scores)  # shift the scores for numerical stability
        current = np.exp(scores)
        loss -= log(current[y[i]])
        cur_sum = np.sum(current)
        loss += log(cur_sum)
        for j in range(K):
            dW[j] += 1. / cur_sum * X[:, i] * current[j]
        dW[y[i]] -= X[:, i]

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead, so we divide by N.
    loss /= N
    dW /= N
    # Add regularization to the loss (the last column of W is left unregularized).
    loss += reg * np.sum(W[:, :-1] ** 2)
    # Add regularization to the gradient
    for i in range(K):
        for j in range(D - 1):
            dW[i, j] += reg * 2 * W[i, j]
    return loss, dW
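
# A quick sanity-check sketch (an addition, not from the original paste): with
# near-zero weights the softmax probabilities are roughly uniform, so the
# unregularized loss should be close to log(K). The shapes and helper name are
# illustrative assumptions.
def _check_softmax_sanity(K=10, D=100, N=50, seed=0):
    rng = np.random.RandomState(seed)
    W = rng.randn(K, D) * 0.0001
    X = rng.randn(D, N)
    X[-1, :] = 1.0                      # assumed bias row of ones
    y = rng.randint(K, size=N)
    loss, _ = softmax_loss_naive(W, X, y, reg=0.0)
    print('loss %f, expected about %f' % (loss, np.log(K)))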


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    dW = np.zeros_like(W)
    K, D = W.shape
    N = y.shape[0]
    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                           #
    #############################################################################
    f = np.dot(W, X)
    f -= np.max(f, axis=0)  # shift each column's scores for numerical stability
    f = np.exp(f)
    f_sum = f.sum(axis=0)
    loss = np.log(f_sum / f[y, np.arange(N)])
    loss = np.sum(loss)
    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead, so we divide by N.
    loss /= N
    # Add regularization to the loss (the last column of W is left unregularized).
    loss += np.sum(W[:, :-1] ** 2) * reg

    # f / f_sum is the K x N matrix of softmax probabilities.
    dW = np.dot(f / f_sum, X.T)
    for i in range(K):
        dW[i] -= (X[:, y == i]).sum(axis=1)
    dW /= N
    # Add regularization to the gradient (the last column of W is left unregularized)
    dW += 2 * reg * W
    dW[:, -1] -= 2 * reg * W[:, -1]
    #############################################################################
    #                          END OF YOUR CODE                                 #
    #############################################################################

    return loss, dW
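
# A minimal consistency-check sketch (an addition, not part of the original
# paste): the naive and vectorized softmax implementations should produce
# matching losses and gradients on the same random data. The shapes and the
# helper name are illustrative assumptions.
def _check_softmax_vectorized(K=10, D=100, N=50, reg=0.1, seed=0):
    rng = np.random.RandomState(seed)
    W = rng.randn(K, D) * 0.01
    X = rng.randn(D, N)
    X[-1, :] = 1.0                      # assumed bias row of ones
    y = rng.randint(K, size=N)
    loss_naive, dW_naive = softmax_loss_naive(W, X, y, reg)
    loss_vec, dW_vec = softmax_loss_vectorized(W, X, y, reg)
    print('loss difference: %e' % abs(loss_naive - loss_vec))
    print('gradient difference: %e' % np.linalg.norm(dW_naive - dW_vec))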