Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from random import shuffle
- def svm_loss_naive(W, X, y, reg, delta=1):
- """
- Structured SVM loss function, naive implementation (with loops)
- Inputs:
- - W: K x D array of weights
- - X: D x N array of data. Data are D-dimensional columns
- - y: 1-dimensional array of length N with labels 0...K-1, for K classes
- - reg: (float) regularization strength
- Returns:
- a tuple of:
- - loss as single float
- - gradient with respect to weights W; an array of same shape as W
- """
- K, D = W.shape
- N = y.shape[0]
- dW = np.zeros(W.shape) # initialize the gradient as zero
- loss = 0.
- #############################################################################
- # TODO: #
- # Compute the gradient of the loss function and store it dW. #
- # Rather that first computing the loss and then computing the derivative, #
- # it may be simpler to compute the derivative at the same time that the #
- # loss is being computed. As a result you may need to modify some of the #
- # code above to compute the gradient. #
- #############################################################################
- for i in range(N):
- sample = X[:, i]
- current = np.dot(W, sample)
- gradient_coeff = 0
- for j in range(K):
- if j == y[i]:
- continue
- loss += max(0, current[j] - current[y[i]] + delta)
- current_indicator = (current[j] - current[y[i]] + delta > 0)
- gradient_coeff -= current_indicator
- dW[j] += current_indicator * sample
- dW[y[i]] += gradient_coeff * sample
- # Right now the loss is a sum over all training examples, but we want it
- # to be an average instead so we divide by num_train.
- loss /= N
- # Add regularization to the loss.
- loss += reg * np.sum(W[:,:-1] ** 2)
- dW /= N;
- # Add regularization to the gradient
- for i in range(K):
- for j in range(D - 1):
- dW[i,j] += reg * 2 * W[i][j]
- return loss, dW
- def svm_loss_vectorized(W, X, y, reg, delta=1):
- """
- Structured SVM loss function, vectorized implementation.
- Inputs:
- - W: K x D array of weights
- - X: D x N array of data. Data are D-dimensional columns
- - y: 1-dimensional array of length N with labels 0...K-1, for K classes
- - reg: (float) regularization strength
- Returns:
- a tuple of:
- - loss as single float
- - gradient with respect to weights W; an array of same shape as W
- """
- K, D = W.shape
- N = y.shape[0]
- dW = np.zeros(W.shape) # initialize the gradient as zero
- f = np.dot(W, X)
- f += delta - f[y, range(N)]
- f[y, range(N)] -= delta
- #############################################################################
- # TODO: #
- # Implement a vectorized version of the structured SVM loss, storing the #
- # result in loss. #
- #############################################################################
- loss = np.maximum(f, np.zeros((K, N)))
- loss = loss.sum() / N
- loss += np.sum(W[:,:-1] ** 2) * reg
- #############################################################################
- # END OF YOUR CODE #
- #############################################################################
- #############################################################################
- # TODO: #
- # Implement a vectorized version of the gradient for the structured SVM #
- # loss, storing the result in dW. #
- # #
- # Hint: Instead of computing the gradient from scratch, it may be easier #
- # to reuse some of the intermediate values that you used to compute the #
- # loss. #
- #############################################################################
- f = (f > 0)
- indicator = np.sum(f, axis=0) * X
- for i in range(K):
- dW[i] -= (indicator[:, y == i]).sum(axis=1)
- dW[i] += (f[i] * X).sum(axis=1)
- dW /= N
- # Add regularization to gradient
- dW += 2 * reg * W
- dW[:, -1] -= 2 * reg * W[:, -1]
- #############################################################################
- # END OF YOUR CODE #
- #############################################################################
- return loss, dW
- import numpy as np
- from random import shuffle
- from math import log, exp
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs:
    - W: K x D array of weights
    - X: D x N array of data. Data are D-dimensional columns
    - y: 1-dimensional array of length N with labels 0...K-1, for K classes
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W, an array of same size as W

    NOTE: regularization is applied to every column of W except the last,
    which this code treats as the bias column.
    """
    K, D = W.shape
    N = y.shape[0]
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    loss = 0.

    for i in range(N):
        scores = np.dot(W, X[:, i])
        # Shift scores by their max before exponentiating: softmax is
        # invariant under a constant shift, and without it exp() of large
        # raw scores overflows to inf (numeric instability).
        scores = scores - scores.max()
        exp_scores = np.exp(scores)
        exp_sum = np.sum(exp_scores)
        # Cross-entropy: -log(exp(f_y) / sum_j exp(f_j))
        loss += np.log(exp_sum) - scores[y[i]]
        for j in range(K):
            dW[j] += (exp_scores[j] / exp_sum) * X[:, i]
        dW[y[i]] -= X[:, i]

    # Average over the training set instead of summing.
    loss /= N
    dW /= N

    # Regularization; the bias column (last column) is excluded.
    loss += reg * np.sum(W[:, :-1] ** 2)
    dW[:, :-1] += 2 * reg * W[:, :-1]
    return loss, dW
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, fully vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.

    NOTE: regularization is applied to every column of W except the last,
    which this code treats as the bias column.
    """
    K, D = W.shape
    N = y.shape[0]
    cols = np.arange(N)

    scores = np.dot(W, X)
    # Per-column max-shift for numerical stability: softmax is invariant
    # under a constant shift, and exp() of large raw scores overflows.
    scores = scores - scores.max(axis=0)
    exp_scores = np.exp(scores)
    exp_sum = exp_scores.sum(axis=0)

    # loss_i = log(sum_j exp f_j) - f_{y_i}, averaged over the batch.
    loss = np.sum(np.log(exp_sum) - scores[y, cols]) / N
    # Regularization; the bias column (last column) is excluded.
    loss += reg * np.sum(W[:, :-1] ** 2)

    # Gradient: dW = (P - Y) X^T, with P the softmax probabilities and
    # Y the one-hot label matrix (subtract 1 at each correct-class entry).
    probs = exp_scores / exp_sum
    probs[y, cols] -= 1
    dW = np.dot(probs, X.T) / N
    dW[:, :-1] += 2 * reg * W[:, :-1]
    return loss, dW
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement