import numpy as np
from scipy import sparse
from scipy.special import expit


class LogisticRegression:
    def __init__(self):
        self.w = None
        self.loss_history = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this classifier using stochastic gradient descent.

        Inputs:
        - X: N x D array of training data. Each training point is a
          D-dimensional row.
        - y: 1-dimensional array of length N with labels 0-1, for 2 classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing.
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        self, with the loss value of each training iteration stored in
        self.loss_history.
        """
        # Add a column of ones to X for the bias term.
        X = LogisticRegression.append_biases(X)
        num_train, dim = X.shape
        if self.w is None:
            # Lazily initialize the weights.
            self.w = np.random.randn(dim) * 0.01

        # Run stochastic gradient descent to optimize w.
        self.loss_history = []
        for it in range(num_iters):
            # Sample batch_size training examples and their labels; sampling
            # with replacement (np.random.choice) is faster than without.
            # X_batch has shape (batch_size, dim), y_batch has shape (batch_size,).
            batch = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[batch]
            y_batch = y[batch]
            # Evaluate the loss and gradient on the mini-batch.
            loss, gradW = self.loss(X_batch, y_batch, reg)
            self.loss_history.append(loss)

            # Update the weights along the negative gradient.
            self.w -= learning_rate * gradW

            if verbose and it % 10 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return self

    def predict_proba(self, X, append_bias=False):
        """
        Use the trained weights of this linear classifier to predict
        probabilities for data points.

        Inputs:
        - X: N x D array of data. Each row is a D-dimensional point.
        - append_bias: bool. Whether to append bias before predicting or not.

        Returns:
        - y_proba: Probabilities of classes for the data in X. y_proba is a
          2-dimensional array with shape (N, 2), and each row is a distribution
          over classes [prob_class_0, prob_class_1].
        """
        if append_bias:
            X = LogisticRegression.append_biases(X)

        # Scores X.w, then the sigmoid gives P(y = 1 | x) for each row.
        theta_X = X.dot(self.w)
        h_theta_X = expit(theta_X)

        # Stack P(y = 0) and P(y = 1) column-wise into an (N, 2) array.
        y_proba = np.vstack((1.0 - h_theta_X, h_theta_X)).T
        return y_proba

    def predict(self, X):
        """
        Use the ``predict_proba`` method to predict labels for data points.

        Inputs:
        - X: N x D array of data. Each row is a D-dimensional point.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        # The predicted label is the class with the larger probability.
        y_proba = self.predict_proba(X, append_bias=True)
        y_pred = np.argmax(y_proba, axis=1)
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """Logistic Regression loss function
        Inputs:
        - X: N x D array of data. Data are D-dimensional rows.
        - y: 1-dimensional array of length N with labels 0-1, for 2 classes.
        Returns:
        a tuple of:
        - loss as a single float
        - gradient with respect to weights w; an array of the same shape as w
        """
        num_train = X_batch.shape[0]

        # Predicted probabilities h = sigmoid(X.w) for the batch.
        theta_X = X_batch.dot(self.w)
        h_theta_X = expit(theta_X)

        # Average cross-entropy loss over the batch.
        loss = -np.mean(y_batch * np.log(h_theta_X) +
                        (1.0 - y_batch) * np.log(1.0 - h_theta_X))

        # Gradient of the average loss: X^T (h - y) / num_train.
        dw = X_batch.T.dot(h_theta_X - y_batch) / num_train

        # Add L2 regularization to the loss and gradient.
        # The bias term (the last weight) is excluded from regularization.
        loss += reg * np.sum(self.w[:-1] ** 2)
        dw += 2 * reg * np.hstack((self.w[:-1], [0]))

        return loss, dw

    @staticmethod
    def append_biases(X):
        # Append a column of ones to the (sparse) design matrix for the bias term.
        return sparse.hstack((X, np.ones(X.shape[0])[:, np.newaxis])).tocsr()
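

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original class). It trains the classifier on a
# small synthetic sparse dataset and then compares the analytic gradient
# returned by `loss` with a centered numerical estimate. The data shapes,
# learning rate, iteration count, and step size `h` below are illustrative
# assumptions, not values taken from the original paste.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    rng = np.random.RandomState(0)

    # Two Gaussian blobs stored as a sparse CSR matrix, as the class expects.
    N, D = 400, 20
    X_dense = np.vstack((rng.randn(N // 2, D) + 1.0,
                         rng.randn(N // 2, D) - 1.0))
    y = np.hstack((np.ones(N // 2), np.zeros(N // 2)))
    X = sparse.csr_matrix(X_dense)

    clf = LogisticRegression()
    clf.train(X, y, learning_rate=1e-1, reg=1e-5, num_iters=300,
              batch_size=100, verbose=True)

    y_pred = clf.predict(X)
    print('training accuracy: %.3f' % np.mean(y_pred == y))

    # Centered-difference gradient check on a fixed mini-batch:
    # compare (loss(w_i + h) - loss(w_i - h)) / (2h) with the analytic dw_i.
    check_idx = rng.choice(N, 50, replace=False)
    X_check = LogisticRegression.append_biases(X[check_idx])
    y_check = y[check_idx]
    _, grad = clf.loss(X_check, y_check, reg=1e-5)

    h = 1e-5
    num_grad = np.zeros_like(clf.w)
    for i in range(clf.w.shape[0]):
        clf.w[i] += h
        loss_plus, _ = clf.loss(X_check, y_check, reg=1e-5)
        clf.w[i] -= 2 * h
        loss_minus, _ = clf.loss(X_check, y_check, reg=1e-5)
        clf.w[i] += h  # restore the weight
        num_grad[i] = (loss_plus - loss_minus) / (2 * h)
    print('max |analytic - numerical| gradient difference: %.2e'
          % np.max(np.abs(grad - num_grad)))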