import numpy as np
from scipy import sparse
from scipy.special import expit
class LogisticRegression:
    def __init__(self):
        self.w = None
        self.loss_history = None
    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this classifier using stochastic gradient descent.

        Inputs:
        - X: N x D array of training data. Each row is a D-dimensional point.
        - y: 1-dimensional array of length N with labels 0-1, for 2 classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing.
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Returns:
        self; the value of the loss function at each training iteration is
        stored in self.loss_history.
        """
        # Append a column of ones to X to account for the bias term.
        X = LogisticRegression.append_biases(X)
        num_train, dim = X.shape
        if self.w is None:
            # Lazily initialize the weights.
            self.w = np.random.randn(dim) * 0.01
        # Run stochastic gradient descent to optimize w.
        self.loss_history = []
        for it in range(num_iters):
            # Sample batch_size elements from the training data and their
            # corresponding labels for this round of gradient descent, so that
            # X_batch has shape (batch_size, dim) and y_batch has shape
            # (batch_size,). Sampling with replacement is faster than
            # sampling without replacement.
            batch = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[batch]
            y_batch = y[batch]
            # Evaluate the loss and gradient on the mini-batch.
            loss, gradW = self.loss(X_batch, y_batch, reg)
            self.loss_history.append(loss)

            # Update the weights using the gradient and the learning rate.
            self.w -= learning_rate * gradW
            if verbose and it % 10 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))
        return self
    def predict_proba(self, X, append_bias=False):
        """
        Use the trained weights of this linear classifier to predict
        probabilities for data points.

        Inputs:
        - X: N x D array of data. Each row is a D-dimensional point.
        - append_bias: bool. Whether to append a bias column before predicting.

        Returns:
        - y_proba: Probabilities of classes for the data in X. y_proba is a
          2-dimensional array of shape (N, 2); each row is a distribution over
          the two classes, [prob_class_0, prob_class_1].
        """
        if append_bias:
            X = LogisticRegression.append_biases(X)
        # P(y=1 | x) = sigmoid(x . w); expit is SciPy's numerically stable
        # sigmoid.
        h = expit(X.dot(self.w))
        # Stack P(y=0) and P(y=1) column-wise into an (N, 2) array.
        y_proba = np.vstack((1.0 - h, h)).T
        return y_proba
    def predict(self, X):
        """
        Use the ``predict_proba`` method to predict labels for data points.

        Inputs:
        - X: N x D array of data. Each row is a D-dimensional point.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a
          1-dimensional array of length N, and each element is an integer
          giving the predicted class.
        """
        y_proba = self.predict_proba(X, append_bias=True)
        y_pred = np.argmax(y_proba, axis=1)
        return y_pred
    def loss(self, X_batch, y_batch, reg):
        """Logistic regression loss function.

        Inputs:
        - X_batch: batch_size x D array of data. Data are D-dimensional rows.
        - y_batch: 1-dimensional array of length batch_size with labels 0-1,
          for 2 classes.
        - reg: (float) regularization strength.

        Returns a tuple of:
        - loss as a single float
        - gradient with respect to weights w; an array of the same shape as w
        """
        num_train = X_batch.shape[0]
        # Predicted probabilities h = sigmoid(X w); fully vectorized, no
        # Python loops.
        h = expit(X_batch.dot(self.w))
        # Cross-entropy loss, averaged over the batch rather than summed.
        loss = -np.mean(y_batch * np.log(h) + (1.0 - y_batch) * np.log(1.0 - h))
        # Gradient of the averaged cross-entropy: (1/N) X^T (h - y).
        dw = X_batch.T.dot(h - y_batch) / num_train
        # Add L2 regularization to the loss and gradient, excluding the bias
        # term (the last weight).
        loss += reg * np.sum(self.w[:-1] ** 2)
        dw += 2 * reg * np.hstack((self.w[:-1], [0]))
        return loss, dw
    @staticmethod
    def append_biases(X):
        return sparse.hstack((X, np.ones(X.shape[0])[:, np.newaxis])).tocsr()
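
# A minimal usage sketch of the class above. The synthetic two-cluster data
# and all names below (X0, X1, clf) are illustrative assumptions, not part of
# the original paste.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    # Two Gaussian clusters in 5 dimensions, labeled 0 and 1.
    X0 = rng.randn(500, 5) - 1.0
    X1 = rng.randn(500, 5) + 1.0
    X = sparse.csr_matrix(np.vstack((X0, X1)))
    y = np.hstack((np.zeros(500), np.ones(500)))

    clf = LogisticRegression()
    clf.train(X, y, learning_rate=1e-1, reg=1e-5, num_iters=300,
              batch_size=200)
    print('final loss: %f' % clf.loss_history[-1])
    print('training accuracy: %.3f' % np.mean(clf.predict(X) == y))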