import numpy as np


def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Inputs:
    - X: training features, an N-by-D numpy array, where N is the
      number of training points and D is the dimensionality of features
    - y: binary training labels, an N-dimensional numpy array of {0, 1}
      values, where N is the number of training points
    - loss: loss type, either "perceptron" or "logistic"
    - w0: optional initial weight vector, a D-dimensional numpy array;
      defaults to the zero vector
    - b0: optional initial bias, a scalar; defaults to 0
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: D-dimensional numpy array, the weight vector of the learned
      logistic or perceptron model
    - b: scalar, the bias of the learned logistic or perceptron model
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2

    w = np.zeros(D)
    if w0 is not None:
        w = w0

    b = 0
    if b0 is not None:
        b = b0
    if loss == "perceptron":
        ############################################
        # TODO 1 : Edit this if part               #
        #          Compute w and b here            #
        # Map the {0, 1} labels to {-1, +1} so that the sign test
        # y * (w @ x + b) <= 0 flags misclassified points.
        y = 2 * y - 1
        for i in range(max_iterations):
            count = 0
            acc_grad_wrt_w, acc_grad_wrt_b = np.zeros(w.shape), 0
            for ex_x, ex_y in zip(X, y):
                if ex_y * (w @ ex_x + b) <= 0:
                    # Accumulate the gradient over misclassified points.
                    acc_grad_wrt_w += -ex_y * ex_x
                    acc_grad_wrt_b += -ex_y
                    count += 1
            if count == 0:
                # Every point is classified correctly; we have converged.
                break
            # Averaged update over the whole training set.
            w = w - step_size * (1 / len(y)) * acc_grad_wrt_w
            b = b - step_size * (1 / len(y)) * acc_grad_wrt_b
        ############################################
    elif loss == "logistic":
        ############################################
        # TODO 2 : Edit this if part               #
        #          Compute w and b here            #
        # Map the {0, 1} labels to {-1, +1}.
        y = 2 * y - 1
        for i in range(max_iterations):
            margins = y * (X @ w + b)
            # For the logistic loss log(1 + exp(-y * z)) with z = w @ x + b,
            # the gradient w.r.t. z is -y * sigmoid(-y * z); the single
            # sigmoid call stays numerically stable for large margins.
            grad_wrt_w = X.T @ (-y * sigmoid(-margins))
            grad_wrt_b = np.sum(-y * sigmoid(-margins))
            w = w - step_size * (1 / len(y)) * grad_wrt_w
            b = b - step_size * (1 / len(y)) * grad_wrt_b
        ############################################
    else:
        raise ValueError("Loss Function is undefined.")

    assert w.shape == (D,)
    return w, b
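

# --- Sanity-check sketch, added for illustration; not part of the original
# assignment template. A finite-difference check of the averaged logistic-loss
# gradient used above. The helper name and tolerances are illustrative
# choices; call it after the module has loaded, since it relies on sigmoid
# (defined below).
def _check_logistic_gradient(num_points=20, num_dims=3, eps=1e-6):
    rng = np.random.RandomState(0)
    X = rng.randn(num_points, num_dims)
    y = rng.choice([-1.0, 1.0], size=num_points)  # labels already in {-1, +1}
    w, b = rng.randn(num_dims), 0.3

    def avg_loss(w_, b_):
        margins = y * (X @ w_ + b_)
        return np.mean(np.log(1 + np.exp(-margins)))

    # Analytic gradient, matching the update used in binary_train.
    grad_w = X.T @ (-y * sigmoid(-(y * (X @ w + b)))) / num_points
    # Central-difference approximation, one coordinate at a time.
    for d in range(num_dims):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[d] += eps
        w_minus[d] -= eps
        numeric = (avg_loss(w_plus, b) - avg_loss(w_minus, b)) / (2 * eps)
        assert abs(numeric - grad_w[d]) < 1e-4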


def sigmoid(z):
    """
    Inputs:
    - z: a numpy array or a float number

    Returns:
    - value: a numpy array or a float number after computing the sigmoid
      function value = 1 / (1 + exp(-z))
    """
    ############################################
    # TODO 3 : Edit this part to               #
    #          Compute value                   #
    value = 1 / (1 + np.exp(-z))
    ############################################
    return value
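

# --- Sanity-check sketch, added for illustration; not part of the original
# assignment template. Two quick properties of the sigmoid: sigmoid(0) = 0.5
# and the symmetry sigmoid(-z) = 1 - sigmoid(z). The last assertion verifies
# the identity sigmoid(t) * exp(-t) = sigmoid(-t), which justifies writing
# the logistic gradient above with a single sigmoid call.
def _check_sigmoid():
    z = np.linspace(-5, 5, 11)
    assert sigmoid(0.0) == 0.5
    assert np.allclose(sigmoid(-z), 1 - sigmoid(z))
    assert np.allclose(sigmoid(z) * np.exp(-z), sigmoid(-z))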


def binary_predict(X, w, b, loss="perceptron"):
    """
    Inputs:
    - X: testing features, an N-by-D numpy array, where N is the
      number of testing points and D is the dimensionality of features
    - w: D-dimensional numpy array, the weight vector of your learned model
    - b: scalar, the bias of your model
    - loss: loss type, either "perceptron" or "logistic"

    Returns:
    - preds: N-dimensional vector of binary predictions in {0, 1}
    """
    N, D = X.shape

    if loss == "perceptron":
        ############################################
        # TODO 4 : Edit this if part               #
        #          Compute preds                   #
        # Predict 1 on the positive side of the hyperplane, 0 otherwise.
        preds = (X @ w + b > 0).astype(float)
        ############################################
    elif loss == "logistic":
        ############################################
        # TODO 5 : Edit this if part               #
        #          Compute preds                   #
        # Threshold the predicted probability at 0.5.
        preds = (sigmoid(X @ w + b) > 0.5).astype(float)
        ############################################
    else:
        raise ValueError("Loss Function is undefined.")

    assert preds.shape == (N,)
    return preds
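

# --- Usage sketch, added for illustration; not part of the original
# assignment template. Trains and evaluates both binary models on a tiny,
# (very likely) linearly separable dataset. The data and the helper name
# are illustrative only.
def _demo_binary():
    rng = np.random.RandomState(1)
    # Two Gaussian blobs: class 0 around (-2, -2), class 1 around (+2, +2).
    X = np.vstack([rng.randn(50, 2) - 2, rng.randn(50, 2) + 2])
    y = np.concatenate([np.zeros(50, dtype=int), np.ones(50, dtype=int)])
    for loss in ("perceptron", "logistic"):
        w, b = binary_train(X, y, loss=loss)
        acc = np.mean(binary_predict(X, w, b, loss=loss) == y)
        print(loss, "training accuracy:", acc)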


def multiclass_train(X, y, C,
                     w0=None,
                     b0=None,
                     gd_type="sgd",
                     step_size=0.5,
                     max_iterations=1000):
    """
    Inputs:
    - X: training features, an N-by-D numpy array, where N is the
      number of training points and D is the dimensionality of features
    - y: multiclass training labels, an N-dimensional numpy array of
      integers in {0, ..., C - 1}, where N is the number of training points
    - C: number of classes in the data
    - w0: optional initial weight matrix, a C-by-D numpy array;
      defaults to all zeros
    - b0: optional initial bias vector of length C; defaults to all zeros
    - gd_type: gradient descent type, either "gd" or "sgd"
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: C-by-D weight matrix of multinomial logistic regression, where
      C is the number of classes and D is the dimensionality of features
    - b: bias vector of length C, where C is the number of classes
    """
    N, D = X.shape

    w = np.zeros((C, D))
    if w0 is not None:
        w = w0

    b = np.zeros(C)
    if b0 is not None:
        b = b0

    np.random.seed(42)

    if gd_type == "sgd":
        ############################################
        # TODO 6 : Edit this if part               #
        #          Compute w and b                 #
        for i in range(max_iterations):
            # Sample one training example uniformly at random.
            ex_idx = np.random.randint(0, X.shape[0])
            x_ex, y_ex = X[ex_idx, :], y[ex_idx]
            one_hot = np.zeros(C)
            one_hot[y_ex] = 1
            logits = w @ x_ex + b
            probs = softmax(logits)
            # Softmax cross-entropy gradient for a single example:
            # d/db_c = p_c - 1[y = c] and d/dw_c = (p_c - 1[y = c]) * x.
            b_grad = probs - one_hot
            w_grad = np.outer(b_grad, x_ex)
            w = w - step_size * w_grad
            b = b - step_size * b_grad
        ############################################
    elif gd_type == "gd":
        ############################################
        # TODO 7 : Edit this if part               #
        #          Compute w and b                 #
        ############################################
        # Convert y to one-hot encoding.
        one_hot = np.zeros((len(y), C))
        one_hot[np.arange(len(y)), y] = 1
        for i in range(max_iterations):
            logits = X @ w.T + b
            probs = softmax(logits)
            probs_minus_one_hot = probs - one_hot
            b_grad = np.sum(probs_minus_one_hot, axis=0)
            w_grad = probs_minus_one_hot.T @ X
            w = w - step_size * (1 / len(y)) * w_grad
            b = b - step_size * (1 / len(y)) * b_grad
            # Stop early once the full-batch gradient is small.
            if np.linalg.norm(w_grad) + np.linalg.norm(b_grad) < 0.1:
                break
    else:
        raise ValueError("Type of Gradient Descent is undefined.")

    assert w.shape == (C, D)
    assert b.shape == (C,)
    return w, b
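

# --- Sanity-check sketch, added for illustration; not part of the original
# assignment template. A finite-difference check of the single-example
# softmax cross-entropy gradient used in the "sgd" branch: for the loss
# -log p_y, the gradient w.r.t. the logits is probs - one_hot. Call it after
# the module has loaded, since it relies on softmax (defined below).
def _check_softmax_gradient(C=4, eps=1e-6):
    rng = np.random.RandomState(2)
    logits = rng.randn(C)
    y = 1
    one_hot = np.zeros(C)
    one_hot[y] = 1
    analytic = softmax(logits) - one_hot

    def loss(z):
        return -np.log(softmax(z)[y])

    for c in range(C):
        z_plus, z_minus = logits.copy(), logits.copy()
        z_plus[c] += eps
        z_minus[c] -= eps
        numeric = (loss(z_plus) - loss(z_minus)) / (2 * eps)
        assert abs(numeric - analytic[c]) < 1e-4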


def softmax(weights):
    # Subtract the row-wise max for numerical stability; softmax is
    # invariant to shifting all logits by a constant.
    shifted = weights - np.amax(weights, axis=-1, keepdims=True)
    numerator = np.exp(shifted)
    denominator = np.sum(numerator, axis=-1, keepdims=True)
    return numerator / denominator
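

# --- Sanity-check sketch, added for illustration; not part of the original
# assignment template. softmax should return valid probability distributions,
# and shifting every logit by the same constant should leave the output
# unchanged.
def _check_softmax():
    rng = np.random.RandomState(3)
    logits = rng.randn(5, 3)
    probs = softmax(logits)
    assert np.allclose(np.sum(probs, axis=-1), 1.0)
    assert np.all(probs > 0)
    assert np.allclose(softmax(logits + 100.0), probs)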


def multiclass_predict(X, w, b):
    """
    Inputs:
    - X: testing features, an N-by-D numpy array, where N is the
      number of testing points and D is the dimensionality of features
    - w: weights of the trained multinomial classifier, C-by-D
    - b: bias terms of the trained multinomial classifier, length of C

    Returns:
    - preds: N-dimensional vector of multiclass predictions.
      Outputted predictions should be in {0, ..., C - 1}, where
      C is the number of classes
    """
    N, D = X.shape

    ############################################
    # TODO 8 : Edit this part to               #
    #          Compute preds                   #
    # softmax is monotonic, so the argmax over logits equals the
    # argmax over class probabilities.
    logits = X @ w.T + b
    preds = np.argmax(logits, axis=1).astype(float)
    ############################################

    assert preds.shape == (N,)
    return preds
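

# --- Usage sketch, added for illustration; not part of the original
# assignment template. Trains the multinomial classifier on three toy
# Gaussian blobs with both gradient descent variants. The data and helper
# names are illustrative only.
def _demo_multiclass():
    rng = np.random.RandomState(4)
    centers = np.array([[0.0, 4.0], [-4.0, -2.0], [4.0, -2.0]])
    X = np.vstack([rng.randn(40, 2) + c for c in centers])
    y = np.repeat(np.arange(3), 40)
    for gd_type in ("sgd", "gd"):
        w, b = multiclass_train(X, y, C=3, gd_type=gd_type)
        acc = np.mean(multiclass_predict(X, w, b) == y)
        print(gd_type, "training accuracy:", acc)


if __name__ == "__main__":
    _check_logistic_gradient()
    _check_sigmoid()
    _check_softmax_gradient()
    _check_softmax()
    _demo_binary()
    _demo_multiclass()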