import numpy as np


def binary_train(X, y, loss="perceptron", w0=None, b0=None, step_size=0.5, max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: binary training labels, a N dimensional numpy array where
    N is the number of training points, indicating the labels of
    training data
    - loss: loss type, either perceptron or logistic
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: D-dimensional vector, a numpy array which is the weight
    vector of logistic or perceptron regression
    - b: scalar, which is the bias of logistic or perceptron regression
    """
    N, D = X.shape
    assert len(np.unique(y)) == 2

    w = np.zeros(D)
    if w0 is not None:
        w = w0

    b = 0
    if b0 is not None:
        b = b0

    if loss == "perceptron":
        ############################################
        # TODO 1 : Edit this if part              #
        # Compute w and b here                    #

        # map the labels from {0, 1} to {-1, +1}, so that
        # y * (w @ x + b) > 0 means "correctly classified"
        y = 2 * y - 1

        for i in range(max_iterations):
            count = 0
            acc_grad_wrt_w, acc_grad_wrt_b = np.zeros(w.shape), 0

            for ex_x, ex_y in zip(X, y):
                if ex_y * (w @ ex_x + b) <= 0:
                    # accumulate the gradient only for misclassified examples
                    acc_grad_wrt_w += -ex_y * ex_x
                    acc_grad_wrt_b += -ex_y
                    count += 1

            if count == 0:
                # every example is classified correctly, so stop early
                break

            # averaged gradient-descent update
            w = w - step_size * (1 / len(y)) * acc_grad_wrt_w
            b = b - step_size * (1 / len(y)) * acc_grad_wrt_b

        ############################################

    elif loss == "logistic":
        ############################################
        # TODO 2 : Edit this if part              #
        # Compute w and b here                    #

        # map the labels from {0, 1} to {-1, +1}
        y = 2 * y - 1

        for i in range(max_iterations):
            hypothesis = X @ w + b
            y_m_hypothesis = y * hypothesis
            # note: sigmoid(y * z) * exp(-y * z) equals sigmoid(-y * z)
            grad_wrt_w = np.transpose(X) @ (-y * sigmoid(y_m_hypothesis) * np.exp(-y_m_hypothesis))
            grad_wrt_b = np.sum(-y * sigmoid(y_m_hypothesis) * np.exp(-y_m_hypothesis))

            w = w - step_size * (1 / len(y)) * grad_wrt_w
            b = b - step_size * (1 / len(y)) * grad_wrt_b

        ############################################

    else:
        raise ValueError("Loss Function is undefined.")

    assert w.shape == (D,)
    return w, b

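
# Added note (not part of the original paste): with labels mapped to
# y in {-1, +1} and score z = w @ x + b, the per-example losses behind the
# two branches of binary_train above are
#
#   perceptron: max(0, -y * z)        -> gradient  -y * x  (w)  and  -y  (b)
#                                        whenever y * z <= 0, else 0
#   logistic:   log(1 + exp(-y * z))  -> gradient  -y * sigmoid(-y * z) * x  (w)
#                                        and       -y * sigmoid(-y * z)      (b)
#
# and sigmoid(y * z) * exp(-y * z) == sigmoid(-y * z), which is why the code
# multiplies sigmoid(y_m_hypothesis) by np.exp(-y_m_hypothesis).
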
def sigmoid(z):
    """
    Inputs:
    - z: a numpy array or a float number

    Returns:
    - value: a numpy array or a float number after computing sigmoid function value = 1/(1+exp(-z)).
    """

    ############################################
    # TODO 3 : Edit this part to              #
    # Compute value                           #
    value = 1 / (1 + np.exp(-z))
    ############################################

    return value

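
# Optional aside (added, not in the original paste): np.exp(-z) overflows for
# large negative z and triggers runtime warnings. A sketch of a numerically
# safer variant, assuming z is passed as a numpy array; the name
# stable_sigmoid is illustrative and nothing below depends on it.
def stable_sigmoid(z):
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))   # exponent argument is <= 0 here
    exp_z = np.exp(z[~pos])                    # z < 0, so exp(z) is in (0, 1)
    out[~pos] = exp_z / (1.0 + exp_z)
    return out
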
def binary_predict(X, w, b, loss="perceptron"):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - w: D-dimensional vector, a numpy array which is the weight
    vector of your learned model
    - b: scalar, which is the bias of your model
    - loss: loss type, either perceptron or logistic

    Returns:
    - preds: N dimensional vector of binary predictions: {0, 1}
    """
    N, D = X.shape

    if loss == "perceptron":
        ############################################
        # TODO 4 : Edit this if part              #
        # Compute preds                           #
        # predict 1 when the raw score is positive, 0 otherwise
        preds = X @ w + b
        preds = np.array([1. if p > 0 else 0. for p in preds])
        ############################################

    elif loss == "logistic":
        ############################################
        # TODO 5 : Edit this if part              #
        # Compute preds                           #
        # threshold the sigmoid probability at 0.5
        preds = sigmoid(X @ w + b)
        preds = np.array([1. if p > 0.5 else 0. for p in preds])
        ############################################

    else:
        raise ValueError("Loss Function is undefined.")

    assert preds.shape == (N,)
    return preds

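
# Minimal usage sketch (added, not part of the original assignment): fit both
# binary models on a tiny, linearly separable toy set (logical AND) and report
# training accuracy. The toy data and the helper name are illustrative only.
def _binary_demo():
    toy_X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
    toy_y = np.array([0, 0, 0, 1])
    for loss in ("perceptron", "logistic"):
        w, b = binary_train(toy_X, toy_y, loss=loss)
        preds = binary_predict(toy_X, w, b, loss=loss)
        print(loss, "training accuracy:", np.mean(preds == toy_y))
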
def multiclass_train(X, y, C,
                     w0=None,
                     b0=None,
                     gd_type="sgd",
                     step_size=0.5,
                     max_iterations=1000):
    """
    Inputs:
    - X: training features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - y: multiclass training labels, a N dimensional numpy array where
    N is the number of training points, indicating the labels of
    training data
    - C: number of classes in the data
    - gd_type: gradient descent type, either GD or SGD
    - step_size: step size (learning rate)
    - max_iterations: number of iterations to perform gradient descent

    Returns:
    - w: C-by-D weight matrix of multinomial logistic regression, where
    C is the number of classes and D is the dimensionality of features.
    - b: bias vector of length C, where C is the number of classes
    """

    N, D = X.shape

    w = np.zeros((C, D))
    if w0 is not None:
        w = w0

    b = np.zeros(C)
    if b0 is not None:
        b = b0

    np.random.seed(42)
    if gd_type == "sgd":
        ############################################
        # TODO 6 : Edit this if part              #
        # Compute w and b                         #

        for i in range(max_iterations):
            # pick one training example uniformly at random
            ex_idx = np.random.randint(0, X.shape[0])
            x_ex, y_ex = X[ex_idx, :], y[ex_idx]

            # one-hot encode its label
            one_hot = np.zeros((C,))
            one_hot[y_ex] = 1

            logits = np.transpose(w @ np.transpose(x_ex)) + b
            probs = softmax(logits)

            # cross-entropy gradient: softmax probabilities minus one-hot target
            b_grad = probs - one_hot
            w_grad = np.outer(b_grad, x_ex)

            w = w - step_size * w_grad
            b = b - step_size * b_grad

        ############################################

    elif gd_type == "gd":
        ############################################
        # TODO 7 : Edit this if part              #
        # Compute w and b                         #
        ############################################

        # convert y to one-hot
        one_hot = np.zeros((len(y), C))
        one_hot[np.arange(len(y)), y] = 1

        for i in range(max_iterations):
            logits = np.transpose(w @ np.transpose(X)) + b
            probs = softmax(logits)

            probs_minus_one_hot = probs - one_hot
            b_grad = np.sum(probs_minus_one_hot, axis=0)
            w_grad = np.transpose(probs_minus_one_hot) @ X

            # averaged gradient-descent update
            w = w - step_size * (1 / len(y)) * w_grad
            b = b - step_size * (1 / len(y)) * b_grad

            # stop early once the gradient norms are small
            if np.linalg.norm(w_grad) + np.linalg.norm(b_grad) < 0.1:
                break

    else:
        raise ValueError("Type of Gradient Descent is undefined.")

    assert w.shape == (C, D)
    assert b.shape == (C,)

    return w, b

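
# Added note (not part of the original paste): multiclass_train minimizes the
# softmax cross-entropy. For one example x with one-hot target t and
# probabilities p = softmax(w @ x + b), the gradient of the loss with respect
# to the logits is (p - t), hence
#
#   grad_b = p - t    and    grad_w = outer(p - t, x),
#
# which is exactly what the SGD branch computes; the GD branch averages the
# same quantities over all N training examples.
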
def softmax(weights, verbose=False):
    weights = np.transpose(np.transpose(weights) - np.amax(weights, axis=-1))
    numerator = np.exp(weights)
    denominator = np.sum(numerator, axis=-1)
    return np.transpose(np.transpose(numerator) / denominator)  # element wise

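
# Small sanity check (added, not part of the original paste): the max shift in
# softmax keeps np.exp from overflowing without changing the result, and each
# row of the output sums to 1. The helper name is illustrative only.
def _softmax_check():
    p = softmax(np.array([1000.0, 1001.0]))   # would overflow without the shift
    assert np.allclose(p, [1.0 / (1.0 + np.e), np.e / (1.0 + np.e)])
    batch = softmax(np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]))
    assert np.allclose(batch.sum(axis=-1), 1.0)
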
def multiclass_predict(X, w, b):
    """
    Inputs:
    - X: testing features, a N-by-D numpy array, where N is the
    number of training points and D is the dimensionality of features
    - w: weights of the trained multinomial classifier, C-by-D
    - b: bias terms of the trained multinomial classifier, length of C

    Returns:
    - preds: N dimensional vector of multiclass predictions.
    Outputted predictions should be from {0, C - 1}, where
    C is the number of classes
    """
    N, D = X.shape
    ############################################
    # TODO 8 : Edit this part to              #
    # Compute preds                           #
    logits = X @ np.transpose(w) + b
    preds = np.argmax(logits, axis=1).astype(float)
    ############################################

    assert preds.shape == (N,)
    return preds

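
# Minimal usage sketch (added, not part of the original assignment): fit the
# multinomial model on three well-separated 2-D clusters and report training
# accuracy for both gradient-descent variants. Data and names are illustrative.
def _multiclass_demo():
    toy_X = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.1],
                      [5.0, 0.0], [5.1, 0.0], [5.0, 0.1],
                      [0.0, 5.0], [0.1, 5.0], [0.0, 5.1]])
    toy_y = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])
    for gd_type in ("sgd", "gd"):
        w, b = multiclass_train(toy_X, toy_y, 3, gd_type=gd_type)
        preds = multiclass_predict(toy_X, w, b)
        print(gd_type, "training accuracy:", np.mean(preds == toy_y))


if __name__ == "__main__":
    _binary_demo()
    _softmax_check()
    _multiclass_demo()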