import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder


def softmax(class_scores):
    """
    Calculate the class probability distribution for each digit from the given class scores.

    :param class_scores: class scores produced by your score function
    :return: probability distribution over the classes
    """
    # Subtract the row-wise maximum for numerical stability; this does not change the
    # result and avoids modifying the caller's array in place.
    class_scores = class_scores - np.max(class_scores, axis=1, keepdims=True)
    return np.exp(class_scores) / np.sum(np.exp(class_scores), axis=1, keepdims=True)

def onehot_encode_label(label):
    """
    Support function to convert a label vector into a one-hot encoding matrix.

    :param label: array with shape (D,), where D can be whatever you want
    :return: one-hot encoding matrix
    """
    # Note: scikit-learn >= 1.2 renames the `sparse` argument to `sparse_output`.
    onehot_encoder = OneHotEncoder(sparse=False)
    label = label.reshape(len(label), 1)
    onehot_encoded_label = onehot_encoder.fit_transform(label)
    return onehot_encoded_label

def data_loss(class_probabilities, onehot_label):
    """
    Compute the data loss L_i for the correct class with a one-hot encoded label.

    :param class_probabilities: probabilities from the softmax function
    :param onehot_label: correct labels in one-hot encoded form

    :return: the data loss L_i
    """
    # The parameter is named onehot_label so it no longer shadows the onehot_encode_label function.
    return onehot_label * -np.log(class_probabilities)

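# class_scores() is used by loss() below but is not defined anywhere in the paste.
# For a linear softmax classifier the scores are simply X @ theta; the definition here
# is an assumption filled in so that the code runs end to end.
def class_scores(X, theta):
    """
    Compute the raw (unnormalized) class scores of a linear classifier.

    :param X: data matrix of shape (m, d)
    :param theta: parameter matrix of shape (d, num_classes)
    :return: score matrix of shape (m, num_classes)
    """
    return np.dot(X, theta)
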
def loss(X, y, theta, lam):
    """
    :param X: data
    :param y: label of the data
    :param theta: learnable parameters
    :param lam: regularization factor

    :return: loss and gradient as a tuple
    """
    encoded_labels = onehot_encode_label(y)          # also needed for the gradient, therefore computed separately
    probabilities = softmax(class_scores(X, theta))  # also needed for the gradient, therefore computed separately
    loss_Li = data_loss(probabilities, encoded_labels)

    m = X.shape[0]  # number of training samples, used for normalization
    l2_regularization = (lam / 2) * np.sum(theta * theta)  # regularization loss

    loss = np.sum(loss_Li) / m + l2_regularization

    dl2 = lam * theta
    dloss = np.dot(X.T, (probabilities - encoded_labels) / m)
    gradient = dloss + dl2

    return loss, gradient

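# Sanity check for loss() (not part of the original paste): compare the analytic gradient
# against a central-difference estimate on a small synthetic problem. All names below
# (numerical_gradient_check, X_check, y_check, theta_check) are illustrative additions.
def numerical_gradient_check(X, y, theta, lam, eps=1e-5):
    _, analytic = loss(X, y, theta, lam)
    numeric = np.zeros_like(theta)
    for idx in np.ndindex(*theta.shape):
        theta[idx] += eps
        loss_plus, _ = loss(X, y, theta, lam)
        theta[idx] -= 2 * eps
        loss_minus, _ = loss(X, y, theta, lam)
        theta[idx] += eps  # restore the original value
        numeric[idx] = (loss_plus - loss_minus) / (2 * eps)
    return np.max(np.abs(analytic - numeric))

rng = np.random.RandomState(0)
X_check = rng.randn(20, 5)
y_check = np.repeat(np.arange(4), 5)  # every class present, so the one-hot shape matches theta
theta_check = 0.01 * rng.randn(5, 4)
print('max gradient error:', numerical_gradient_check(X_check, y_check, theta_check, lam=0.1))
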
def sgd(training_data, training_label, theta, lam=0.5, iterations=100, learning_rate=1e-5, batch_size=256):
    """
    Run mini-batch stochastic gradient descent on the softmax loss.

    :return: the optimized parameters theta and the list of per-iteration losses
    """
    losses = []
    for i in range(iterations):
        # sample a random mini-batch of batch_size examples
        shuffle_index = np.random.permutation(training_data.shape[0])
        data, label = training_data[shuffle_index], training_label[shuffle_index]
        data, label = data[:batch_size], label[:batch_size]

        l, grad = loss(data, label, theta, lam)
        losses.append(l)
        theta -= learning_rate * grad  # in-place update of the passed-in theta
    return theta, losses

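# X_train and y_train are assumed to already exist; the 28x28 reshape and the ln(10)
# check below suggest MNIST-style data (784 features, 10 classes). One possible way to
# obtain such arrays (an assumption, not part of the original paste) is via scikit-learn:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X_train = mnist.data[:60000]                     # pixel values left unscaled; rescale and adjust learning_rate if desired
y_train = mnist.target[:60000].astype(np.int64)
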
# Initialize learnable parameters theta
theta = np.zeros([X_train.shape[1], len(np.unique(y_train))])

# Start optimization with training data, theta, and optional hyperparameters
opt_model, loss_history = sgd(X_train, y_train, theta, iterations=250)

# evaluation
print('last iteration loss:', loss_history[-1])
print('first iteration loss:', loss_history[0])
# With theta initialized to zeros, the first loss should be ln(10); if this prints False,
# there is an implementation error.
print('Is the first loss equal to ln(10)?', abs(np.log(10) - loss_history[0]) < 1e-6)

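# Additional check (not in the original paste): training accuracy of the learned model.
predictions = np.argmax(class_scores(X_train, opt_model), axis=1)
print('training accuracy:', np.mean(predictions == y_train))
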
# plot a loss curve
plt.plot(loss_history)
plt.ylabel('loss')
plt.xlabel('iterations')
plt.show()

# plot the learned weights of each class as a 28x28 image
plt.figure(figsize=(20, 2))
num_classes = 10
for c in range(num_classes):
    f = plt.subplot(1, num_classes, c + 1)  # one row with a panel per class
    f.axis('off')
    plt.imshow(np.reshape(opt_model[:, c], [28, 28]))
plt.show()