#NumPy for matrix math and matplotlib for plotting loss
import numpy as np
import matplotlib.pyplot as plt


def forward(x, w1, w2):

    # (BS, D_in) * (D_in, H) = (BS, H)
    hidden_raw = np.matmul(x, w1)

    # (BS, H) = (BS, H)
    hidden = np.maximum(hidden_raw, 0)

    # (BS, H) * (H, D_out) = (BS, D_out)
    yhat = np.matmul(hidden, w2)

    #yhat for loss and prediction. hidden for backprop
    return yhat, hidden


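# Shape sanity check (a sketch added here, not part of the original paste):
# push a tiny batch through forward() and confirm the outputs match the shape
# comments above. The small sizes and the _demo names are illustrative only.
_x_demo = np.random.randn(4, 6)     # (BS=4, D_in=6)
_w1_demo = np.random.randn(6, 5)    # (D_in=6, H=5)
_w2_demo = np.random.randn(5, 3)    # (H=5, D_out=3)
_yhat_demo, _hidden_demo = forward(_x_demo, _w1_demo, _w2_demo)
assert _yhat_demo.shape == (4, 3) and _hidden_demo.shape == (4, 5)

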
def backward(hidden, x, loss_gradient, w2):

    # (H, BS) * (BS, D_out) = (H, D_out)
    grad_w2 = np.matmul(hidden.T, loss_gradient)

    # (BS, D_out) * (D_out, H) = (BS, H)
    grad_hidden = np.matmul(loss_gradient, w2.T)

    # Zero the gradient wherever the ReLU was inactive: (BS, H) = (BS, H)
    grad_hidden_pre_relu = grad_hidden * (hidden > 0)

    # (D_in, BS) * (BS, H) = (D_in, H)
    grad_w1 = np.matmul(x.T, grad_hidden_pre_relu)

    return grad_w1, grad_w2
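

# Finite-difference gradient check (a sketch added here, not in the original
# paste): nudge a single weight, recompute the summed squared-error loss, and
# compare the numerical slope to the analytic gradient from backward(). Away
# from ReLU kinks the two should agree closely. check_grad_w1 and its
# arguments are illustrative names, not part of the original script.
def check_grad_w1(x, y, w1, w2, i=0, j=0, eps=1e-5):
    yhat, hidden = forward(x, w1, w2)
    analytic = backward(hidden, x, 2 * (yhat - y), w2)[0][i, j]

    w1_plus, w1_minus = w1.copy(), w1.copy()
    w1_plus[i, j] += eps
    w1_minus[i, j] -= eps
    loss_plus = np.square(forward(x, w1_plus, w2)[0] - y).sum()
    loss_minus = np.square(forward(x, w1_minus, w2)[0] - y).sum()
    numeric = (loss_plus - loss_minus) / (2 * eps)

    return analytic, numeric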


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

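# Optional (not in the original paste): call np.random.seed(0) here to make
# the random data, labels, and initial weights drawn below reproducible, so
# the loss curve and the final value shown in the plot title come out the
# same on every run.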
# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

#Randomly initialize network weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

#Track losses
losses = []

#Perform full-batch optimization steps
for t in range(500):

    #Decaying learning rate
    learning_rate = 1 / (t + 100)

    #Forward propagate through the network
    yhat, hidden = forward(x, w1, w2)

    #Per-sample, per-dimension squared errors and the loss gradient w.r.t. yhat
    loss_matrix = np.square(yhat - y)
    loss_gradient = 2 * (yhat - y)

    #Backpropagate and calculate gradients
    grad_w1, grad_w2 = backward(hidden, x, loss_gradient, w2)

    #Clip our gradients to [-1, 1]
    grad_w1, grad_w2 = [np.clip(v, -1, 1) for v in [grad_w1, grad_w2]]

    #Update the weights by a small step against the gradient
    w1 = w1 - grad_w1 * learning_rate
    w2 = w2 - grad_w2 * learning_rate

    #L2 norm of each sample's loss vector, then the mean across samples
    loss_rms = np.sqrt(np.square(loss_matrix).sum(1)).mean()
    losses.append(loss_rms)

print(losses)

#Visualize the loss over time, skipping the first 300 steps
plt.plot(losses[300:])
plt.title(
    'Loss for model with learning rate decay and gradient clipping\napproaches '
    + str(losses[-1])[:5])
plt.savefig('model_2.jpg')
plt.show()
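
# Usage sketch (added, not part of the original paste): the trained weights
# can be applied to new inputs with the same forward pass. x_new is an
# illustrative name for a fresh batch of inputs.
x_new = np.random.randn(5, D_in)
predictions, _ = forward(x_new, w1, w2)  # predictions has shape (5, D_out)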