# NumPy for matrix math and matplotlib for plotting loss
import numpy as np
import matplotlib.pyplot as plt


def forward(x, w1, w2):

    # (BS, D_in) x (D_in, H) = (BS, H)
    hidden_raw = np.matmul(x, w1)

    # (BS, H) = (BS, H): ReLU activation
    hidden = np.maximum(hidden_raw, 0)

    # (BS, H) x (H, D_out) = (BS, D_out)
    yhat = np.matmul(hidden, w2)

    # yhat for loss and prediction, hidden for backprop
    return yhat, hidden


def backward(hidden, x, w2, loss_gradient):

    # (H, BS) x (BS, D_out) = (H, D_out)
    grad_w2 = np.matmul(hidden.T, loss_gradient)

    # (BS, D_out) x (D_out, H) = (BS, H)
    grad_hidden = np.matmul(loss_gradient, w2.T)

    # (BS, H) = (BS, H): zero the gradient wherever the ReLU was inactive
    grad_hidden_pre_relu = grad_hidden * (hidden > 0)

    # (D_in, BS) x (BS, H) = (D_in, H)
    grad_w1 = np.matmul(x.T, grad_hidden_pre_relu)

    return grad_w1, grad_w2


# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize network weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

# Track losses
losses = []

# Perform full-batch optimization steps
for t in range(500):

    # Decaying learning rate
    learning_rate = 1.0 / (t + 100)

    # Forward propagate through the network
    yhat, hidden = forward(x, w1, w2)

    # Calculate our loss matrix (sample by y dimension) and its gradient
    loss_matrix = np.square(yhat - y)
    loss_gradient = 2 * (yhat - y)

    # Backpropagate and calculate gradients
    grad_w1, grad_w2 = backward(hidden, x, w2, loss_gradient)

    # Clip our gradients to [-1, 1]
    grad_w1, grad_w2 = [np.clip(v, -1, 1) for v in [grad_w1, grad_w2]]

    # Update the weights by a small step in the direction of the gradient
    w1 = w1 - grad_w1 * learning_rate
    w2 = w2 - grad_w2 * learning_rate

    # Norm of the loss vector for each sample, then the mean across samples
    loss_rms = np.sqrt(np.square(loss_matrix).sum(1)).mean()
    losses.append(loss_rms)

print(losses)

# Visualize our losses over time, skipping the first 300 steps of training
plt.plot(losses[300:])
plt.title(
    'Loss for model with learning decay and gradient clipping\napproaches ' +
    str(losses[-1])[:5])
plt.savefig('model_2.jpg')
plt.show()
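

# A minimal sanity-check sketch, not part of the paste above: it numerically
# verifies one entry of grad_w1 from backward() with a central finite
# difference on the summed squared-error loss. The small shapes, the seed,
# the checked index, and the name gradient_check are assumptions made purely
# for illustration.
def gradient_check(eps=1e-5):
    rng = np.random.RandomState(0)
    xc = rng.randn(4, 6)
    yc = rng.randn(4, 3)
    w1c = rng.randn(6, 5)
    w2c = rng.randn(5, 3)

    # Analytic gradient from the backward pass above
    yhat_c, hidden_c = forward(xc, w1c, w2c)
    g1, _ = backward(hidden_c, xc, w2c, 2 * (yhat_c - yc))

    # Numeric gradient for one arbitrary entry of w1
    i, j = 2, 1
    w1_plus, w1_minus = w1c.copy(), w1c.copy()
    w1_plus[i, j] += eps
    w1_minus[i, j] -= eps
    loss_plus = np.square(forward(xc, w1_plus, w2c)[0] - yc).sum()
    loss_minus = np.square(forward(xc, w1_minus, w2c)[0] - yc).sum()
    numeric = (loss_plus - loss_minus) / (2 * eps)

    # The two values should agree to several decimal places
    print('analytic vs numeric grad_w1[%d, %d]:' % (i, j), g1[i, j], numeric)


gradient_check()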