Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Numpy for matrix math and matplotlib for plotting loss
- import numpy as np
- import matplotlib.pyplot as plt
def forward(x, w1, w2):
    """Run the two-layer network forward on a batch.

    Args:
        x: Input batch, shape (BS, D_in).
        w1: First-layer weights, shape (D_in, H).
        w2: Second-layer weights, shape (H, D_out).

    Returns:
        A ``(yhat, hidden)`` tuple: ``yhat`` has shape (BS, D_out) and is
        used for the loss and for prediction; ``hidden`` has shape (BS, H)
        and is the post-ReLU activation that backpropagation needs.
    """
    # (BS, D_in) x (D_in, H) -> (BS, H)
    pre_activation = np.matmul(x, w1)
    # ReLU: negative entries become zero, shape stays (BS, H)
    activation = np.maximum(pre_activation, 0)
    # (BS, H) x (H, D_out) -> (BS, D_out)
    prediction = np.matmul(activation, w2)
    return prediction, activation
def backward(hidden, x, loss_gradient, w2=None):
    """Backpropagate the loss gradient through the two-layer network.

    Args:
        hidden: Post-ReLU hidden activations from the forward pass,
            shape (BS, H).
        x: Input batch used in the forward pass, shape (BS, D_in).
        loss_gradient: Gradient of the loss with respect to the network
            output, shape (BS, D_out).
        w2: Second-layer weights, shape (H, D_out). When omitted, the
            module-level ``w2`` is used, which is what this function
            always read before the parameter existed; existing
            three-argument calls therefore keep working.

    Returns:
        A ``(grad_w1, grad_w2)`` tuple with shapes (D_in, H) and
        (H, D_out).

    Raises:
        NameError: If ``w2`` is not passed and no module-level ``w2``
            is defined.
    """
    if w2 is None:
        # Backward-compatible fallback to the global weights.
        try:
            w2 = globals()["w2"]
        except KeyError:
            raise NameError("name 'w2' is not defined") from None

    # (H, BS) x (BS, D_out) -> (H, D_out)
    grad_w2 = np.matmul(hidden.T, loss_gradient)
    # (BS, D_out) x (D_out, H) -> (BS, H)
    grad_hidden = np.matmul(loss_gradient, w2.T)
    # ReLU passes gradient only where the activation was positive.
    grad_hidden_pre_relu = grad_hidden * (hidden > 0)
    # (D_in, BS) x (BS, H) -> (D_in, H)
    grad_w1 = np.matmul(x.T, grad_hidden_pre_relu)
    return grad_w1, grad_w2
# Problem dimensions:
#   N     - batch size
#   D_in  - input dimension
#   H     - hidden dimension
#   D_out - output dimension
N = 64
D_in = 1000
H = 100
D_out = 10

# Random inputs and regression targets drawn from a standard normal.
x = np.random.standard_normal((N, D_in))
y = np.random.standard_normal((N, D_out))

# Random starting weights for both layers.
w1 = np.random.standard_normal((D_in, H))
w2 = np.random.standard_normal((H, D_out))

# One loss value is appended per optimization step.
losses = []
# Full-batch gradient descent for 500 steps.
for t in range(500):
    # Step size decays with the step index: 1/100, 1/101, ...
    learning_rate = 1 / (t + 100)

    # Forward pass; keep the hidden activations for backprop.
    yhat, hidden = forward(x, w1, w2)

    # Squared error per sample and output dimension, plus its gradient
    # with respect to yhat.
    residual = yhat - y
    loss_matrix = np.square(residual)
    loss_gradient = 2 * residual

    # Backward pass.
    grad_w1, grad_w2 = backward(hidden, x, loss_gradient)

    # Clip every gradient entry to [-1, 1].
    grad_w1 = np.clip(grad_w1, -1, 1)
    grad_w2 = np.clip(grad_w2, -1, 1)

    # Take a small step against the gradient.
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

    # Euclidean norm of each sample's loss vector, averaged over samples.
    # NOTE(review): loss_matrix already holds squared errors, so this
    # squares them a second time - confirm that is intended given the
    # name "loss_rms".
    per_sample_norm = np.sqrt(np.square(loss_matrix).sum(1))
    loss_rms = per_sample_norm.mean()
    losses.append(loss_rms)

print(losses)
# Plot the loss curve, skipping the first 300 steps so the initial
# training phase does not dominate the y-axis.
plt.plot(losses[300:])
# Show the final loss with four significant digits. Slicing the float's
# repr (str(value)[:5]) cut through exponents such as "9.5e-05" and
# dropped digits from values with five or more integer digits.
plt.title(
    'Loss for model with learning decay and gradient clipping\napproaches '
    + format(losses[-1], '.4g'))
plt.savefig('model_2.jpg')
plt.show()
Add Comment
Please, Sign In to add comment