Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def do_nesterov_accelerated_gradient_descent(eta=1.0, gamma=0.9):
    """Run Nesterov accelerated gradient descent on the (X, Y) pairs.

    Starts from the module-level ``init_w`` / ``init_b`` and performs
    ``max_epochs`` full-batch updates. In every epoch the gradients are
    summed over all ``(x, y)`` pairs at the look-ahead point
    ``(w - gamma * prev_v_w, b - gamma * prev_v_b)`` rather than at the
    current ``(w, b)``; that look-ahead evaluation is what distinguishes
    this from plain momentum.

    Relies on ``init_w``, ``init_b``, ``max_epochs``, ``X``, ``Y``,
    ``grad_w`` and ``grad_b`` being defined at module level.
    NOTE(review): ``grad_w`` / ``grad_b`` are presumably the partial
    derivatives of the loss w.r.t. ``w`` and ``b`` for one sample,
    called as ``grad(w, b, x, y)`` -- confirm against their definitions.

    Args:
        eta: Learning rate applied to the summed gradients.
        gamma: Momentum coefficient applied to the previous velocity.

    Returns:
        The final ``(w, b)`` after ``max_epochs`` updates.
    """
    w, b = init_w, init_b
    prev_v_w, prev_v_b = 0, 0

    for _ in range(max_epochs):
        # Partial update: step by the momentum term only to obtain the
        # look-ahead position at which the gradients are evaluated.
        lookahead_w = w - gamma * prev_v_w
        lookahead_b = b - gamma * prev_v_b

        # Accumulate the gradients over every pair at the look-ahead point.
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(lookahead_w, lookahead_b, x, y)
            db += grad_b(lookahead_w, lookahead_b, x, y)

        # Full update: momentum term plus the look-ahead gradient step.
        v_w = gamma * prev_v_w + eta * dw
        v_b = gamma * prev_v_b + eta * db
        w = w - v_w
        b = b - v_b

        # Carry the velocity into the next epoch.
        prev_v_w, prev_v_b = v_w, v_b

    return w, b
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement