import numpy as np


def update_parameters(parameters, grads, learning_rate, iter_no, method='SGD',
                      opt_parameters=None, beta1=0.9, beta2=0.999):
    """
    Update parameters with one step of the chosen gradient-descent variant.

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward
    learning_rate -- learning rate (alpha)
    iter_no -- 1-based iteration count, used for the ADAM bias correction
    method -- weight-update method: 'SGD', 'SGDM', 'RMSP', or 'ADAM'
    opt_parameters -- python dictionary of the optimizer's running averages
                      ('vdw'/'vdb' per layer for momentum, 'sdw'/'sdb' for
                      squared gradients); must be zero-initialized for every
                      method except 'SGD'
    beta1 -- exponentially weighted average parameter for SGDM and ADAM
    beta2 -- exponentially weighted average parameter for RMSP and ADAM

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    opt_parameters -- updated running averages (None for 'SGD')
    """
    L = len(parameters) // 2  # number of layers in the neural network
    epsilon = 1e-8  # small constant guarding against division by zero
    if method == 'SGD':
        # Vanilla gradient descent: step directly against the raw gradients.
        for l in range(L):
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                learning_rate * grads["dW" + str(l+1)]
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                learning_rate * grads["db" + str(l+1)]
        opt_parameters = None
    elif method == 'SGDM':
        # Momentum: step against an exponentially weighted average of the gradients.
        for l in range(L):
            opt_parameters['vdw' + str(l+1)] = beta1 * opt_parameters['vdw' + str(l+1)] + \
                (1 - beta1) * grads["dW" + str(l+1)]
            opt_parameters['vdb' + str(l+1)] = beta1 * opt_parameters['vdb' + str(l+1)] + \
                (1 - beta1) * grads["db" + str(l+1)]
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                learning_rate * opt_parameters['vdw' + str(l+1)]
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                learning_rate * opt_parameters['vdb' + str(l+1)]
    elif method == 'RMSP':
        # RMSProp: normalize each step by a running average of the squared gradients.
        for l in range(L):
            opt_parameters['sdw' + str(l+1)] = beta2 * opt_parameters['sdw' + str(l+1)] + \
                (1 - beta2) * np.square(grads["dW" + str(l+1)])
            opt_parameters['sdb' + str(l+1)] = beta2 * opt_parameters['sdb' + str(l+1)] + \
                (1 - beta2) * np.square(grads["db" + str(l+1)])
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                learning_rate * grads["dW" + str(l+1)] / \
                (np.sqrt(opt_parameters['sdw' + str(l+1)]) + epsilon)
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                learning_rate * grads["db" + str(l+1)] / \
                (np.sqrt(opt_parameters['sdb' + str(l+1)]) + epsilon)
    elif method == 'ADAM':
        # ADAM: momentum and RMSProp combined with a bias-corrected step size.
        # Compute the corrected rate into a local variable instead of
        # overwriting learning_rate, which compounded the correction across
        # layers; this is the standard form alpha * sqrt(1 - beta2^t) / (1 - beta1^t).
        corrected_lr = learning_rate * np.sqrt(1 - beta2**iter_no) / (1 - beta1**iter_no)
        for l in range(L):
            opt_parameters['vdw' + str(l+1)] = beta1 * opt_parameters['vdw' + str(l+1)] + \
                (1 - beta1) * grads["dW" + str(l+1)]
            opt_parameters['vdb' + str(l+1)] = beta1 * opt_parameters['vdb' + str(l+1)] + \
                (1 - beta1) * grads["db" + str(l+1)]
            opt_parameters['sdw' + str(l+1)] = beta2 * opt_parameters['sdw' + str(l+1)] + \
                (1 - beta2) * np.square(grads["dW" + str(l+1)])
            opt_parameters['sdb' + str(l+1)] = beta2 * opt_parameters['sdb' + str(l+1)] + \
                (1 - beta2) * np.square(grads["db" + str(l+1)])
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - \
                corrected_lr * opt_parameters['vdw' + str(l+1)] / \
                (np.sqrt(opt_parameters['sdw' + str(l+1)]) + epsilon)
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - \
                corrected_lr * opt_parameters['vdb' + str(l+1)] / \
                (np.sqrt(opt_parameters['sdb' + str(l+1)]) + epsilon)
    return parameters, opt_parameters
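

# --- Usage sketch (added for illustration, not part of the original paste) ---
# A minimal driver showing how update_parameters expects to be called. The
# init_opt_parameters helper, the 3 -> 4 -> 1 layer sizes, and the random
# stand-in gradients below are all hypothetical; real gradients would come
# from L_model_backward. Note that opt_parameters must be zero-initialized
# before the first SGDM/RMSP/ADAM step, and iter_no must start at 1 so the
# bias-correction terms (1 - beta**iter_no) are nonzero.

def init_opt_parameters(parameters):
    """Zero-initialize the running averages used by SGDM, RMSP, and ADAM."""
    L = len(parameters) // 2
    opt_parameters = {}
    for l in range(1, L + 1):
        opt_parameters['vdw' + str(l)] = np.zeros_like(parameters['W' + str(l)])
        opt_parameters['vdb' + str(l)] = np.zeros_like(parameters['b' + str(l)])
        opt_parameters['sdw' + str(l)] = np.zeros_like(parameters['W' + str(l)])
        opt_parameters['sdb' + str(l)] = np.zeros_like(parameters['b' + str(l)])
    return opt_parameters


if __name__ == '__main__':
    rng = np.random.default_rng(0)
    # Hypothetical two-layer network: 3 inputs -> 4 hidden units -> 1 output.
    parameters = {
        'W1': 0.01 * rng.standard_normal((4, 3)), 'b1': np.zeros((4, 1)),
        'W2': 0.01 * rng.standard_normal((1, 4)), 'b2': np.zeros((1, 1)),
    }
    opt_parameters = init_opt_parameters(parameters)
    for iter_no in range(1, 11):
        # Random stand-in gradients keyed 'dW1', 'db1', ... just for the demo.
        grads = {'d' + k: rng.standard_normal(v.shape) for k, v in parameters.items()}
        parameters, opt_parameters = update_parameters(
            parameters, grads, learning_rate=0.001, iter_no=iter_no,
            method='ADAM', opt_parameters=opt_parameters)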