Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def sgd_momentum(w, dw, config=None):
    """
    Performs stochastic gradient descent with momentum.

    Inputs:
    - w: A numpy array giving the current weights.
    - dw: A numpy array of the same shape as w giving the gradient of the
      loss with respect to w.
    - config: Optional dict with format:
      - learning_rate: Scalar learning rate (default 1e-2).
      - momentum: Scalar between 0 and 1 giving the momentum value
        (default 0.9). Setting momentum = 0 reduces to sgd.
      - velocity: A numpy array of the same shape as w and dw used to store a
        moving average of the gradients.

    Returns a tuple of:
    - next_w: Updated weights.
    - config: The config dict, with the new velocity stored under
      'velocity' for use on the next step.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    # Only allocate a zero velocity when none is stored yet. The original
    # `config.get('velocity', np.zeros_like(w))` built a throwaway zeros
    # array on every call, even once a velocity was already present.
    if 'velocity' in config:
        v = config['velocity']
    else:
        v = np.zeros_like(w)

    learning_rate = config['learning_rate']
    mu = config['momentum']
    v = mu * v - learning_rate * dw  # integrate velocity
    next_w = w + v                   # integrate position
    config['velocity'] = v
    return next_w, config
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement