import numpy as np
import theano
from theano import tensor as T
class SGD(object):
    '''Stochastic gradient descent, with support for momentum,
    learning rate decay, and Nesterov momentum.

    # Arguments
        lr: float >= 0. Learning rate.
        momentum: float >= 0. Parameter update momentum.
        decay: float >= 0. Learning rate decay over each update.
        nesterov: boolean. Whether to apply Nesterov momentum.
    '''
    def __init__(self, lr=0.01, momentum=0., decay=0.,
                 nesterov=False, **kwargs):
        # Hyperparameters live in shared variables so they can be changed
        # after the training function has been compiled.
        self.iterations = theano.shared(np.asarray(0., dtype=theano.config.floatX))
        self.lr = theano.shared(np.asarray(lr, dtype=theano.config.floatX))
        self.momentum = theano.shared(np.asarray(momentum, dtype=theano.config.floatX))
        self.decay = theano.shared(np.asarray(decay, dtype=theano.config.floatX))
        self.initial_decay = decay
        self.nesterov = nesterov
    def get_updates(self, params, loss, grads):
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            # Inverse-time decay: lr / (1 + decay * iterations).
            lr *= (1. / (1. + self.decay * self.iterations))
        self.updates.append((self.iterations, self.iterations + 1))

        # Momentum: one velocity buffer per parameter, initialized to zero.
        shapes = [p.get_value(borrow=True, return_internal_type=True).shape
                  for p in params]
        moments = [theano.shared(np.zeros(shape, dtype=theano.config.floatX))
                   for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append((m, v))

            if self.nesterov:
                # Look-ahead step: apply momentum to the updated velocity.
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v
            self.updates.append((p, new_p))
        return self.updates
    def get_config(self):
        # There is no optimizer base class here, so the config dict is
        # returned directly rather than merged with a parent config.
        return {'lr': float(self.lr.get_value()),
                'momentum': float(self.momentum.get_value()),
                'decay': float(self.decay.get_value()),
                'nesterov': self.nesterov}
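
# A minimal usage sketch, not part of the original paste: it wires SGD into a
# compiled theano training function for a least-squares toy problem. The
# names (X, y, w, train, ...) are illustrative assumptions; the module-level
# imports above are reused.
def _sgd_example():
    X = T.matrix('X')
    y = T.vector('y')
    w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')

    loss = T.mean(T.sqr(T.dot(X, w) - y))
    grads = T.grad(loss, [w])

    opt = SGD(lr=0.1, momentum=0.9, nesterov=True)
    train = theano.function([X, y], loss,
                            updates=opt.get_updates([w], loss, grads))

    rng = np.random.RandomState(0)
    data = rng.randn(64, 3).astype(theano.config.floatX)
    target = data.dot(np.asarray([1., -2., 0.5], dtype=theano.config.floatX))
    for _ in range(100):
        train(data, target)
    return w.get_value()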
class Adam(object):
    '''Adam optimizer.
    Default parameters follow those provided in the original paper.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor.

    # References
        - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-8, decay=0., **kwargs):
        self.epsilon = epsilon
        self.iterations = theano.shared(np.asarray(0., dtype=theano.config.floatX))
        self.lr = theano.shared(np.asarray(lr, dtype=theano.config.floatX))
        self.beta_1 = theano.shared(np.asarray(beta_1, dtype=theano.config.floatX))
        self.beta_2 = theano.shared(np.asarray(beta_2, dtype=theano.config.floatX))
        self.decay = theano.shared(np.asarray(decay, dtype=theano.config.floatX))
        self.initial_decay = decay
    def get_updates(self, params, loss, grads):
        self.updates = [(self.iterations, self.iterations + 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        # Bias-corrected learning rate for step t (Algorithm 1 in the paper);
        # `sqrt` is the clipped helper defined at the end of this paste.
        t = self.iterations + 1
        lr_t = lr * sqrt(1. - T.pow(self.beta_2, t)) / (1. - T.pow(self.beta_1, t))

        shapes = [p.get_value(borrow=True, return_internal_type=True).shape
                  for p in params]
        # First- and second-moment estimates, one pair per parameter.
        ms = [theano.shared(np.zeros(shape, dtype=theano.config.floatX))
              for shape in shapes]
        vs = [theano.shared(np.zeros(shape, dtype=theano.config.floatX))
              for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * T.square(g)
            p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)

            self.updates.append((m, m_t))
            self.updates.append((v, v_t))
            self.updates.append((p, p_t))
        return self.updates
    def get_config(self):
        # No base class, so the config dict is returned directly.
        return {'lr': float(self.lr.get_value()),
                'beta_1': float(self.beta_1.get_value()),
                'beta_2': float(self.beta_2.get_value()),
                'decay': float(self.decay.get_value()),
                'epsilon': self.epsilon}
def sqrt(x):
    # Clip to [0, inf) before taking the square root, guarding against
    # small negative values introduced by floating-point error.
    x = T.clip(x, 0., np.inf)
    return T.sqrt(x)
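
# A companion sketch for Adam, again illustrative rather than from the paste:
# get_updates() has the same contract as SGD's, so switching optimizers only
# changes the constructor call.
def _adam_example():
    X = T.matrix('X')
    y = T.vector('y')
    w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')

    loss = T.mean(T.sqr(T.dot(X, w) - y))
    grads = T.grad(loss, [w])

    opt = Adam(lr=0.001)
    train = theano.function([X, y], loss,
                            updates=opt.get_updates([w], loss, grads))

    rng = np.random.RandomState(1)
    data = rng.randn(64, 3).astype(theano.config.floatX)
    target = data.dot(np.asarray([1., -2., 0.5], dtype=theano.config.floatX))
    for _ in range(200):
        train(data, target)
    return opt.get_config(), w.get_value()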