# This file shows how to train a DBM with a Gaussian visible layer and two
# binary hidden layers on MNIST. It is a variation of rbm.yaml: the input
# layer is a GaussianVisLayer, so the inputs are not binarized, and there
# are two hidden layers instead of one.
# The hyperparameters in this file aren't especially great; they're mostly
# chosen to demonstrate the interface. Feel free to suggest better
# hyperparameters!
!obj:pylearn2.train.Train {
    # For this example, we modify the binarized rbm.yaml to use the raw
    # inputs with a Gaussian visible layer and two binary vector hidden
    # layers.
    dataset: &data !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        one_hot: 1,
        start: 0,
        stop: %(train_stop)i
    },
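    # Note that 'stop' above, and several values below, are not literal YAML
    # values but Python string-interpolation placeholders (train_stop, the
    # detector layer dims, monitoring_batches, max_epochs and save_path).
    # They must be filled in by string substitution before the file is
    # handed to pylearn2's YAML parser; a hedged usage sketch showing one
    # way to do this appears after the end of the file.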
    model: !obj:pylearn2.models.dbm.DBM {
        batch_size: 100,
        # With a single hidden layer (a plain RBM), one mean field iteration
        # reaches convergence. With two hidden layers it is only an
        # approximation, but we keep niter at 1 here to keep the example
        # cheap.
        niter: 1,
        # The visible layer of this model is a GaussianVisLayer.
        # This layer was repaired in early March 2014 to work again for
        # DBM models.
        visible_layer: !obj:pylearn2.models.dbm.GaussianVisLayer {
            nvis: 784,
        },
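        # For intuition only: with unit variance and a single binary hidden
        # layer, a Gaussian visible layer yields the textbook
        # Gaussian-Bernoulli energy
        #     E(v, h) = 0.5 * ||v - b||^2 - c^T h - v^T W h,
        # with visible biases b, hidden biases c and weights W. This is a
        # sketch, not pylearn2's exact parameterization: GaussianVisLayer
        # also carries a learned precision (inverse variance) term, so the
        # implemented energy differs in detail.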
        hidden_layers: [
            # This DBM has two hidden layers, each consisting of a binary
            # vector. Optionally, one can do max pooling on top of these
            # vectors, but here we don't, by setting pool_size = 1.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                # Every layer in the DBM must have a layer_name field.
                # These are used to generate unique names of monitoring
                # channels associated with the different layers.
                layer_name: 'h1',
                # The detector layer is the portion of this layer that
                # precedes the pooling. We control its size with this
                # argument.
                detector_layer_dim: %(detector_layer1_dim)i,
                pool_size: 1,
                # We initialize the weights by drawing them from
                # W_ij ~ U(-irange, irange).
                irange: .05,
                # We initialize all the biases of the hidden units to a
                # negative number. This helps to learn a sparse
                # representation.
                init_bias: -2.,
            },
            # This is the second layer, identical to the first. The only
            # differences are the number of units and the layer name.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                layer_name: 'h2',
                detector_layer_dim: %(detector_layer2_dim)i,
                pool_size: 1,
                irange: .05,
                init_bias: -2.,
            }
        ]
    },
    # We train the model using stochastic gradient descent.
    # One benefit of using pylearn2 is that we can use the exact same piece
    # of code to train a DBM as to train an MLP. The interface that SGD uses
    # to get the gradient of the cost function from an MLP can also get the
    # *approximate* gradient from a DBM.
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        # We initialize the learning rate and momentum here. Down below
        # we control the way they decay with various callbacks.
        learning_rate: 1e-3,
        # Compute new model parameters using SGD + momentum.
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: 0.5,
        },
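        # For reference, classical momentum updates each parameter roughly as
        #     velocity <- momentum * velocity - learning_rate * gradient
        #     param    <- param + velocity
        # with momentum starting at init_momentum (0.5) and later raised by
        # the MomentumAdjustor extension below. This is a sketch of the
        # standard rule, not a quote of the implementation; see
        # pylearn2.training_algorithms.learning_rule.Momentum for details.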
        # These arguments say to compute the monitoring channels on a fixed
        # number of batches of the training set.
        monitoring_batches: %(monitoring_batches)i,
        monitoring_dataset: *data,
        # The SumOfCosts allows us to add together a few terms to make a
        # complicated cost function. The combined objective is summarized
        # just after this block.
        cost: !obj:pylearn2.costs.cost.SumOfCosts {
            costs: [
                # The first term of our cost function is variational PCD.
                # For a single-hidden-layer RBM the variational approximation
                # is exact, so this would really just be PCD. In deeper
                # models such as this one, it means we use mean field rather
                # than Gibbs sampling in the positive phase.
                !obj:pylearn2.costs.dbm.VariationalPCD {
                    # Here we specify how many fantasy particles to maintain
                    num_chains: 100,
                    # Here we specify how many steps of Gibbs sampling to do
                    # between each parameter update.
                    num_gibbs_steps: 5
                },
                # The second term of our cost function is a little bit of
                # weight decay.
                # Note that since we have 2 layers, we need 2 entries here.
                !obj:pylearn2.costs.dbm.WeightDecay {
                    coeffs: [ .0001, .0001 ]
                },
                # Finally, we regularize the model to be sparse, using a
                # method copied from Ruslan Salakhutdinov's DBM demo.
                # Note that since we have 2 layers, we need 2 entries here.
                !obj:pylearn2.costs.dbm.TorontoSparsity {
                    targets: [ .2, .2 ],
                    coeffs: [ .001, .001 ],
                }
            ],
        },
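        # Taken together, each update approximately minimizes
        #     L = L_PCD + sum_l coeff_l * sum(W_l ** 2) + sparsity penalties,
        # where the middle term is the WeightDecay contribution for each
        # layer's weight matrix W_l, and the TorontoSparsity penalties pull
        # each hidden layer's mean activation toward its target (.2 here).
        # This is only a rough summary; the exact penalty forms are defined
        # in pylearn2.costs.dbm.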
        # We tell the model to stop training once it has completed the
        # requested number of epochs (max_epochs).
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { max_epochs: %(max_epochs)i },
        update_callbacks: [
            # This callback makes the learning rate shrink by dividing it by
            # decay_factor after each sgd step (see the note after this
            # list).
            !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
                decay_factor: 1.000015,
                min_lr: 0.0001
            }
        ]
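        # Sketch of the schedule implied above: after t SGD updates the
        # effective learning rate is roughly
        #     max(learning_rate / decay_factor ** t, min_lr),
        # so with decay_factor 1.000015 it shrinks very slowly and bottoms
        # out at min_lr.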
    },
    extensions: [
        # This callback makes the momentum grow to 0.9 linearly. It starts
        # growing at epoch 5 and finishes growing at epoch 6.
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            final_momentum: .9,
            start: 5,
            saturate: 6
        },
    ],
    save_path: "%(save_path)s/dbm_gaussian_2layer.pkl",
    # This says to save it every epoch
    save_freq: 1
}
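
# Usage sketch (not part of the YAML above, kept commented out so the file
# still parses): one way to fill in the placeholders and launch training
# with pylearn2. The file name 'dbm_gaussian_2layer.yaml' and the
# hyperparameter values below are illustrative assumptions, not values fixed
# by this file.
#
#     from pylearn2.config import yaml_parse
#
#     with open('dbm_gaussian_2layer.yaml') as f:
#         template = f.read()
#
#     hyper_params = {
#         'train_stop': 50000,          # MNIST training examples to use
#         'detector_layer1_dim': 500,   # size of hidden layer h1
#         'detector_layer2_dim': 500,   # size of hidden layer h2
#         'monitoring_batches': 10,     # batches used for monitoring
#         'max_epochs': 30,             # when EpochCounter stops training
#         'save_path': '.',             # where the .pkl file is written
#     }
#
#     # Substitute the placeholders, then build and run the Train object.
#     train = yaml_parse.load(template % hyper_params)
#     train.main_loop()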