rbm_gaussian_2layer.yaml

a guest
Mar 24th, 2014
316
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
YAML 6.48 KB | None | 0 0
# This file shows how to train a Gaussian RBM variant on MNIST by viewing it as a 2-layer DBM.
# It is a variation of rbm.yaml with two changes: the input layer is a GaussianVisLayer, so the
# inputs are not binarized, and there are two binary hidden layers instead of one.
# The hyperparameters in this file aren't especially great; they're mostly chosen to demonstrate
# the interface. Feel free to suggest better hyperparameters!
!obj:pylearn2.train.Train {
    # For this example, we will modify the binarized rbm.yaml to use the
    # raw inputs with a Gaussian Visible Layer and two Binary
    # Vector hidden layers.
    dataset: &data !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        one_hot: 1,
        start: 0,
        stop: %(train_stop)i
    },
    model: !obj:pylearn2.models.dbm.DBM {
        batch_size: 100,
        # niter is the number of mean field iterations used in the positive phase.
        # In the single-hidden-layer rbm.yaml, one iteration reaches convergence;
        # with two hidden layers it is only an approximation, kept at 1 here for simplicity.
        niter: 1,
        # The visible layer of this RBM is a GaussianVisLayer
        # This layer was recently repaired in early March 2014
        # to be working again for DBM models
        visible_layer: !obj:pylearn2.models.dbm.GaussianVisLayer {
            nvis: 784,
        },
        hidden_layers: [
            # This model has two hidden layers, each consisting of a binary vector.
            # Optionally, one can do max pooling on top of this vector, but
            # here we don't, by setting pool_size = 1.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                # Every layer in the DBM must have a layer_name field.
                # These are used to generate unique names of monitoring
                # channels associated with the different layers.
                layer_name: 'h1',
                # The detector layer is the portion of this layer that
                # precedes the pooling. We control its size with this
                # argument.
                detector_layer_dim: %(detector_layer1_dim)i,
                pool_size: 1,
                # We initialize the weights by drawing them from W_ij ~ U(-irange, irange)
                irange: .05,
                # We initialize all the biases of the hidden units to a negative
                # number. This helps to learn a sparse representation.
                init_bias: -2.,
            },
            # This is the second hidden layer, identical to the first except
            # for the number of units and the layer name.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                layer_name: 'h2',
                detector_layer_dim: %(detector_layer2_dim)i,
                pool_size: 1,
                irange: .05,
                init_bias: -2.,
            }
        ]
    },
    # We train the model using stochastic gradient descent.
    # One benefit of using pylearn2 is that we can use the exact same piece of
    # code to train a DBM as to train an MLP. The interface that SGD uses to get
    # the gradient of the cost function from an MLP can also get the *approximate*
    # gradient from a DBM.
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        # We initialize the learning rate and momentum here. Down below
        # we can control the way they decay with various callbacks.
        learning_rate: 1e-3,
        # Compute new model parameters using SGD + Momentum
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: 0.5,
        },
        # These arguments say to compute the monitoring channels on 10 batches
        # of the training set.
        monitoring_batches: %(monitoring_batches)i,
        monitoring_dataset: *data,
        # The SumOfCosts allows us to add together a few terms to make a complicated
        # cost function.
        cost: !obj:pylearn2.costs.cost.SumOfCosts {
            costs: [
                # The first term of our cost function is variational PCD.
                # For the RBM, the variational approximation is exact, so
                # this is really just PCD. In deeper models, it means we
                # use mean field rather than Gibbs sampling in the positive phase.
                !obj:pylearn2.costs.dbm.VariationalPCD {
                    # Here we specify how many fantasy particles to maintain
                    num_chains: 100,
                    # Here we specify how many steps of Gibbs sampling to do between
                    # each parameter update.
                    num_gibbs_steps: 5
                },
                # The second term of our cost function is a little bit of weight
                # decay.
                # Note since we have 2 layers, we need 2 entries here.
                !obj:pylearn2.costs.dbm.WeightDecay {
                    coeffs: [ .0001, .0001 ]
                },
                # Finally, we regularize the RBM to be sparse, using a method copied
                # from Ruslan Salakhutdinov's DBM demo.
                # Note since we have 2 layers, we need 2 entries here.
                !obj:pylearn2.costs.dbm.TorontoSparsity {
                    targets: [ .2, .2 ],
                    coeffs: [ .001, .001 ],
                }
            ],
        },
        # We tell the RBM to train for max_epochs epochs
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { max_epochs: %(max_epochs)i },
        update_callbacks: [
            # This callback makes the learning rate shrink by dividing it by decay_factor after
            # each sgd step.
            !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
                decay_factor: 1.000015,
                min_lr: 0.0001
            }
        ]
    },
    extensions: [
        # This callback makes the momentum grow to 0.9 linearly. It starts
        # growing at epoch 5 and finishes growing at epoch 6.
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            final_momentum: .9,
            start: 5,
            saturate: 6
        },
    ],
    save_path: "%(save_path)s/dbm_gaussian_2layer.pkl",
    # This says to save it every epoch
    save_freq: 1
}
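
The %(...)i and %(...)s placeholders above are filled in by Python string substitution before
the YAML is parsed. A minimal driver script is sketched below; it assumes the config is saved
as rbm_gaussian_2layer.yaml, and the hyperparameter values are illustrative rather than the
author's choices.

# Sketch of a driver script for the config above (hyperparameter values are illustrative).
from pylearn2.config import yaml_parse

with open('rbm_gaussian_2layer.yaml', 'r') as f:
    yaml_string = f.read()

hyper_params = {
    'train_stop': 50000,         # number of MNIST training examples to use
    'detector_layer1_dim': 500,  # size of the first hidden layer
    'detector_layer2_dim': 500,  # size of the second hidden layer
    'monitoring_batches': 10,    # batches used to compute monitoring channels
    'max_epochs': 30,            # epochs before the EpochCounter stops training
    'save_path': '.',            # directory where dbm_gaussian_2layer.pkl is written
}

# Substitute the hyperparameters, build the Train object, and run the main loop.
train = yaml_parse.load(yaml_string % hyper_params)
train.main_loop()

After training finishes, the saved dbm_gaussian_2layer.pkl can be reloaded with
pylearn2.utils.serial.load for inspection of the learned weights and monitoring channels.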