rbm2.yaml

# This file shows how to train a binary RBM by viewing it as a single layer DBM. The training data
# is MNIST passed through a previously trained first-layer RBM (rbm.pkl), so this becomes the second
# RBM in the stack. The hyperparameters in this file aren't especially great; they're mostly chosen
# to demonstrate the interface. Feel free to suggest better hyperparameters!
!obj:pylearn2.train.Train {
    dataset: &data !obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
        raw: &raw_data !obj:pylearn2.datasets.binarizer.Binarizer {
            raw: &raw_train !obj:pylearn2.datasets.mnist.MNIST {
                which_set: "train",
            }
        },

        transformer: !obj:pylearn2.models.DBNSampler.DBNSampler {
            rbm_list: [!pkl: "rbm.pkl"]
        }
    },
    model: !obj:pylearn2.models.dbm.DBM {
        batch_size: 100,
        # 1 mean field iteration reaches convergence in the RBM
        niter: 1,
        # The visible layer of this RBM is just a binary vector
        # (as opposed to a binary image for convolutional models,
        # a Gaussian distributed vector, etc.)
        visible_layer: !obj:pylearn2.models.dbm.BinaryVector {
            nvis: 500,
            # We can initialize the biases of the visible units
            # so that sigmoid(b_i) = E[v_i] where the expectation
            # is taken over the dataset. This should get the biases
            # about correct from the start and helps speed up learning.
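            # (Concretely, this presumably amounts to setting b_i = log(p_i / (1 - p_i)),
            # the inverse sigmoid of the empirical marginal p_i = E[v_i] over the training set.)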
            bias_from_marginals: *raw_train,
        },
        hidden_layers: [
            # This RBM has one hidden layer, consisting of a binary vector.
            # Optionally, one can do max pooling on top of this vector, but
            # here we don't, by setting pool_size = 1.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                # Every layer in the DBM must have a layer_name field.
                # These are used to generate unique names of monitoring
                # channels associated with the different layers.
                layer_name: 'h',
                # The detector layer is the portion of this layer that
                # precedes the pooling. We control its size with this
                # argument. Here we request 500 hidden units.
                detector_layer_dim: 500,
                pool_size: 1,
                # We initialize the weights by drawing them from W_ij ~ U(-irange, irange)
                irange: .05,
                # We initialize all the biases of the hidden units to a negative
                # number. This helps to learn a sparse representation.
                init_bias: -2.,
            }
        ]
    },
    # We train the model using stochastic gradient descent.
    # One benefit of using pylearn2 is that we can use the exact same piece of
    # code to train a DBM as to train an MLP. The interface that SGD uses to get
    # the gradient of the cost function from an MLP can also get the *approximate*
    # gradient from a DBM.
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
               # We initialize the learning rate and momentum here. Down below
               # we can control the way they decay with various callbacks.
               learning_rate: 1e-3,
               init_momentum: .5,
               # These arguments say to compute the monitoring channels on 10 batches
               # of the training set.
               monitoring_batches: 10,
               monitoring_dataset : *data,
               # The SumOfCosts allows us to add together a few terms to make a complicated
               # cost function.
               cost : !obj:pylearn2.costs.cost.SumOfCosts {
                costs: [
                        # The first term of our cost function is variational PCD.
                        # For the RBM, the variational approximation is exact, so
                        # this is really just PCD. In deeper models, it means we
                        # use mean field rather than Gibbs sampling in the positive phase.
                        !obj:pylearn2.costs.dbm.VariationalPCD {
                           # Here we specify how many fantasy particles to maintain
                           num_chains: 100,
                           # Here we specify how many steps of Gibbs sampling to do between
                           # each parameter update.
                           num_gibbs_steps: 5
                        },
                        # The second term of our cost function is a little bit of weight
                        # decay.
                        !obj:pylearn2.costs.dbm.WeightDecay {
                          coeffs: [ .0001 ]
                        },
                        # Finally, we regularize the RBM to be sparse, using a method adapted
                        # from Ruslan Salakhutdinov's DBM demo code.
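                        # (Reading the arguments below: "targets" is presumably the desired mean
                        # activation of each hidden unit, and "coeffs" the weight on the penalty
                        # pulling the average activation toward that target.)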
                        !obj:pylearn2.costs.dbm.TorontoSparsity {
                         targets: [ .2 ],
                         coeffs: [ .001 ],
                        }
                       ],
           },
           # We tell the training algorithm to stop after a single epoch here;
           # raise max_epochs (e.g. to 300) to train the RBM for longer.
           termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { max_epochs: 1 },
           update_callbacks: [
                # This callback makes the learning rate shrink by dividing it by decay_factor after
                # each sgd step.
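                # (Equivalently, after t SGD updates the learning rate is roughly
                # learning_rate / decay_factor^t; min_lr below should act as a floor it never drops under.)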
                !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
                        decay_factor: 1.000015,
                        min_lr:      0.0001
                }
           ]
        },
    extensions: [
            # This extension makes the momentum grow linearly to 0.9. It starts
            # growing at epoch 5 and finishes growing at epoch 6.
            !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
                final_momentum: .9,
                start: 5,
                saturate: 6
            },
    ],
    # This says to save the trained model to the same path as
    # the yaml file, but with .yaml replaced by .pkl
    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
    # This says to save it every epoch
    save_freq : 1
}
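
Below is a minimal sketch of how this configuration might be launched, assuming it is saved as rbm2.yaml next to the already-trained first-layer rbm.pkl. The usual route is pylearn2's own driver, python pylearn2/scripts/train.py rbm2.yaml, which substitutes ${PYLEARN2_TRAIN_FILE_FULL_STEM} with the config path minus its extension; the snippet does that substitution by hand before handing the string to the YAML loader.

import os
from pylearn2.config import yaml_parse

config_path = "rbm2.yaml"  # assumed filename; the first-layer rbm.pkl must already exist alongside it
with open(config_path) as f:
    yaml_str = f.read()

# Resolve the save_path variable the way the train script would:
# the stem is the config path minus its .yaml extension.
stem = os.path.splitext(os.path.abspath(config_path))[0]
yaml_str = yaml_str.replace("${PYLEARN2_TRAIN_FILE_FULL_STEM}", stem)

train_obj = yaml_parse.load(yaml_str)  # builds the Train object; the !pkl tag loads rbm.pkl here
train_obj.main_loop()                  # runs SGD until the EpochCounter criterion stops it

The resulting rbm2.pkl could then be reloaded with pylearn2.utils.serial.load to stack a further layer or to assemble the full deep model.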