# This file shows how to train a binary RBM by viewing it as a single layer DBM.
# Rather than raw MNIST pixels, the training data here is hidden-layer samples from
# a previously trained RBM ("rbm.pkl"), as in greedy layer-wise training of a DBN.
# The hyperparameters in this file aren't especially great; they're mostly chosen
# to demonstrate the interface. Feel free to suggest better hyperparameters!
!obj:pylearn2.train.Train {
    dataset: &data !obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
        raw: &raw_data !obj:pylearn2.datasets.binarizer.Binarizer {
            raw: &raw_train !obj:pylearn2.datasets.mnist.MNIST {
                which_set: "train",
            }
        },
        transformer: !obj:pylearn2.models.DBNSampler.DBNSampler {
            rbm_list: [!pkl: "rbm.pkl"]
        }
    },
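    # A rough sketch of what the dataset above yields (not the literal pylearn2
    # implementation): each binarized MNIST image v is pushed through the RBM
    # stored in "rbm.pkl", and a sample of that RBM's hidden layer,
    #     h_j ~ Bernoulli( sigmoid( c_j + v^T W[:, j] ) ),
    # becomes one 500-dimensional training example for the model defined below.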
    model: !obj:pylearn2.models.dbm.DBM {
        batch_size: 100,
        # 1 mean field iteration reaches convergence in the RBM
        niter: 1,
        # The visible layer of this RBM is just a binary vector
        # (as opposed to a binary image for convolutional models,
        # a Gaussian distributed vector, etc.)
        visible_layer: !obj:pylearn2.models.dbm.BinaryVector {
            nvis: 500,
            # We can initialize the biases of the visible units
            # so that sigmoid(b_i) = E[v_i] where the expectation
            # is taken over the dataset. This should get the biases
            # about correct from the start and helps speed up learning.
            bias_from_marginals: *raw_train,
        },
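        # Concretely, the bias initialization above sets
        #     b_i = log( E[v_i] / (1 - E[v_i]) ),
        # i.e. the inverse sigmoid (logit) of each visible unit's mean value
        # over the dataset.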
        hidden_layers: [
            # This RBM has one hidden layer, consisting of a binary vector.
            # Optionally, one can do max pooling on top of this vector, but
            # here we don't, by setting pool_size = 1.
            !obj:pylearn2.models.dbm.BinaryVectorMaxPool {
                # Every layer in the DBM must have a layer_name field.
                # These are used to generate unique names of monitoring
                # channels associated with the different layers.
                layer_name: 'h',
                # The detector layer is the portion of this layer that
                # precedes the pooling. We control its size with this
                # argument. Here we request 500 hidden units.
                detector_layer_dim: 500,
                pool_size: 1,
                # We initialize the weights by drawing them from W_ij ~ U(-irange, irange)
                irange: .05,
                # We initialize all the biases of the hidden units to a negative
                # number. This helps to learn a sparse representation.
                init_bias: -2.,
            }
        ]
    },
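    # For reference, the model defined above is a standard binary RBM with energy
    #     E(v, h) = - b^T v - c^T h - v^T W h,
    # so the conditionals used for sampling and mean field are
    #     p(h_j = 1 | v) = sigmoid( c_j + v^T W[:, j] )
    #     p(v_i = 1 | h) = sigmoid( b_i + W[i, :] h )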
    # We train the model using stochastic gradient descent.
    # One benefit of using pylearn2 is that we can use the exact same piece of
    # code to train a DBM as to train an MLP. The interface that SGD uses to get
    # the gradient of the cost function from an MLP can also get the *approximate*
    # gradient from a DBM.
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        # We initialize the learning rate and momentum here. Down below
        # we can control the way they decay with various callbacks.
        learning_rate: 1e-3,
        init_momentum: .5,
        # These arguments say to compute the monitoring channels on 10 batches
        # of the training set.
        monitoring_batches: 10,
        monitoring_dataset: *data,
        # The SumOfCosts allows us to add together a few terms to make a complicated
        # cost function.
        cost: !obj:pylearn2.costs.cost.SumOfCosts {
            costs: [
                # The first term of our cost function is variational PCD.
                # For the RBM, the variational approximation is exact, so
                # this is really just PCD. In deeper models, it means we
                # use mean field rather than Gibbs sampling in the positive phase.
                !obj:pylearn2.costs.dbm.VariationalPCD {
                    # Here we specify how many fantasy particles to maintain
                    num_chains: 100,
                    # Here we specify how many steps of Gibbs sampling to do between
                    # each parameter update.
                    num_gibbs_steps: 5
                },
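                # As a reminder of what PCD approximates (this is background, not
                # pylearn2 internals): the log-likelihood gradient for a weight is
                #     d log p(v) / d W_ij = E_data[ v_i h_j ] - E_model[ v_i h_j ],
                # where the model expectation is estimated from the 100 persistent
                # Gibbs chains above, each advanced 5 steps per parameter update.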
                # The second term of our cost function is a little bit of weight
                # decay.
                !obj:pylearn2.costs.dbm.WeightDecay {
                    coeffs: [ .0001 ]
                },
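                # Roughly speaking, this adds a penalty of the form
                #     coeff * sum_ij W_ij^2
                # per weight matrix; `coeffs` holds one coefficient per hidden
                # layer (just one here).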
                # Finally, we regularize the RBM to be sparse, using a method copied
                # from Ruslan Salakhutdinov's DBM demo.
                !obj:pylearn2.costs.dbm.TorontoSparsity {
                    targets: [ .2 ],
                    coeffs: [ .001 ],
                }
            ],
        },
        # We stop training after a single epoch. That is only enough for a quick
        # demonstration; raise max_epochs for a real training run.
        termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { max_epochs: 1 },
        update_callbacks: [
            # This callback makes the learning rate shrink by dividing it by decay_factor after
            # each sgd step.
            !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
                decay_factor: 1.000015,
                min_lr: 0.0001
            }
        ]
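        # In other words, after t SGD updates the learning rate is approximately
        #     learning_rate / decay_factor^t,
        # and it is never allowed to fall below min_lr (1e-4 here).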
    },
    extensions: [
        # This extension makes the momentum grow to 0.9 linearly. It starts
        # growing at epoch 5 and finishes growing at epoch 6.
        !obj:pylearn2.training_algorithms.sgd.MomentumAdjustor {
            final_momentum: .9,
            start: 5,
            saturate: 6
        },
    ],
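    # Note that with max_epochs set to 1 above, this schedule never actually
    # engages and momentum stays at its initial value of .5; it only matters
    # once max_epochs is raised past `start`.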
    # This says to save the trained model to the same path as
    # the yaml file, but replacing .yaml with .pkl
    save_path: "${PYLEARN2_TRAIN_FILE_FULL_STEM}.pkl",
    # This says to save it every epoch
    save_freq: 1
}
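# A minimal sketch of how a file like this is usually run (assuming pylearn2 is
# installed; "dbn_layer2.yaml" below is just a placeholder for whatever name this
# file is saved under):
#
#     python pylearn2/scripts/train.py dbn_layer2.yaml
#
# The train script fills in ${PYLEARN2_TRAIN_FILE_FULL_STEM} with the path of the
# yaml file minus its extension. Alternatively, from Python:
#
#     from pylearn2.config import yaml_parse
#     train = yaml_parse.load(open("dbn_layer2.yaml").read())
#     train.main_loop()
#
# though in that case you would need to set PYLEARN2_TRAIN_FILE_FULL_STEM in the
# environment yourself (or hard-code save_path above).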