  1. """This tutorial introduces the LeNet5 neural network architecture
  2. using Theano.  LeNet5 is a convolutional neural network, good for
  3. classifying images. This tutorial shows how to build the architecture,
  4. and comes with all the hyper-parameters you need to reproduce the
  5. paper's MNIST results.
  6.  
  7.  
  8. This implementation simplifies the model in the following ways:
  9.  
  10. - LeNetConvPool doesn't implement location-specific gain and bias parameters
  11. - LeNetConvPool doesn't implement pooling by average, it implements pooling
  12.   by max.
  13. - Digit classification is implemented with a logistic regression rather than
  14.   an RBF network
  15. - LeNet5 was not fully-connected convolutions at second layer
  16.  
  17. References:
  18. - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
  19.   Gradient-Based Learning Applied to Document
  20.   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
  21.   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
  22.  
  23. """
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet.Conv3D import conv3D
from theano.tensor.nnet import conv
from theano.tensor.nnet import conv3d2d
from theano.sandbox.cuda import fftconv
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer

mode_default = theano.compile.mode.get_default_mode()
mode_fft = mode_default.including('gpu', 'conv3d_fft', 'convgrad3d_fft',
                                  'convtransp3d_fft')
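# ``mode_fft`` asks Theano's optimizer to swap the 3D convolution, its
# gradient and its transpose for the FFT-based GPU implementations
# (provided by theano.sandbox.cuda.fftconv) when the functions below are
# compiled.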
  43.  
  44. class LeNetConvPoolLayer(object):
  45.     """Pool Layer of a convolutional network """
  46.  
  47.     def __init__(self, rng, input, filter_shape, image_shape, poolsize,pad):
  48.         """
  49.        Allocate a LeNetConvPoolLayer with shared variable internal parameters.
  50.  
  51.        :type rng: numpy.random.RandomState
  52.        :param rng: a random number generator used to initialize weights
  53.  
  54.        :type input: theano.tensor.dtensor4
  55.        :param input: symbolic image tensor, of shape image_shape
  56.  
  57.        :type filter_shape: tuple or list of length 4
  58.        :param filter_shape: (number of filters, num input feature maps,
  59.                              filter height,filter width)
  60.  
  61.        :type image_shape: tuple or list of length 4
  62.        :param image_shape: (batch size, num input feature maps,
  63.                             image height, image width,)
  64.  
  65.        :type poolsize: tuple or list of length 2
  66.        :param poolsize: the downsampling (pooling) factor (#rows,#cols)
  67.        """
  68.  
  69.         assert image_shape[2] == filter_shape[2]
  70.         self.input = input
  71.  
  72.         # there are "num input feature maps * filter height * filter width * filter depth"
  73.         # inputs to each hidden unit
  74.         fan_in = numpy.prod(filter_shape[2:])
  75.         # each unit in the lower layer receives a gradient from:
  76.         # "num output feature maps * filter height * filter width" /
  77.         #   pooling size
  78.         fan_out = (filter_shape[0] * numpy.prod(filter_shape[3:]) /
  79.                    numpy.prod(poolsize))
  80.         # initialize weights with random weights
  81.         W_bound = numpy.sqrt(6. / (fan_in + fan_out))
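        # For example, layer0 below uses filter_shape = (nkerns[0], 4, 1, 7, 9)
        # and poolsize = (3, 3), so with nkerns[0] = 5:
        #   fan_in  = 1 * 7 * 9             = 63
        #   fan_out = 5 * 7 * 9 / (3 * 3)   = 35
        #   W_bound = sqrt(6 / (63 + 35))  ~= 0.247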
        self.W = theano.shared(numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
                               borrow=True)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters (3D convolution over
        # time, height and width); conv3d2d expects signals of shape
        # (batch, time, channels, height, width) and filters of shape
        # (num filters, filter time, channels, filter height, filter width)
        conv_out = conv3d2d.conv3d(signals=input, filters=self.W,
                                   signals_shape=image_shape,
                                   filters_shape=filter_shape)
        # 2D version used by the original tutorial:
        # conv_out = conv.conv2d(input=input, filters=self.W,
        #                        filter_shape=filter_shape,
        #                        image_shape=image_shape)

        # downsample each feature map individually, using maxpooling over
        # the last two dimensions (height and width)
        pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize,
                                            ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, 1, n_filters, 1, 1). Each bias
        # will thus be broadcasted across mini-batches, frames and feature
        # map width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x'))
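        # Shape walkthrough for layer0 below (batch_size=10, nkerns[0]=5):
        #   signals  (10, 9, 1, 60, 80), filters (5, 4, 1, 7, 9)
        #   conv_out (10, 9-4+1, 5, 60-7+1, 80-9+1) = (10, 6, 5, 54, 72)
        #   pooled   (10, 6, 5, 54/3, 72/3)         = (10, 6, 5, 18, 24)
        #   bias broadcast as (1, 1, 5, 1, 1) over batch, time, height, width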

        # store parameters of this layer
        self.params = [self.W, self.b]

def evaluate_lenet5(learning_rate=0.01, n_epochs=10,
                    dataset='./kth_thesis.pkl',
                    nkerns=[5, 10, 128], batch_size=10):
    """ Demonstrates the 3D convolutional LeNet-style network on the
    dataset given by ``dataset``

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset used for training / testing

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for training and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_test_batches /= batch_size
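    # note: Python 2 integer division floors here, so examples that do not
    # fill a complete minibatch are silently dropped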

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized clips of frames
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (80, 60)  # nominal frame size (not used below)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized clips of shape (batch_size, 9 * 1 * 60 * 80)
    # to a 5D tensor of shape (batch, frames, channels, height, width),
    # compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 9, 1, 60, 80))
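    # each row of x is therefore assumed to hold one clip of 9 single-channel
    # 60 x 80 frames, i.e. 9 * 1 * 60 * 80 = 43200 values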

    # Construct the first convolutional pooling layer:
    # filtering reduces each frame to (60-7+1, 80-9+1) = (54, 72) and the
    # clip length to 9-4+1 = 6 frames
    # maxpooling reduces the frame size further to (54/3, 72/3) = (18, 24)
    # 5D output tensor is thus of shape (batch_size, 6, nkerns[0], 18, 24)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 9, 1, 60, 80),
            filter_shape=(nkerns[0], 4, 1, 7, 9), poolsize=(3, 3), pad=False)

    # Construct the second convolutional pooling layer:
    # filtering reduces each frame to (18-7+1, 24-7+1) = (12, 18) and the
    # clip length to 6-4+1 = 3 frames
    # maxpooling reduces the frame size further to (12/3, 18/3) = (4, 6)
    # 5D output tensor is thus of shape (batch_size, 3, nkerns[1], 4, 6)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, 6, nkerns[0], 18, 24),
            filter_shape=(nkerns[1], 4, nkerns[0], 7, 7), poolsize=(3, 3), pad=False)

    # The third layer's filters span the whole remaining volume (3 frames of
    # 4 x 6 feature maps), so it acts as a fully-connected layer and produces
    # a 5D output of shape (batch_size, 1, nkerns[2], 1, 1)
    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
            image_shape=(batch_size, 3, nkerns[1], 4, 6),
            filter_shape=(nkerns[2], 3, nkerns[1], 4, 6), poolsize=(1, 1), pad=False)

    # flatten to a matrix of shape (batch_size, nkerns[2]) = (batch_size, 128)
    layer3_input = layer2.output.flatten(2)

    # fully-connected tanh hidden layer followed by a logistic regression
    # classifier
    layer3 = HiddenLayer(rng, input=layer3_input, n_in=128, n_out=20,
                         activation=T.tanh)
    layer4 = LogisticRegression(input=layer3.output, n_in=20, n_out=6)
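    # layer4 outputs probabilities over 6 classes; given the default
    # ``kth_thesis.pkl`` dataset path, this presumably corresponds to the six
    # KTH action categories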

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer4.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]},
             mode=mode_fft)

    # create a list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params +
              layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))
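    # each shared parameter p is thus updated as p <- p - learning_rate * dcost/dp
    # (plain SGD, with no momentum or weight decay)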

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]},
          mode=mode_fft)

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters (kept from the original tutorial; early
    # stopping itself is not applied below, so the loop always runs for
    # n_epochs)
    patience = 100000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    test_frequency = n_train_batches  # go through this many minibatches
                                      # before checking the network on the
                                      # test set; in this case we check
                                      # every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            cost_ij = train_model(minibatch_index)

            if (iter + 1) % test_frequency == 0:

                # test it on the test set
                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                print test_losses
                test_score = numpy.mean(test_losses)
                print(('     epoch %i, minibatch %i/%i, test error of '
                       'model %f %%') %
                      (epoch, minibatch_index + 1, n_train_batches,
                       test_score * 100.))

    end_time = time.clock()
    print('Optimization complete.')
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

if __name__ == '__main__':
    evaluate_lenet5()


def experiment(state, channel):
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)
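# ``experiment`` follows the (state, channel) convention used by the Deep
# Learning Tutorials' jobman scripts, so the model can presumably be launched
# from a job scheduler as well as from the command line.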