  1. """This tutorial introduces the LeNet5 neural network architecture
  2. using Theano.  LeNet5 is a convolutional neural network, good for
  3. classifying images. This tutorial shows how to build the architecture,
  4. and comes with all the hyper-parameters you need to reproduce the
  5. paper's MNIST results.
  6.  
  7.  
  8. This implementation simplifies the model in the following ways:
  9.  
  10. - LeNetConvPool doesn't implement location-specific gain and bias parameters
  11. - LeNetConvPool doesn't implement pooling by average, it implements pooling
  12.   by max.
  13. - Digit classification is implemented with a logistic regression rather than
  14.   an RBF network
  15. - LeNet5 was not fully-connected convolutions at second layer
  16.  
  17. References:
  18. - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
  19.   Gradient-Based Learning Applied to Document
  20.   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
  21.   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
  22.  
  23. """
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

# used below to report macro/micro precision, recall and F-score on the test set
from sklearn.metrics import precision_recall_fscore_support

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer

def inspect_inputs(i, node, fn):
    # debugging hook: print the runtime input values of each node as it executes
    print i, node, "input(s) value(s):", [input[0] for input in fn.inputs],

def inspect_outputs(i, node, fn):
    # debugging hook: print the runtime output values of each node after it executes
    print "output(s) value(s):", [output[0] for output in fn.outputs]

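# These two hooks can be attached to a compiled function through Theano's
# MonitorMode to trace intermediate values while debugging. A minimal sketch
# (it slows execution down considerably, which is why the corresponding
# "mode=" argument of train_model further below is left commented out):
#
#   f = theano.function(
#       [index], cost, updates=updates,
#       mode=theano.compile.MonitorMode(
#           pre_func=inspect_inputs,
#           post_func=inspect_outputs))
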
class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

  121.  
  122. def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
  123.                     dataset='mnist.pkl.gz',
  124.                     nkerns=[20, 50], batch_size=20):
  125.     """ Demonstrates lenet on MNIST dataset
  126.  
  127.    :type learning_rate: float
  128.    :param learning_rate: learning rate used (factor for the stochastic
  129.                          gradient)
  130.  
  131.    :type n_epochs: int
  132.    :param n_epochs: maximal number of epochs to run the optimizer
  133.  
  134.    :type dataset: string
  135.    :param dataset: path to the dataset used for training /testing (MNIST here)
  136.  
  137.    :type nkerns: list of ints
  138.    :param nkerns: number of kernels on each layer
  139.    """
  140.  
    rng = numpy.random.RandomState(23455)

    # load the dataset and split it into shared train / validation / test sets
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

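    # The corpus-specific quantities used below (maxlen, beforepad, afterpad,
    # reasons, testlabel) are never defined in this paste; they presumably come
    # from the author's preprocessing code. The placeholder values here are
    # assumptions, added only so the script is self-contained; replace them
    # with the real preprocessing output.
    maxlen = 50                        # assumed maximum sentence length, in words
    beforepad = 2                      # assumed padding before the sentence
    afterpad = 2                       # assumed padding after the sentence
    reasons = ['class_0', 'class_1']   # assumed label set (one entry per class)
    testlabel = []                     # assumed gold labels for the test set
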
    ##############
    # PARAMETERS #
    ##############
    batch_size = 1          # overrides the batch_size argument: one sentence per minibatch
    filterWidth = 5         # convolution filter width, in words
    totalpossiblewords = maxlen + beforepad + afterpad
    word2vecDimension = 300
    kernelNumber = 40       # number of convolution filters (feature maps)
    MLPhiddenneurons = 50
    MLPexitNeurons = len(reasons)
    # width of the 'valid' convolution output; max-pooling over this full
    # width collapses each feature map to a single value per sentence
    maxPoolingDimension = totalpossiblewords - filterWidth + 1

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized sentence matrices
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    #x.tag.test_value = numpy.random.rand(len(traindata), 300 * totalpossiblewords)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape the matrix of rasterized sentences of shape
    # (batch_size, word2vecDimension * totalpossiblewords) to a 4D tensor,
    # compatible with our LeNetConvPoolLayer: one input "channel", one row per
    # word2vec dimension and one column per word position.
    layer0_input = x.reshape((batch_size, 1, word2vecDimension, totalpossiblewords))

    # Construct the first (and only) convolutional pooling layer:
    # filtering with filters spanning all word2vec dimensions reduces the
    # "image" to (1, totalpossiblewords - filterWidth + 1); max-pooling over
    # that entire width reduces it further to (1, 1), so the 4D output tensor
    # has shape (batch_size, kernelNumber, 1, 1)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, word2vecDimension, totalpossiblewords),
        filter_shape=(kernelNumber, 1, word2vecDimension, filterWidth),
        poolsize=(1, maxPoolingDimension)
    )

    # A second convolutional pooling layer (as in the original LeNet5 tutorial)
    # is not used here; it is kept only for reference:
#     layer1 = LeNetConvPoolLayer(
#         rng,
#         input=layer0.output,
#         image_shape=(batch_size, nkerns[0], 12, 12),
#         filter_shape=(nkerns[1], nkerns[0], 5, 5),
#         poolsize=(2, 2)
#     )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_features). Flattening the pooled output gives a
    # matrix of shape (batch_size, kernelNumber * 1 * 1) = (batch_size, kernelNumber).
    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected hidden layer with tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=kernelNumber,
        n_out=MLPhiddenneurons,
        activation=T.tanh
    )

    # classify the values of the fully-connected hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=MLPhiddenneurons, n_out=len(reasons))

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        },
#         mode=theano.compile.MonitorMode(
#                         pre_func=inspect_inputs,
#                         post_func=inspect_outputs)
    )
    # predict_model returns the predicted class labels for a test minibatch
    predict_model = theano.function(
        [index],
        layer3.y_pred,
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    # collect per-batch predictions and flatten the list of
                    # lists into one long list of predicted labels (use a name
                    # other than x so the symbolic input is not shadowed)
                    predicted_values = [predict_model(i) for i in xrange(n_test_batches)]
                    predicted_values = [p for sublist in predicted_values for p in sublist]
                    precision, recall, fbeta, support = precision_recall_fscore_support(testlabel, predicted_values, average='macro')
                    print 'macro', precision, recall, fbeta, support
                    precision, recall, fbeta, support = precision_recall_fscore_support(testlabel, predicted_values, average='micro')
                    print 'micro', precision, recall, fbeta, support
                    print ''

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

if __name__ == '__main__':
    evaluate_lenet5()

def experiment(state, channel):
    # alternative entry point taking hyper-parameters from a state object
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)