Advertisement
Guest User

Problematic Convolutional Neural Network

a guest
Jul 4th, 2016
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.35 KB | None | 0 0
  1. import numpy as np
  2. import theano
  3. from theano import tensor as T
  4. from theano.tensor.nnet import conv2d
  5.  
  6. import data_organizer, normalize
  7.  
# Default values
# Default mini-batch size (first entry of the default CNN img_shp).
BATCH_SIZE = 10
# Image height/width come from the project's normalize module.
HEIGHT = normalize.FINAL_HEIGHT
WIDTH = normalize.FINAL_WIDTH
# Default seed for the NumPy RandomState that draws the initial weights.
RNG_SEED = 1337
# Default user-facing learning rate; CNN rescales it before use.
ETA = 0.1
# Default user-facing regularization strength; CNN rescales it before use.
LMBDA = 0.005

# Useful constants
# Used as `var_adjust` when initializing layer weights.
# NOTE(review): VAR_* look like var(A(z)) and BACK_ADJUST_* like
#   var(A'(z)) + E(A'(z))^2 for a standard-normal z, with A = tanh or
#   hard tanh (clip to [-1, 1]) -- confirm the derivation before relying
#   on the exact figures.
VAR_TANH_STD = 0.39429449
VAR_HARD_TANH_STD = 0.51605855
BACK_ADJUST_TANH = 0.46440290
BACK_ADJUST_HARD_TANH = 0.68268949
# Geometric mean of the forward (VAR_*) and backward (BACK_ADJUST_*) factors.
COMPROMISE_TANH = np.sqrt(VAR_TANH_STD * BACK_ADJUST_TANH)
COMPROMISE_HARD_TANH = np.sqrt(VAR_HARD_TANH_STD * BACK_ADJUST_HARD_TANH)
  23.  
  24. class _ConvLayer:
  25.     def __init__(self, rng, input_img, single_input_img,
  26.                 filt_shp, input_shp, activation, var_adjust, has_mask=False):
  27.         # input_shp: (mini-batch size, # input maps, height, width)
  28.         # filt_shp: (# output maps, # input maps, kernel height, kernel width)
  29.         self.input_img = input_img
  30.         self.activation = activation
  31.  
  32.         fan_in = np.prod(filt_shp[1:])
  33.         fan_out = filt_shp[0] * np.prod(filt_shp[2:])
  34.  
  35.         W_var = 2.0 / (var_adjust * (fan_in + fan_out))
  36.  
  37.         # var_adjust ~= var(A(prev_w_in)),
  38.         # var_adjust ~= var(A'(prev_w_in)) + E(A'(prev_w_in))^2
  39.         # W_var ~= 1.0 / (var_adjust * fan_in)
  40.         # W_var ~= 1.0 / (var_adjust * fan_out)
  41.  
  42.         # Feedforward:
  43.         # There are fan_in input activations of m: 0, v: var(A(prev_w_in))
  44.         # Each is multiplied by an independent weight of
  45.         #   m: 0, v: 1.0 / (var(A(prev_w_in)) * fan_in)
  46.         # The input activations are correlated, but multiplying independent
  47.         #   weights make the products independent
  48.         # Therefore weighted input is of m: 0, v: 1.0
  49.  
  50.         # Backpropagation:
  51.         # There are fan_out output weighted input gradients of
  52.         #   m: 0, v: out_var / plane_size^2
  53.         # Each is multiplied by an weight of
  54.         #   m: 0, v: 1.0 / ((var(A'(prev_w_in) + E(A'(prev_w_in))^2) * fan_out)
  55.         # Finally, the product is multiplied by the gradient term of
  56.         #   m: E(A'(prev_w_in)), v: var(A'(prev_w_in))
  57.         # Let's suppose these values are independent
  58.         #   (last 2 are independent, any other pair has slight correlation)
  59.         # The output weighted input gradients are correlated, but
  60.         #   multiplying independent weights make the products independent
  61.         # Therefore input weighted input gradients are of
  62.         #   m: 0, v: out_var / plane_size^2
  63.         # Bias gradients sum up the an entire plane of weighted input
  64.         #   gradients, which are correlated, making the bias gradients of
  65.         #   m: 0, v: out_var
  66.         # Weight gradients multiply each weighted input gradient by
  67.         #   A(prev_w_in), which is also correlated, making the
  68.         #   weight gradients of m: 0, v: var(A(prev_w_in)) * out_var
  69.  
  70.         # W: weights ---
  71.         #   (# output maps, # input maps, kernel height, kernel width)
  72.         self.W = theano.shared(np.asarray(rng.normal(
  73.                             loc=0.0, scale=np.sqrt(W_var), size=filt_shp),
  74.                         dtype = theano.config.floatX),
  75.                     borrow = True)
  76.  
  77.         # b: biases --- (# output maps)
  78.         # Ones are added to allow broadcasting with input_shp
  79.         self.b = theano.shared(np.zeros(filt_shp[0],
  80.                         dtype = theano.config.floatX),
  81.                     borrow = True)
  82.  
  83.         # For some reason I can't do bordermode='half'
  84.         top = filt_shp[2] // 2
  85.         bot = input_shp[2] + top
  86.         left = filt_shp[3] // 2
  87.         right = input_shp[3] + left
  88.  
  89.         conv_out = conv2d(input = input_img, filters = self.W,
  90.                 filter_shape = filt_shp, input_shape = input_shp,
  91.                 border_mode='full')
  92.         single_conv_out = conv2d(input = single_input_img, filters = self.W,
  93.                 filter_shape = filt_shp, input_shape = (1,) + input_shp[1:],
  94.                 border_mode='full')
  95.  
  96.         # conv_out: convolution, or weighted input, of a mini-batch ---
  97.         #   (mini-batch size, # output maps, height, width)
  98.         conv_out = conv_out[:, :, top:bot, left:right]
  99.  
  100.         # single_conv_out: convolution, or weighted input, of one image ---
  101.         #   (1, # output maps, height, width)
  102.         single_conv_out = single_conv_out[:, :, top:bot, left:right]
  103.  
  104.         broadcast_b = self.b.dimshuffle('x', 0, 'x', 'x')
  105.  
  106.         # output_img: final output ---
  107.         #   (mini-batch size, # output maps, height, width)
  108.         self.output_img = self.activation(conv_out + broadcast_b)
  109.         self.single_output_img = \
  110.             self.activation(single_conv_out + broadcast_b)
  111.  
  112.         # Useful for the backpropogation step
  113.         self.params = [self.W, self.b]
  114.         self.dlsscst_dt_contrib = var_adjust * np.prod(filt_shp) + filt_shp[0]
  115.         if (not has_mask):
  116.             self.dregcst_dt_contrib = W_var * np.prod(filt_shp)
  117.             self.sum_square_weights_contrib = (self.W * self.W).sum()
  118.         else:
  119.             self.dregcst_dt_contrib = W_var * \
  120.                     filt_shp[0] * (filt_shp[1] - 1) * np.prod(filt_shp[2:])
  121.             W_reshaped = self.W[:, :-1, :, :]
  122.             self.sum_square_weights_contrib = (W_reshaped * W_reshaped).sum()
  123.  
  124. class CNN:
  125.     def __init__(self, num_planes=[3, 16, 3], kernel_size=(5, 5),
  126.                     img_shp=[BATCH_SIZE, HEIGHT, WIDTH], has_mask=False,
  127.                     rng_seed=RNG_SEED, eta=ETA, lmbda=LMBDA):
  128.         assert(len(num_planes) >= 2 and
  129.                kernel_size[0] % 2 == 1 and
  130.                kernel_size[1] % 2 == 1)
  131.  
  132.         rng = np.random.RandomState(rng_seed)
  133.         eta_shared = theano.shared(np.asarray(eta,
  134.             dtype=theano.config.floatX))
  135.         lmbda_shared = theano.shared(np.asarray(lmbda,
  136.             dtype=theano.config.floatX))
  137.  
  138.         # input_img: (mini-batch size, # input maps, height, width)
  139.         input_img = T.tensor4('input_img', dtype=theano.config.floatX)
  140.         s_input_img = T.tensor3('s_input_img', dtype=theano.config.floatX)
  141.         s_truth_img = T.tensor3('s_truth_img', dtype=theano.config.floatX)
  142.         s_input_img_reshaped = s_input_img.dimshuffle('x', 0, 1, 2)
  143.         s_truth_img_reshaped = s_truth_img.dimshuffle('x', 0, 1, 2)
  144.         # groundtruth_img: (mini-batch size, # output maps, height, width)
  145.         groundtruth_img = T.tensor4('output_img', dtype=theano.config.floatX)
  146.  
  147.         # Fast, nonlinear and symmetric
  148.         hard_tanh = lambda x: T.clip(x, -1, 1)
  149.  
  150.         layers = []
  151.         params = []
  152.         sum_square_weights = 0
  153.         prev_output = input_img
  154.         prev_single_output = s_input_img_reshaped
  155.         dlsscst_dt_contrib_sum = 0
  156.         dregcst_dt_contrib_sum = 0
  157.  
  158.         for i in range(len(num_planes) - 2):
  159.             if (i == 0):
  160.                 var_adjust = COMPROMISE_TANH
  161.             else:
  162.                 var_adjust = COMPROMISE_HARD_TANH
  163.             layer = _ConvLayer(rng, input_img=prev_output,
  164.                         single_input_img=prev_single_output,
  165.                         input_shp=(img_shp[0], num_planes[i],
  166.                             img_shp[1], img_shp[2]),
  167.                         filt_shp=(num_planes[i + 1], num_planes[i],
  168.                             kernel_size[0], kernel_size[1]),
  169.                         activation = hard_tanh,
  170.                         var_adjust = var_adjust,
  171.                         has_mask = has_mask and (i == 0))
  172.             prev_output = layer.output_img
  173.             prev_single_output = layer.single_output_img
  174.             params += layer.params
  175.             dlsscst_dt_contrib_sum += layer.dlsscst_dt_contrib
  176.             dregcst_dt_contrib_sum += layer.dregcst_dt_contrib
  177.             sum_square_weights += layer.sum_square_weights_contrib
  178.             layers.append(layer)
  179.  
  180.         layer = _ConvLayer(rng, input_img=prev_output,
  181.                     single_input_img=prev_single_output,
  182.                     input_shp=(img_shp[0], num_planes[-2],
  183.                         img_shp[1], img_shp[2]),
  184.                     filt_shp=(num_planes[-1], num_planes[-2],
  185.                         kernel_size[0], kernel_size[1]),
  186.                     activation = T.tanh,
  187.                     var_adjust = VAR_HARD_TANH_STD)
  188.         output_img = layer.output_img
  189.         s_output_img = layer.single_output_img
  190.         params += layer.params
  191.         dlsscst_dt_contrib_sum += layer.dlsscst_dt_contrib
  192.         dregcst_dt_contrib_sum += layer.dregcst_dt_contrib
  193.         sum_square_weights += layer.sum_square_weights_contrib
  194.         layers.append(layer)
  195.  
  196.         # To make things floatX instead of the default float64
  197.         dlsscst_dt_contrib_sum = \
  198.             np.asarray(dlsscst_dt_contrib_sum, dtype=theano.config.floatX)
  199.         dregcst_dt_contrib_sum = \
  200.             np.asarray(dregcst_dt_contrib_sum, dtype=theano.config.floatX)
  201.  
  202.         # To make the user entered eta and lmbda values more meaningful
  203.         # Call the time constant 'T' = 1 / user_eta
  204.         # Call the average abs difference across batch and plane 'diff'
  205.         # Change in loss cost in T iterations ~= out_var
  206.         #   For practical purposes, out_var ~= diff^2
  207.         # Change in regularization cost in T iterations ~= (usr_lmbda)^2
  208.         #   For practical purposes, regularization starts to overpower
  209.         #       loss only when diff <= usr_lmbda
  210.         eta_convert = eta_shared / dlsscst_dt_contrib_sum
  211.         lmbda_convert = (lmbda_shared ** 2) * \
  212.                 dlsscst_dt_contrib_sum / dregcst_dt_contrib_sum
  213.  
  214.         # Cost function for tanh last layer
  215.         loss_f = lambda x, y: ((1.0 - y) * np.log(1.0 - x) +
  216.                                 (1.0 + y) * np.log(1.0 + x)) / (-2.0)
  217.         loss = loss_f(output_img, groundtruth_img)
  218.         s_loss = T.mean(loss_f(s_output_img, s_truth_img_reshaped))
  219.         # Combining data from batches in a way that assumes complete
  220.         #   dependence between losses and weighted input gradients
  221.         # In reality, the gradients scale down with the complexity
  222.         #   of the inputs and with the additional complexity of the
  223.         #   difference between the ground truth and the output,
  224.         #   where roughly speaking complexity is the number of
  225.         #   uncorrelated regions in a given plane
  226.         loss = T.sum(loss, axis=1)
  227.         loss = T.mean(loss)
  228.         cost = loss + lmbda_convert * sum_square_weights / 2.0
  229.  
  230.         grads = T.grad(cost, params)
  231.         updates = [(param, param - eta_convert * grad)
  232.                     for param, grad in zip(params, grads)]
  233.  
  234.         self._arch = {'num_planes': num_planes, 'kernel_size': kernel_size,
  235.                 'img_shp': img_shp, 'has_mask': False}
  236.         self._layers = layers
  237.         self._rng_seed = rng_seed
  238.  
  239.         self.input_shp = (img_shp[0], num_planes[0], img_shp[1], img_shp[2])
  240.         self.output_shp = (img_shp[0], num_planes[-1], img_shp[1], img_shp[2])
  241.         self.eta = eta_shared
  242.         self.lmbda = lmbda_shared
  243.  
  244.         self.train_model = theano.function(
  245.                                 inputs=[input_img, groundtruth_img],
  246.                                 outputs=loss,
  247.                                 updates=updates)
  248.         self.get_grads = theano.function(
  249.                                 inputs=[input_img, groundtruth_img],
  250.                                 outputs=grads)
  251.         self.get_loss = theano.function(
  252.                                 inputs=[s_input_img, s_truth_img],
  253.                                 outputs=s_loss)
  254.         self.feed_forward = theano.function(
  255.                                 inputs=[s_input_img],
  256.                                 outputs=s_output_img[0])
  257.  
  258.     def export_info(self):
  259.         result = []
  260.         for layer in self._layers:
  261.             result.append((layer.W.get_value(), layer.b.get_value()))
  262.         return {'arch': self._arch, 'params': result,
  263.                 'rng_seed': self._rng_seed}
  264.  
  265.     def load_info(info, eta=ETA, lmbda=LMBDA):
  266.         neural_net = CNN(
  267.                 num_planes=info['arch']['num_planes'],
  268.                 kernel_size=info['arch']['kernel_size'],
  269.                 img_shp=info['arch']['img_shp'],
  270.                 has_mask=info['arch']['has_mask'],
  271.                 eta=eta,
  272.                 lmbda=lmbda)
  273.         for (layer, (W, b)) in zip(neural_net._layers, info['params']):
  274.             layer.W.set_value(W)
  275.             layer.b.set_value(b)
  276.         return neural_net
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement