import tensorflow as tf
from tensorflow.contrib.layers.python.layers import initializers

slim = tf.contrib.slim
xrange = range  # Python 3 compatibility: xrange is used below for the stage loops.

'''
============================================================================
ENet: A Deep Neural Network Architecture for Real-Time Semantic Segmentation
============================================================================
Based on the paper: https://arxiv.org/pdf/1606.02147.pdf
'''

@slim.add_arg_scope
def prelu(x, scope, decoder=False):
    '''
    Performs the parametric relu operation. This implementation is based on:
    https://stackoverflow.com/questions/39975676/how-to-implement-prelu-activation-in-tensorflow

    For the decoder portion, prelu becomes just a normal relu.

    INPUTS:
    - x(Tensor): a 4D Tensor that undergoes prelu
    - scope(str): the string to name your prelu operation's alpha variable.
    - decoder(bool): if True, prelu becomes a normal relu.

    OUTPUTS:
    - pos + neg (Tensor): the prelu output; if decoder is True, just the relu of x.
    '''
    # If decoder, then perform relu and just return the output
    if decoder:
        return tf.nn.relu(x, name=scope)

    # Learnable per-channel slope for the negative part of the activation.
    alpha = tf.get_variable(scope + 'alpha', x.get_shape()[-1],
                            initializer=tf.constant_initializer(0.0),
                            dtype=tf.float32)
    pos = tf.nn.relu(x)
    neg = alpha * (x - abs(x)) * 0.5
    return pos + neg
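
def _prelu_usage_sketch():
    # Minimal usage sketch added for illustration (not part of the original paste).
    # The placeholder shape and scope names are assumptions, chosen only to show the call pattern.
    features = tf.placeholder(tf.float32, [1, 180, 240, 16])
    encoder_act = prelu(features, scope='sketch_prelu')               # learnable per-channel alpha
    decoder_act = prelu(features, scope='sketch_relu', decoder=True)  # plain relu in the decoder
    return encoder_act, decoder_act
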
def spatial_dropout(x, p, seed, scope, is_training=True):
    '''
    Performs a 2D spatial dropout that drops entire feature maps (channels) instead of individual elements.
    Note that p stands for the probability of dropping, but tf.nn.dropout takes the probability of keeping.

    ------------------
    Technical Details
    ------------------
    The noise shape must be [batch_size, 1, 1, num_channels], with the height and width set to 1, because
    it represents either a 1 or a 0 for each channel, and these values are broadcast across the full spatial
    dimensions of the channel they belong to. Each channel is therefore either entirely
    'dropped'/set to zero or has its activations entirely kept.
    --------------------------

    INPUTS:
    - x(Tensor): a 4D Tensor of the input feature map.
    - p(float): a float representing the probability of dropping a channel
    - seed(int): an integer for seeding the random_uniform distribution that runs under tf.nn.dropout
    - scope(str): the string name for naming the spatial_dropout
    - is_training(bool): to turn on dropout only when training. Optional.

    OUTPUTS:
    - output(Tensor): a 4D Tensor of exactly the same size as the input x,
    with certain channels having all of their elements set to 0 (i.e. dropped).
    '''
    if is_training:
        keep_prob = 1.0 - p
        input_shape = x.get_shape().as_list()
        # One keep/drop decision per (batch, channel) pair, broadcast over height and width.
        noise_shape = tf.constant(value=[input_shape[0], 1, 1, input_shape[3]])
        output = tf.nn.dropout(x, keep_prob, noise_shape, seed=seed, name=scope)

        return output

    return x
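
def _spatial_dropout_usage_sketch():
    # Minimal usage sketch added for illustration (not part of the original paste).
    # Shapes and the drop probability are assumptions; note that p is the probability of
    # dropping a channel, so keep_prob inside the function becomes 1.0 - p.
    features = tf.placeholder(tf.float32, [1, 90, 120, 64])
    regularized = spatial_dropout(features, p=0.1, seed=0, scope='sketch_spatial_dropout')
    at_inference = spatial_dropout(features, p=0.1, seed=0, scope='sketch_noop', is_training=False)
    return regularized, at_inference
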
def unravel_argmax(argmax, shape):
    '''
    Converts the flat indices produced by tf.nn.max_pool_with_argmax into
    (batch, y, x, channel) coordinates for a tensor of the given shape.
    '''
    argmax_shape = argmax.get_shape()
    new_1dim_shape = tf.shape(tf.constant(0, shape=[tf.Dimension(4), argmax_shape[0]*argmax_shape[1]*argmax_shape[2]*argmax_shape[3]]))
    batch_shape = tf.constant(0, dtype=tf.int64, shape=[argmax_shape[0], 1, 1, 1]).get_shape()
    b = tf.multiply(tf.ones_like(argmax), tf.reshape(tf.range(shape[0]), batch_shape))
    y = argmax // (shape[2] * shape[3])
    x = argmax % (shape[2] * shape[3]) // shape[3]
    c = tf.ones_like(argmax) * tf.range(shape[3])
    pack = tf.stack([b, y, x, c])
    pack = tf.reshape(pack, new_1dim_shape)
    pack = tf.transpose(pack)
    return pack
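
# Worked example of the index arithmetic above (added for illustration, not part of the
# original paste). For a feature map of shape [batch, height, width, channels], the flat
# index documented for max_pool_with_argmax is ((b * height + y) * width + x) * channels + c,
# so within one image:
#   y = flat_index // (width * channels)
#   x = flat_index %  (width * channels) // channels
#   c = flat_index %  channels
# e.g. with width=480 and channels=16, flat_index 7685 decomposes to y=1, x=0, c=5.
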
def unpool_forAndroidSupport(updates, mask, k_size=[1, 2, 2, 1], output_shape=None, scope=''):
    '''
    Unpooling variant that avoids tf.scatter_nd (unsupported on Android at the time of writing)
    by building a SparseTensor and adding it to a dense tensor of zeros instead.
    '''
    with tf.variable_scope(scope):
        mask = tf.cast(mask, tf.int32)
        input_shape = tf.shape(updates, out_type=tf.int32)
        # calculate the new shape
        if output_shape is None:
            output_shape = (input_shape[0], input_shape[1] * k_size[1], input_shape[2] * k_size[2], input_shape[3])

        # calculate indices for batch, height, width and feature maps
        one_like_mask = tf.ones_like(mask, dtype=tf.int32)
        batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
        batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int32), shape=batch_shape)
        b = one_like_mask * batch_range
        y = mask // (output_shape[2] * output_shape[3])
        x = (mask // output_shape[3]) % output_shape[2]  # mask % (output_shape[2] * output_shape[3]) // output_shape[3]
        feature_range = tf.range(output_shape[3], dtype=tf.int32)
        f = one_like_mask * feature_range

        # transpose indices & reshape update values to one dimension
        updates_size = tf.size(updates)
        indices = tf.to_int64(tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size])))
        values = tf.reshape(updates, [updates_size])
        delta = tf.SparseTensor(indices, values, tf.to_int64(output_shape))
        inter1 = tf.sparse_add(tf.zeros(output_shape, dtype=tf.int64), tf.to_int64(tf.sparse_reorder(delta)))
        return inter1

def unpool_unsupportedOnAndroid(updates, mask, k_size=[1, 2, 2, 1], output_shape=None, scope=''):
    '''
    Unpooling function based on the implementation by Panaetius at https://github.com/tensorflow/tensorflow/issues/2169
    (uses tf.scatter_nd, which was not supported on Android at the time of writing).

    INPUTS:
    - updates(Tensor): a 4D tensor of shape [batch_size, height, width, num_channels] that represents the input block to be upsampled
    - mask(Tensor): a 4D tensor that represents the argmax values/pooling indices of the previously max-pooled layer
    - k_size(list): a list of values representing the dimensions of the unpooling filter.
    - output_shape(list): a list of values to indicate what the final output shape should be after unpooling
    - scope(str): the string name to name your scope

    OUTPUTS:
    - ret(Tensor): the returned 4D tensor that has the shape of output_shape.
    '''
    with tf.variable_scope(scope):
        mask = tf.cast(mask, tf.int32)
        input_shape = tf.shape(updates, out_type=tf.int32)
        # calculate the new shape
        if output_shape is None:
            output_shape = (input_shape[0], input_shape[1] * k_size[1], input_shape[2] * k_size[2], input_shape[3])

        # calculate indices for batch, height, width and feature maps
        one_like_mask = tf.ones_like(mask, dtype=tf.int32)
        batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
        batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int32), shape=batch_shape)
        b = one_like_mask * batch_range
        y = mask // (output_shape[2] * output_shape[3])
        x = (mask // output_shape[3]) % output_shape[2]  # mask % (output_shape[2] * output_shape[3]) // output_shape[3]
        feature_range = tf.range(output_shape[3], dtype=tf.int32)
        f = one_like_mask * feature_range

        # transpose indices & reshape update values to one dimension
        updates_size = tf.size(updates)
        indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
        values = tf.reshape(updates, [updates_size])
        ret = tf.scatter_nd(indices, values, output_shape)
        return ret
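
def _unpool_usage_sketch():
    # Minimal usage sketch added for illustration (not part of the original paste).
    # It pairs tf.nn.max_pool_with_argmax with the scatter_nd-based unpool above to
    # place the pooled values back at their original positions; shapes are assumptions.
    features = tf.placeholder(tf.float32, [1, 90, 120, 64])
    pooled, indices = tf.nn.max_pool_with_argmax(features, ksize=[1, 2, 2, 1],
                                                 strides=[1, 2, 2, 1], padding='SAME')
    unpooled = unpool_unsupportedOnAndroid(pooled, indices,
                                           output_shape=[1, 90, 120, 64],
                                           scope='sketch_unpool')
    return unpooled
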
@slim.add_arg_scope
def initial_block(inputs, is_training=True, scope='initial_block'):
    '''
    The initial block for ENet has 2 branches: the convolution branch and the max-pool branch.

    The conv branch produces 13 feature maps, while the max-pool branch contributes 3 corresponding to the RGB channels.
    Both outputs are then concatenated to give an output of 16 feature maps.

    NOTE: Does not need to store pooling indices since they won't be used later for the final upsampling.

    INPUTS:
    - inputs(Tensor): A 4D tensor of shape [batch_size, height, width, channels]

    OUTPUTS:
    - net_concatenated(Tensor): a 4D Tensor that contains the concatenation of the conv and max-pool branches.
    '''
    is_training_change = not is_training
    # Convolutional branch
    net_conv = slim.conv2d(inputs, 13, [3, 3], stride=2, activation_fn=None, scope=scope+'_conv')
    net_conv = slim.batch_norm(net_conv, is_training=is_training, fused=True, scope=scope+'_batchnorm')
    net_conv = prelu(net_conv, scope=scope+'_prelu')

    # Max pool branch
    net_pool = slim.max_pool2d(inputs, [2, 2], stride=2, scope=scope+'_max_pool')

    # Concatenated output - does it matter whether max pool comes first or conv comes first? Probably not.
    net_concatenated = tf.concat([net_conv, net_pool], axis=3, name=scope+'_concat')
    return net_concatenated
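
def _initial_block_usage_sketch():
    # Minimal usage sketch added for illustration (not part of the original paste).
    # With a 3-channel RGB input, the conv branch gives 13 maps and the max-pool branch
    # gives 3, so the concatenated output has 16 channels at half the spatial resolution.
    images = tf.placeholder(tf.float32, [1, 360, 480, 3])
    net = initial_block(images, scope='sketch_initial_block')  # -> shape [1, 180, 240, 16]
    return net
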
@slim.add_arg_scope
def bottleneck(inputs,
               output_depth,
               filter_size,
               regularizer_prob,
               projection_ratio=4,
               seed=0,
               is_training=True,
               downsampling=False,
               upsampling=False,
               pooling_indices=None,
               output_shape=None,
               dilated=False,
               dilation_rate=None,
               asymmetric=False,
               decoder=False,
               scope='bottleneck'):
    '''
    The bottleneck module has three different kinds of variants:

    1. A regular convolution which you can decide whether or not to downsample.
    2. A dilated convolution, which requires you to have a dilation factor.
    3. An asymmetric convolution that has a decomposed filter size of 5x1 and 1x5 separately.

    INPUTS:
    - inputs(Tensor): a 4D Tensor of the previous convolutional block of shape [batch_size, height, width, num_channels].
    - output_depth(int): an integer indicating the output depth of the output convolutional block.
    - filter_size(int): an integer that gives the height and width of the filter size to use for a regular/dilated convolution.
    - regularizer_prob(float): the float p that represents the probability of dropping a channel for spatial dropout regularization.
    - projection_ratio(int): the amount of depth to reduce for the initial 1x1 projection. Depth is divided by the projection ratio. Default is 4.
    - seed(int): an integer for the random seed used in the dropout operation.
    - is_training(bool): a boolean value to indicate whether or not we are training. Decides batch_norm and prelu activity.

    - downsampling(bool): if True, a max-pool2D layer is added to downsample the spatial sizes.
    - upsampling(bool): if True, the upsampling bottleneck is activated but requires pooling indices to upsample.
    - pooling_indices(Tensor): the argmax values that are obtained after performing tf.nn.max_pool_with_argmax.
    - output_shape(list): A list of integers indicating the output shape of the unpooling layer.
    - dilated(bool): if True, then dilated convolution is done, but requires a dilation rate to be given.
    - dilation_rate(int): the dilation factor for performing atrous convolution/dilated convolution.
    - asymmetric(bool): if True, then asymmetric convolution is done, and the only filter size used here is 5.
    - decoder(bool): if True, then all the prelus become relus according to the ENet authors.
    - scope(str): a string name that names your bottleneck.

    OUTPUTS:
    - net(Tensor): The convolution block output after a bottleneck
    - pooling_indices(Tensor): If downsampling, then this tensor is produced for use in unpooling later.
    - inputs_shape(list): The shape of the input to the downsampling conv block. For use in unpooling later.

    '''
    #Calculate the depth reduction based on the projection ratio used in the 1x1 convolution.
    reduced_depth = int(inputs.get_shape().as_list()[3] / projection_ratio)
    is_training_change = not is_training
    with slim.arg_scope([prelu], decoder=decoder):

        #=============DOWNSAMPLING BOTTLENECK====================
        if downsampling:
            #=============MAIN BRANCH=============
            #Just perform a max pooling
            #net_main, pooling_indices = tf.nn.max_pool_with_argmax(inputs, ksize=[1,2,2,1], strides=[1,2,2,1],
            #                                                       padding='SAME', name=scope+'_main_max_pool')
            net_main = tf.nn.max_pool(inputs, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name=scope+'_main_max_pool')
            #max_pool_with_argmax is replaced above for Android support, so sequential placeholder indices are created instead.
            pooling_indices = tf.constant([a for a in range(net_main.shape[0]*net_main.shape[1]*net_main.shape[2]*net_main.shape[3])])
            pooling_indices = tf.reshape(pooling_indices, shape=net_main.shape, name=scope+'_main_max_pool')

            #First get the difference in depth to pad, then pad with zeros only on the last dimension.
            inputs_shape = inputs.get_shape().as_list()
            depth_to_pad = abs(inputs_shape[3] - output_depth)
            paddings = tf.convert_to_tensor([[0,0], [0,0], [0,0], [0, depth_to_pad]])
            net_main = tf.pad(net_main, paddings=paddings, name=scope+'_main_padding')

            #=============SUB BRANCH==============
            #First projection that has a 2x2 kernel and stride 2
            net = slim.conv2d(inputs, reduced_depth, [2,2], stride=2, scope=scope+'_conv1')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
            net = prelu(net, scope=scope+'_prelu1')

            #Second conv block
            net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], scope=scope+'_conv2')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
            net = prelu(net, scope=scope+'_prelu2')

            #Final projection with 1x1 kernel
            net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
            net = prelu(net, scope=scope+'_prelu3')

            #Regularizer
            net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')

            #Finally, combine the two branches together via an element-wise addition
            net = tf.add(net, net_main, name=scope+'_add')
            net = prelu(net, scope=scope+'_last_prelu')

            #also return inputs shape for convenience later
            return net, pooling_indices, inputs_shape

        #============DILATED CONVOLUTION BOTTLENECK====================
        #Everything is the same as a regular bottleneck except for the dilation rate argument
        elif dilated:
            #Check if dilation rate is given
            if not dilation_rate:
                raise ValueError('Dilation rate is not given.')

            #Save the main branch for addition later
            net_main = inputs

            #First projection with 1x1 kernel (dimensionality reduction)
            net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
            net = prelu(net, scope=scope+'_prelu1')

            #Second conv block --- apply dilated convolution here
            net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], rate=dilation_rate, scope=scope+'_dilated_conv2')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
            net = prelu(net, scope=scope+'_prelu2')

            #Final projection with 1x1 kernel (expansion)
            net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
            net = prelu(net, scope=scope+'_prelu3')

            #Regularizer
            net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
            net = prelu(net, scope=scope+'_prelu4')

            #Add the main branch
            net = tf.add(net_main, net, name=scope+'_add_dilated')
            net = prelu(net, scope=scope+'_last_prelu')

            return net

        #===========ASYMMETRIC CONVOLUTION BOTTLENECK==============
        #Everything is the same as a regular bottleneck except for a [5,5] kernel decomposed into a [5,1] then a [1,5]
        elif asymmetric:
            #Save the main branch for addition later
            net_main = inputs

            #First projection with 1x1 kernel (dimensionality reduction)
            net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
            net = prelu(net, scope=scope+'_prelu1')

            #Second conv block --- apply asymmetric conv here
            net = slim.conv2d(net, reduced_depth, [filter_size, 1], scope=scope+'_asymmetric_conv2a')
            net = slim.conv2d(net, reduced_depth, [1, filter_size], scope=scope+'_asymmetric_conv2b')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
            net = prelu(net, scope=scope+'_prelu2')

            #Final projection with 1x1 kernel
            net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
            net = prelu(net, scope=scope+'_prelu3')

            #Regularizer
            net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
            net = prelu(net, scope=scope+'_prelu4')

            #Add the main branch
            net = tf.add(net_main, net, name=scope+'_add_asymmetric')
            net = prelu(net, scope=scope+'_last_prelu')

            return net

        #============UPSAMPLING BOTTLENECK================
        #Everything is the same as a regular one, except the convolution becomes transposed.
        elif upsampling:
            #Check if pooling indices are given
            if pooling_indices is None:
                raise ValueError('Pooling indices are not given.')

            #Check if output_shape is given
            if output_shape is None:
                raise ValueError('Output shape is not given.')

            #=======MAIN BRANCH=======
            #Main branch to upsample. The output shape must match the shape of the layer that was pooled initially in order
            #for the pooling indices to work correctly. However, the initially pooled layer was padded, so the depth needs to be
            #reduced before unpooling. In the paper, padding is replaced with convolution for this purpose of reducing the depth!
            net_unpool = slim.conv2d(inputs, output_depth, [1,1], scope=scope+'_main_conv1')
            net_unpool = slim.batch_norm(net_unpool, is_training=is_training, scope=scope+'batch_norm1')
            net_unpool = unpool_unsupportedOnAndroid(net_unpool, pooling_indices, output_shape=output_shape, scope='unpool')

            #======SUB BRANCH=======
            #First 1x1 projection to reduce depth
            net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
            net = prelu(net, scope=scope+'_prelu1')

            #Second conv block -----------------------------> NOTE: using tf.nn.conv2d_transpose for variable input shape.
            net_unpool_shape = net_unpool.get_shape().as_list()
            output_shape = [net_unpool_shape[0], net_unpool_shape[1], net_unpool_shape[2], reduced_depth]
            output_shape = tf.convert_to_tensor(output_shape)
            filter_size = [filter_size, filter_size, reduced_depth, reduced_depth]
            filters = tf.get_variable(shape=filter_size, initializer=initializers.xavier_initializer(), dtype=tf.float32, name=scope+'_transposed_conv2_filters')

            # net = slim.conv2d_transpose(net, reduced_depth, [filter_size, filter_size], stride=2, scope=scope+'_transposed_conv2')
            net = tf.nn.conv2d_transpose(net, filter=filters, strides=[1,2,2,1], output_shape=output_shape, name=scope+'_transposed_conv2')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
            net = prelu(net, scope=scope+'_prelu2')

            #Final projection with 1x1 kernel
            net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
            net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm4')
            net = prelu(net, scope=scope+'_prelu3')

            #Regularizer
            net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
            net = prelu(net, scope=scope+'_prelu4')

            #Finally, add the unpooling layer and the sub branch together
            net = tf.add(net, tf.to_float(net_unpool), name=scope+'_add_upsample')
            net = prelu(net, scope=scope+'_last_prelu')

            return net

        #OTHERWISE, just perform a regular bottleneck!
        #==============REGULAR BOTTLENECK==================
        #Save the main branch for addition later
        net_main = inputs

        #First projection with 1x1 kernel
        net = slim.conv2d(inputs, reduced_depth, [1,1], scope=scope+'_conv1')
        net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm1')
        net = prelu(net, scope=scope+'_prelu1')

        #Second conv block
        net = slim.conv2d(net, reduced_depth, [filter_size, filter_size], scope=scope+'_conv2')
        net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm2')
        net = prelu(net, scope=scope+'_prelu2')

        #Final projection with 1x1 kernel
        net = slim.conv2d(net, output_depth, [1,1], scope=scope+'_conv3')
        net = slim.batch_norm(net, is_training=is_training, scope=scope+'_batch_norm3')
        net = prelu(net, scope=scope+'_prelu3')

        #Regularizer
        net = spatial_dropout(net, p=regularizer_prob, seed=seed, scope=scope+'_spatial_dropout')
        net = prelu(net, scope=scope+'_prelu4')

        #Add the main branch
        net = tf.add(net_main, net, name=scope+'_add_regular')
        net = prelu(net, scope=scope+'_last_prelu')

        return net
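
def _bottleneck_usage_sketch():
    # Minimal usage sketch added for illustration (not part of the original paste).
    # It chains a downsampling bottleneck with a regular and a dilated one, the same way the
    # encoder stages below do; the shapes, depths, and scope names are assumptions.
    features = tf.placeholder(tf.float32, [1, 180, 240, 16])
    net, indices, shape = bottleneck(features, output_depth=64, filter_size=3,
                                     regularizer_prob=0.01, downsampling=True,
                                     scope='sketch_bottleneck_down')
    net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01,
                     scope='sketch_bottleneck_regular')
    net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01,
                     dilated=True, dilation_rate=2, scope='sketch_bottleneck_dilated')
    return net, indices, shape
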
#Now actually start building the network
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    The ENet model for real-time semantic segmentation!

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents one batch of preprocessed images.
    - num_classes(int): an integer for the number of classes to predict. This will determine the final output channels as the answer.
    - batch_size(int): the batch size to explicitly set the shape of the inputs in order for operations to work properly.
    - num_initial_blocks(int): the number of times to repeat the initial block.
    - stage_two_repeat(int): the number of times to repeat stage two in order to make the network deeper.
    - skip_connections(bool): if True, add the corresponding encoder feature maps to the decoder. They are of exactly the same shapes.
    - reuse(bool): Whether or not to reuse the variables for evaluation.
    - is_training(bool): if True, switch on batch_norm and prelu only during training, otherwise they are turned off.
    - scope(str): a string that represents the scope name for the variables.

    OUTPUTS:
    - net(Tensor): a 4D Tensor output of shape [batch_size, image_height, image_width, num_classes], where each pixel has a one-hot encoded vector
    determining the label of the pixel.
    '''
    #Set the shape of the inputs first to get the batch_size information
    '''batch_size = inputs.get_shape().as_list()
    product = 1
    for x in list:
        product *= x
    batch_size = product / 518400'''
    is_training_change = not is_training
    #NOTE: the input shape is hardcoded here to a single 360x480 RGB image.
    inputs = tf.reshape(inputs, [1, 360, 480, 3])
    inputs.set_shape(shape=(1, 360, 480, 3))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch_norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            for i in xrange(1, max(num_initial_blocks, 1) + 1):
                net = initial_block(inputs, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat stage two at least twice to get stages 2 and 3:
                for i in xrange(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')
            predictions = tf.argmax(probabilities, -1)
            predictions = tf.cast(predictions, tf.float32)
            output = tf.reshape(predictions, (1, 172800), name='output')  # 172800 = 360 * 480 pixels
            return logits, probabilities  # , output

def ENet_arg_scope(weight_decay=2e-4,
                   batch_norm_decay=0.1,
                   batch_norm_epsilon=0.001):
    '''
    The arg scope for the ENet model. The weight decay is 2e-4 as seen in the paper.
    Batch_norm decay is 0.1 (momentum 0.1) according to the official implementation.

    INPUTS:
    - weight_decay(float): the weight decay for weights variables in conv2d and separable conv2d
    - batch_norm_decay(float): decay for the moving average of batch_norm momentums.
    - batch_norm_epsilon(float): small float added to variance to avoid dividing by zero.

    OUTPUTS:
    - scope(arg_scope): a tf-slim arg_scope with the parameters needed for ENet.
    '''
    # Set weight_decay for weights in conv2d and separable_conv2d layers.
    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        biases_regularizer=slim.l2_regularizer(weight_decay)):

        # Set parameters for batch_norm.
        with slim.arg_scope([slim.batch_norm],
                            decay=batch_norm_decay,
                            epsilon=batch_norm_epsilon) as scope:
            return scope
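

if __name__ == '__main__':
    # Minimal end-to-end sketch added for illustration (not part of the original paste).
    # It only builds the graph; the placeholder name and the num_classes value (12, e.g. CamVid)
    # are assumptions that match the 360x480 input shape hardcoded inside ENet.
    image_batch = tf.placeholder(tf.float32, [1, 360, 480, 3], name='sketch_input')
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(image_batch,
                                     num_classes=12,
                                     batch_size=1,
                                     num_initial_blocks=1,
                                     stage_two_repeat=2,
                                     skip_connections=True,
                                     is_training=True,
                                     scope='ENet')
    print(logits.get_shape().as_list())         # expected: [1, 360, 480, 12]
    print(probabilities.get_shape().as_list())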