# encoding: UTF-8
# Copyright 2016 Google.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf
import math
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
tf.set_random_seed(0)

# neural network structure for this sample:
#
# · · · · · · · · · ·   (input data, 1-deep)               X  [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @   conv. layer 6x6x1=>24, stride 1    W1 [6, 6, 1, 24]     B1 [24]
# · · · · · · · · ·                                        Y1 [batch, 28, 28, 24]
#   @ @ @ @ @ @ @ @     conv. layer 5x5x24=>48, stride 2   W2 [5, 5, 24, 48]    B2 [48]
#   · · · · · · ·                                          Y2 [batch, 14, 14, 48]
#     @ @ @ @ @ @       conv. layer 4x4x48=>64, stride 2   W3 [4, 4, 48, 64]    B3 [64]
#     · · · · ·                                            Y3 [batch, 7, 7, 64] => reshaped to YY [batch, 7*7*64]
#      \x/x\x/x\x/      fully connected layer (relu)       W4 [7*7*64, 200]     B4 [200]
#       · · · ·                                            Y4 [batch, 200]
#        \x/x\x/        fully connected layer (softmax)    W5 [200, 10]         B5 [10]
#         · · ·                                            Y  [batch, 10]

# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0)

# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)
# test flag for batch norm: True at test time (use moving averages), False at training time (use batch statistics)
tst = tf.placeholder(tf.bool)
# iteration counter, used to warm up the exponential moving averages in batch norm
iter = tf.placeholder(tf.int32)
# probability of keeping a node during dropout (1.0 at test time)
pkeep = tf.placeholder(tf.float32)
pkeep_conv = tf.placeholder(tf.float32)

def batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, iteration)  # passing the iteration number prevents averaging across non-existent iterations
    bnepsilon = 1e-5
    if convolutional:
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])
    else:
        mean, variance = tf.nn.moments(Ylogits, [0])
    update_moving_averages = exp_moving_avg.apply([mean, variance])
    m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
    v = tf.cond(is_test, lambda: exp_moving_avg.average(variance), lambda: variance)
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
    return Ybn, update_moving_averages

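# Note on the helper above: at training time (is_test=False) the layer is normalised with the
# statistics of the current mini-batch; at test time (is_test=True) tf.cond selects the
# exponential moving averages accumulated by update_moving_averages instead. The learned
# "offset" plays the role of the bias, and the scale parameter is passed as None because
# scaling is redundant in front of a relu.
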
def no_batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    return Ylogits, tf.no_op()

def compatible_convolutional_noise_shape(Y):
    noiseshape = tf.shape(Y)
    noiseshape = noiseshape * tf.constant([1, 0, 0, 1]) + tf.constant([0, 1, 1, 0])
    return noiseshape

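# Note on the helper above: for an activation tensor of shape [batch, height, width, channels]
# it produces the noise shape [batch, 1, 1, channels], so tf.nn.dropout keeps or drops each
# feature map as a whole rather than individual pixels, which is the variant of dropout that
# makes sense for convolutional activations.
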
# three convolutional layers with their channel counts, and a
# fully connected layer (the last layer has 10 softmax neurons)
K = 24   # first convolutional layer output depth
L = 48   # second convolutional layer output depth
M = 64   # third convolutional layer output depth
N = 200  # fully connected layer size

W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.constant(0.1, tf.float32, [K]))
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.constant(0.1, tf.float32, [L]))
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
B3 = tf.Variable(tf.constant(0.1, tf.float32, [M]))

W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]))
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]))

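# Initialisation note: the weights start from a truncated normal with stddev 0.1 and the B
# tensors at 0.1, so the relu units start in their active region (see the notes at the end of
# the file about bias initialisation with relus).
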
# The model
# batch norm scaling is not useful with relus
# batch norm offsets are used instead of biases
has_dropout = True
stride = 1  # output is 28x28
Y1l = tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME')
Y1bn, update_ema1 = batchnorm(Y1l, tst, iter, B1, convolutional=True)
Y1r = tf.nn.relu(Y1bn)
if has_dropout:
    Y1 = tf.nn.dropout(Y1r, pkeep_conv, compatible_convolutional_noise_shape(Y1r))
else:
    Y1 = Y1r

stride = 2  # output is 14x14
Y2l = tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME')
Y2bn, update_ema2 = batchnorm(Y2l, tst, iter, B2, convolutional=True)
Y2r = tf.nn.relu(Y2bn)
if has_dropout:
    Y2 = tf.nn.dropout(Y2r, pkeep_conv, compatible_convolutional_noise_shape(Y2r))
else:
    Y2 = Y2r

stride = 2  # output is 7x7
Y3l = tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME')
Y3bn, update_ema3 = batchnorm(Y3l, tst, iter, B3, convolutional=True)
Y3r = tf.nn.relu(Y3bn)
if has_dropout:
    Y3 = tf.nn.dropout(Y3r, pkeep_conv, compatible_convolutional_noise_shape(Y3r))
else:
    Y3 = Y3r

# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])

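# Shape check: the 28x28 input goes through a stride-1 and two stride-2 'SAME' convolutions,
# giving 28x28x24 -> 14x14x48 -> 7x7x64 activations, so the flattened vector fed to the fully
# connected layer has 7*7*64 = 3136 values per image.
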
Y4l = tf.matmul(YY, W4)
Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4)
Y4r = tf.nn.relu(Y4bn)
if has_dropout:
    Y4 = tf.nn.dropout(Y4r, pkeep)
else:
    Y4 = Y4r
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)

update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)

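# update_ema groups the four moving-average update ops returned by the batchnorm calls above;
# it has to be run once per training iteration (as done in the training loop below) so that the
# averages used at test time track the statistics of the data actually seen during training.
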
# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid numerical
# stability problems with log(0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100

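# For reference, the naive formulation of the same loss would be
#   cross_entropy = -tf.reduce_sum(Y_ * tf.log(Y), axis=1)
# but computing the softmax and the log separately is numerically unstable when a probability
# underflows to 0, which is why the fused op on the logits is used instead.
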
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# training step with a variable learning rate fed through the lr placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

for i in range(1000):
    batch_X, batch_Y = mnist.train.next_batch(100)
    max_learning_rate = 0.02
    min_learning_rate = 0.0001
    decay_speed = 1600.0
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)

    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, iter: i, tst: False, pkeep: 0.75, pkeep_conv: 1.0})
    sess.run(update_ema, {X: batch_X, Y_: batch_Y, tst: False, iter: i, pkeep: 1.0, pkeep_conv: 1.0})

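# Learning-rate schedule: the rate decays exponentially from 0.02 towards 0.0001 with a time
# constant of 1600 iterations; for example at i=1600 it is roughly 0.0001 + 0.0199*e^-1 ≈ 0.0074.
# Both runs in the loop feed tst: False so that batch norm uses the statistics of the current
# mini-batch during training, while the moving averages are refreshed by update_ema.
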
# final test accuracy, computed with the batch-norm moving averages (tst: True) and no dropout
print(sess.run(accuracy, feed_dict={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0, pkeep_conv: 1.0, tst: True}))

# Some results to expect:
# (In all runs, if sigmoids are used, all biases are initialised at 0; if RELUs are used,
# all biases are initialised at 0.1, apart from the last one which is initialised at 0.)

## learning rate = 0.003, 10K iterations
# final test accuracy = 0.9788 (sigmoid - slow start, training cross-entropy not stabilised in the end)
# final test accuracy = 0.9825 (relu - above 0.97 in the first 1500 iterations but noisy curves)

## now with learning rate = 0.0001, 10K iterations
# final test accuracy = 0.9722 (relu - slow but smooth curve, would have gone higher in 20K iterations)

## decaying learning rate from 0.003 to 0.0001, decay_speed 2000, 10K iterations
# final test accuracy = 0.9746 (sigmoid - training cross-entropy not stabilised)
# final test accuracy = 0.9824 (relu - training set fully learned, test accuracy stable)

## after 3000 iterations:
# sigmoid: 0.9713
# relu: 0.9748
# dropout: 0.9762
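
# A minimal inference sketch (an added example, not part of the training script above): it runs
# one test image through the trained graph. tst: True selects the batch-norm moving averages and
# pkeep/pkeep_conv = 1.0 disable dropout, as at evaluation time.
import numpy as np
single_image = mnist.test.images[:1]  # shape [1, 28, 28, 1] because read_data_sets used reshape=False
probs = sess.run(Y, {X: single_image, pkeep: 1.0, pkeep_conv: 1.0, tst: True})
print("predicted digit:", np.argmax(probs, axis=1)[0], "with probability", probs[0].max())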