# encoding: UTF-8
# Copyright 2016 Google.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import math
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
tf.set_random_seed(0)
# neural network with 5 layers: 3 convolutional + 2 fully connected,
# with batch normalisation and dropout
#
# · · · · · · · · · ·    (input data, 1-deep)                  X  [batch, 28, 28, 1]
# @ @ @ @ @ @ @ @ @ @ -- conv. layer 6x6x1=>24, stride 1       W1 [6, 6, 1, 24]    B1 [24]
#  · · · · · · · · ·                                           Y1 [batch, 28, 28, 24]
#   @ @ @ @ @ @ @ @   -- conv. layer 5x5x24=>48, stride 2      W2 [5, 5, 24, 48]   B2 [48]
#    · · · · · · ·                                             Y2 [batch, 14, 14, 48]
#     @ @ @ @ @ @     -- conv. layer 4x4x48=>64, stride 2      W3 [4, 4, 48, 64]   B3 [64]
#      · · · · ·                                               Y3 [batch, 7, 7, 64] => reshaped to YY [batch, 7*7*64]
#       \x/x\x/       -- fully connected layer (relu+dropout)  W4 [7*7*64, 200]    B4 [200]
#        · · ·                                                 Y4 [batch, 200]
#         \x/         -- fully connected layer (softmax)       W5 [200, 10]        B5 [10]
#          ·                                                   Y  [batch, 10]
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)
# test flag for batch norm
tst = tf.placeholder(tf.bool)
# iteration counter, used by batch norm's exponential moving average
iter = tf.placeholder(tf.int32)
# dropout keep probabilities (fully connected and convolutional layers)
pkeep = tf.placeholder(tf.float32)
pkeep_conv = tf.placeholder(tf.float32)
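# How these placeholders are fed (see the training loop below):
# - lr:          the decaying learning rate computed at each iteration
# - tst:         False while training (use batch statistics), True at test time (use the moving averages)
# - iter:        the current iteration i, so the moving averages ramp up correctly at the start of training
# - pkeep:       0.75 during training, 1.0 for evaluation (no dropout at test time)
# - pkeep_conv:  1.0 in this configuration (convolutional dropout disabled)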
def batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    # passing the iteration count prevents the moving average from averaging across non-existent early iterations
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, iteration)
    bnepsilon = 1e-5
    if convolutional:
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])
    else:
        mean, variance = tf.nn.moments(Ylogits, [0])
    update_moving_averages = exp_moving_avg.apply([mean, variance])
    m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
    v = tf.cond(is_test, lambda: exp_moving_avg.average(variance), lambda: variance)
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
    return Ybn, update_moving_averages
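# With scale=None, tf.nn.batch_normalization computes Ybn = (Ylogits - m) / sqrt(v + bnepsilon) + offset.
# At training time (is_test=False) m and v are the statistics of the current batch; at test time
# (is_test=True) they are the exponential moving averages accumulated by update_moving_averages.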
def no_batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    return Ylogits, tf.no_op()

def compatible_convolutional_noise_shape(Y):
    noiseshape = tf.shape(Y)
    noiseshape = noiseshape * tf.constant([1, 0, 0, 1]) + tf.constant([0, 1, 1, 0])
    return noiseshape
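# For a [batch, height, width, channels] activation, the noise shape above is [batch, 1, 1, channels]:
# dropout draws one keep/drop decision per example and per channel and broadcasts it across the
# spatial dimensions, so entire feature maps are dropped rather than individual pixels.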
# three convolutional layers with their channel counts, and a
# fully connected layer (the last layer has 10 softmax neurons)
K = 24   # first convolutional layer output depth
L = 48   # second convolutional layer output depth
M = 64   # third convolutional layer output depth
N = 200  # fully connected layer size
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, K], stddev=0.1))  # 6x6 patch, 1 input channel, K output channels
B1 = tf.Variable(tf.constant(0.1, tf.float32, [K]))
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.1))
B2 = tf.Variable(tf.constant(0.1, tf.float32, [L]))
W3 = tf.Variable(tf.truncated_normal([4, 4, L, M], stddev=0.1))
B3 = tf.Variable(tf.constant(0.1, tf.float32, [M]))
W4 = tf.Variable(tf.truncated_normal([7 * 7 * M, N], stddev=0.1))
B4 = tf.Variable(tf.constant(0.1, tf.float32, [N]))
W5 = tf.Variable(tf.truncated_normal([N, 10], stddev=0.1))
B5 = tf.Variable(tf.constant(0.1, tf.float32, [10]))
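# W4's input size is 7*7*M because the 28x28 input is reduced to 14x14 by the stride-2 second
# convolution and to 7x7 by the stride-2 third convolution ('SAME' padding keeps sizes otherwise),
# and the third layer produces M channels.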
# The model
# batch norm scaling is not useful with relus
# batch norm offsets are used instead of biases
has_dropout = True

stride = 1  # output is 28x28
Y1l = tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding='SAME')
Y1bn, update_ema1 = batchnorm(Y1l, tst, iter, B1, convolutional=True)
Y1r = tf.nn.relu(Y1bn)
if has_dropout:
    Y1 = tf.nn.dropout(Y1r, pkeep_conv, compatible_convolutional_noise_shape(Y1r))
else:
    Y1 = Y1r
stride = 2  # output is 14x14
Y2l = tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding='SAME')
Y2bn, update_ema2 = batchnorm(Y2l, tst, iter, B2, convolutional=True)
Y2r = tf.nn.relu(Y2bn)
if has_dropout:
    Y2 = tf.nn.dropout(Y2r, pkeep_conv, compatible_convolutional_noise_shape(Y2r))
else:
    Y2 = Y2r
stride = 2  # output is 7x7
Y3l = tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding='SAME')
Y3bn, update_ema3 = batchnorm(Y3l, tst, iter, B3, convolutional=True)
Y3r = tf.nn.relu(Y3bn)
if has_dropout:
    Y3 = tf.nn.dropout(Y3r, pkeep_conv, compatible_convolutional_noise_shape(Y3r))
else:
    Y3 = Y3r
# reshape the output from the third convolution for the fully connected layer
YY = tf.reshape(Y3, shape=[-1, 7 * 7 * M])
Y4l = tf.matmul(YY, W4)
Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4)
Y4r = tf.nn.relu(Y4bn)
if has_dropout:
    Y4 = tf.nn.dropout(Y4r, pkeep)
else:
    Y4 = Y4r
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)
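# update_ema groups the four moving-average update ops into one; it has to be run once per training
# step (see the loop below) so the averages track the batch statistics. The softmax output Y is only
# used to compute accuracy; the loss below works on the raw Ylogits.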
# cross-entropy loss function (= -sum(Y_i * log(Yi)) ), normalised for batches of 100 images
# TensorFlow provides the softmax_cross_entropy_with_logits function to avoid the numerical
# instability of computing log(0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * 100
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# training step with a variable learning rate fed through the lr placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(1000):
    batch_X, batch_Y = mnist.train.next_batch(100)

    # learning rate decay
    max_learning_rate = 0.02
    min_learning_rate = 0.0001
    decay_speed = 1600.0
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i / decay_speed)
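    # the rate starts at max_learning_rate (0.02 at i=0) and decays exponentially towards
    # min_learning_rate with a time constant of 1600 iterations; for example, at i=1000 it is
    # roughly 0.0001 + 0.0199 * exp(-1000/1600) ≈ 0.011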
    # training step: tst=False so batch norm uses the statistics of the current batch
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, iter: i, tst: False, pkeep: 0.75, pkeep_conv: 1.0})
    # update the batch norm moving averages from the same batch
    sess.run(update_ema, {X: batch_X, Y_: batch_Y, tst: False, iter: i, pkeep: 1.0, pkeep_conv: 1.0})
# final test accuracy: tst=True so batch norm uses its moving averages, dropout disabled
# (correct_prediction and accuracy are already defined above)
print(sess.run(accuracy, feed_dict={X: mnist.test.images, Y_: mnist.test.labels, pkeep: 1.0, pkeep_conv: 1.0, tst: True}))
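# note: this script runs only 1000 training iterations; the reference results below were obtained
# with longer runs (10K iterations), so the accuracy printed here is not directly comparable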
# Some results to expect:
# (In all runs, if sigmoids are used, all biases are initialised at 0, if RELUs are used,
# all biases are initialised at 0.1 apart from the last one which is initialised at 0.)

## learning rate = 0.003, 10K iterations
# final test accuracy = 0.9788 (sigmoid - slow start, training cross-entropy not stabilised in the end)
# final test accuracy = 0.9825 (relu - above 0.97 in the first 1500 iterations but noisy curves)

## now with learning rate = 0.0001, 10K iterations
# final test accuracy = 0.9722 (relu - slow but smooth curve, would have gone higher in 20K iterations)

## decaying learning rate from 0.003 to 0.0001 decay_speed 2000, 10K iterations
# final test accuracy = 0.9746 (sigmoid - training cross-entropy not stabilised)
# final test accuracy = 0.9824 (relu - training set fully learned, test accuracy stable)
## 3000 iterations example:
# sigmoid: 0.9713
# relu:    0.9748
# dropout: 0.9762