Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Mon May 13 22:37:02 2019
@author: 12718

MNIST classifier: one hidden-layer fully connected network built in
TensorFlow 1.x graph mode, trained with exponentially decayed SGD,
L2 weight regularization, and an exponential moving average of the
parameters used for evaluation.
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

tf.reset_default_graph()
tf.set_random_seed(1)  # fixed seed so weight initialization is reproducible

# NOTE(review): hard-coded absolute Windows path — adjust for your machine.
mnist = input_data.read_data_sets(
    'C:\\Users\\12718\\Desktop\\python\\machinelearning\\MNIST_data',one_hot = True)
print (mnist.train.num_examples) #(55000,784)
print (mnist.validation.num_examples)
print (mnist.test.num_examples) #(10000,784)
#plt.figure()
#plt.imshow(mnist.train.images[5].reshape(28,28),cmap = 'gray')
#plt.show()

INPUT_SIZE = 784                 # 28x28 images flattened to one row
OUTPUT_SIZE = 10                 # ten digit classes
HIDDEN_UNITS = 500               # width of the single hidden layer
BATCH_SIZE = 100
LR = 0.8                         # initial learning rate (decayed below)
LEARNING_RATE_DECAY = 0.99
REGULATIZATION_WEIGHT = 0.0001   # L2 regularization strength
TRAINING_STEPS = 20000
MOVING_AVERAGE_DECAY = 0.99      # decay rate of the parameter moving averages
def inference(x, avg_class, w1, b1, w2, b2):
    """Forward pass of the two-layer network.

    When ``avg_class`` is None the raw variables are used (training
    graph); otherwise every parameter is replaced by its shadow value
    from ``avg_class.average`` (evaluation with moving averages).
    Naming the parameters w1/b1/w2/b2 directly could be made more
    readable with variable scopes — see the commented alternative below.
    """
    # Resolve each parameter to either itself or its moving average.
    resolve = (lambda v: v) if avg_class is None else avg_class.average
    hidden = tf.nn.relu(tf.matmul(x, resolve(w1)) + resolve(b1))
    # Raw logits; softmax is applied later inside the loss.
    return tf.matmul(hidden, resolve(w2)) + resolve(b2)
- #def inference(x, avg_class, reuse = False):
- # with tf.variable_scope('layer1', reuse = reuse):
- # w = weights([INPUT_SIZE,HIDDEN_UNITS])
- # b = bias([HIDDEN_UNITS])
- # if avg_class is None:
- # layer1 = tf.nn.relu(tf.matmul(x,w)+b)
- # else:
- # layer1 = tf.nn.relu(tf.matmul(x,avg_class.average(w))+avg_class.average(b))
- # with tf.variable_scope('layer2',reuse = reuse):
- # w = weights([HIDDEN_UNITS, OUTPUT_SIZE])
- # b = bias([OUTPUT_SIZE])
- # if avg_class is None:
- # layer2 = tf.matmul(layer1,w)+b
- # else:
- # layer2 = tf.matmul(layer1, avg_class.average(w))+avg_class.average(b)
- # return layer2
# Placeholders for a batch of flattened images and their one-hot labels.
x = tf.placeholder('float', [None, INPUT_SIZE])
y_ = tf.placeholder('float', [None, OUTPUT_SIZE])
def weights(shape):
    """Return a trainable weight variable of the given shape, drawn
    from a truncated normal distribution with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1))
def bias(shape):
    """Return a trainable bias variable of the given shape, filled with
    the constant 0.1 (keeps ReLU units initially active)."""
    return tf.Variable(tf.constant(0.1, shape=shape))
#
# ---- Network parameters and training graph ----
w1 = weights([INPUT_SIZE,HIDDEN_UNITS])
b1 = bias([HIDDEN_UNITS])
w2 = weights([HIDDEN_UNITS, OUTPUT_SIZE])
b2 = bias([OUTPUT_SIZE])
# Logits from the raw (non-averaged) parameters — used for the training loss.
y = inference(x, None,w1,b1,w2,b2)

### LR exponential decay: decays once per epoch
### (num_examples / BATCH_SIZE training steps per epoch).
global_step = tf.Variable(0,trainable = False)
LR2 = tf.train.exponential_decay(LR,global_step,
    mnist.train.num_examples/BATCH_SIZE,LEARNING_RATE_DECAY)
###

### Moving average: keep shadow copies of all trainable variables.
variables_avg = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step)
variables_avg_ops = variables_avg.apply(tf.trainable_variables())
# Logits computed from the moving-average parameters — used for evaluation.
average_y = inference(x, variables_avg,w1,b1,w2,b2)
####

#### Regularization: L2 penalty on the weight matrices only (biases excluded).
regularizer = tf.contrib.layers.l2_regularizer(REGULATIZATION_WEIGHT)
regularization = regularizer(w1)+regularizer(w2)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y,labels = y_))
loss = cross_entropy + regularization
train_ops = tf.train.GradientDescentOptimizer(LR2).minimize(loss,global_step = global_step)
####

###### The moving averages must be refreshed on every training step,
###### so couple the two ops into a single train_op:
#train_op = tf.group(train_ops, variables_avg_ops)
# or
with tf.control_dependencies([train_ops, variables_avg_ops]):
    # tf.no_op does nothing itself; it is just an anchor that forces
    # both dependent ops to run whenever train_op is run.
    train_op = tf.no_op(name = 'train')

# Accuracy of the moving-average model.
prediction = tf.equal(tf.argmax(y_,1),tf.argmax(average_y,1))
accuracy = tf.reduce_mean(tf.cast(prediction,'float'))

# History buffers recorded during training, for plotting afterwards.
train_losses = []
test_losses = []
steps = []
LRs = []
train_accuracy = []
validate_accuracy = []
# ---- Training loop ----
# Runs TRAINING_STEPS mini-batch updates, evaluating on the current batch
# and the validation set every 1000 steps, then reports test accuracy and
# plots accuracy (left axis) against the decayed learning rate (right axis).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fixed feeds reused for every periodic evaluation.
    validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
    test_feed = {x: mnist.test.images, y_: mnist.test.labels}
    for i in range(TRAINING_STEPS + 1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        train_feed = {x: xs, y_: ys}
        sess.run(train_op, feed_dict=train_feed)
        if i % 1000 == 0:
            # Snapshot metrics every 1000 steps. Reuse train_feed rather
            # than rebuilding an identical feed dict for the accuracy run.
            train_acc = sess.run(accuracy, feed_dict=train_feed)
            validate_acc = sess.run(accuracy, feed_dict=validate_feed)
            train_cross = sess.run(cross_entropy, feed_dict=train_feed)
            validate_cross = sess.run(cross_entropy, feed_dict=validate_feed)
            train_accuracy.append(train_acc)
            validate_accuracy.append(validate_acc)
            lr = sess.run(LR2)
            train_losses.append(train_cross)
            test_losses.append(validate_cross)
            steps.append(i)
            LRs.append(lr)
            # Fixed typo in the progress message ('traing' -> 'training').
            print('after training %d steps, train: %g, validation accuracy: %g'
                  % (i, train_acc, validate_acc))
    print('global step: ', sess.run(global_step))
    test_acc = sess.run(accuracy, feed_dict=test_feed)
    print('the accuracy: ', test_acc)

# Accuracy curves and the learning-rate schedule on a shared x-axis.
fig, ax1 = plt.subplots(1)
ax1.plot(steps, train_accuracy, 'o-', color='g', label='train_acc')
ax1.plot(steps, validate_accuracy, 'o-', color='r', label='validate_acc')
ax1.set_ylabel('ACC')
plt.legend(loc='best')
ax2 = ax1.twinx()  # second y-axis for the learning rate
ax2.plot(steps, LRs, 'o-', color='blue')
ax2.set_ylabel('LR')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement