Untitled

# -*- coding: utf-8 -*-
"""
Created on Mon May 13 22:37:02 2019

@author: 12718
"""

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

tf.reset_default_graph()
tf.set_random_seed(1)
mnist = input_data.read_data_sets(
        'C:\\Users\\12718\\Desktop\\python\\machinelearning\\MNIST_data',one_hot = True)
print (mnist.train.num_examples) #(55000,784)
print (mnist.validation.num_examples)
print (mnist.test.num_examples)  #(10000,784)
#plt.figure()
#plt.imshow(mnist.train.images[5].reshape(28,28),cmap = 'gray')
#plt.show()
INPUT_SIZE = 784
OUTPUT_SIZE = 10
HIDDEN_UNITS = 500
BATCH_SIZE = 100
LR = 0.8
LEARNING_RATE_DECAY = 0.99
REGULATIZATION_WEIGHT = 0.0001   #正则化
TRAINING_STEPS = 20000
MOVING_AVERAGE_DECAY = 0.99  #滑动平均

def inference(x, avg_class, w1, b1,w2,b2): #这里变量名有w1,b1,w2,b2可以通过scope增加可读性
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(x,w1)+b1)
        return tf.matmul(layer1, w2)+b2
    else:
        #计算滑动平均值
        layer1 = tf.nn.relu(tf.matmul(x,avg_class.average(w1))+avg_class.average(b1))
        return tf.matmul(layer1, avg_class.average(w2))+avg_class.average(b2)

#def inference(x, avg_class, reuse = False):
#    with tf.variable_scope('layer1', reuse = reuse):
#        w = weights([INPUT_SIZE,HIDDEN_UNITS])
#        b = bias([HIDDEN_UNITS])
#        if avg_class is None:
#            layer1 = tf.nn.relu(tf.matmul(x,w)+b)
#        else:
#            layer1 = tf.nn.relu(tf.matmul(x,avg_class.average(w))+avg_class.average(b))
#    with tf.variable_scope('layer2',reuse = reuse):
#        w = weights([HIDDEN_UNITS, OUTPUT_SIZE])
#        b = bias([OUTPUT_SIZE])
#        if avg_class is None:
#            layer2 = tf.matmul(layer1,w)+b
#        else:
#            layer2 = tf.matmul(layer1, avg_class.average(w))+avg_class.average(b)
#    return layer2


x = tf.placeholder('float', [None, INPUT_SIZE])
y_ = tf.placeholder('float', [None, OUTPUT_SIZE])

def weights(shape):
    initial = tf.truncated_normal(shape = shape,stddev = 0.1)
    return tf.Variable(initial)

def bias(shape):
    initial = tf.constant(0.1,shape = shape)
    return tf.Variable(initial)
#
w1 = weights([INPUT_SIZE,HIDDEN_UNITS])
b1 = bias([HIDDEN_UNITS])
w2 = weights([HIDDEN_UNITS, OUTPUT_SIZE])
b2 = bias([OUTPUT_SIZE])

y = inference(x, None,w1,b1,w2,b2)
### LR exponential decay
global_step = tf.Variable(0,trainable = False)
LR2 = tf.train.exponential_decay(LR,global_step,
                                 mnist.train.num_examples/BATCH_SIZE,LEARNING_RATE_DECAY)
###

### 滑动平均
variables_avg = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step)
variables_avg_ops = variables_avg.apply(tf.trainable_variables())
average_y = inference(x, variables_avg,w1,b1,w2,b2)
####
#### Regularization
regularizer = tf.contrib.layers.l2_regularizer(REGULATIZATION_WEIGHT)
regularization = regularizer(w1)+regularizer(w2)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y,labels = y_))
loss = cross_entropy + regularization
train_ops = tf.train.GradientDescentOptimizer(LR2).minimize(loss,global_step = global_step)
####

######由于要更新滑动平均值
#train_op = tf.group(train_ops, variables_avg_ops)
# or
with tf.control_dependencies([train_ops, variables_avg_ops]):
    train_op = tf.no_op(name = 'train')  #tf.no_op 什么操作都不做，仅作为点位符使用控制边界

prediction = tf.equal(tf.argmax(y_,1),tf.argmax(average_y,1))
accuracy = tf.reduce_mean(tf.cast(prediction,'float'))
train_losses = []
test_losses = []
steps = []
LRs = []
train_accuracy = []
validate_accuracy = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    validate_feed = {x:mnist.validation.images, y_:mnist.validation.labels}
    test_feed = {x:mnist.test.images, y_:mnist.test.labels}
    for i in range(TRAINING_STEPS+1):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        train_feed = {x:xs, y_:ys}
        sess.run(train_op, feed_dict = train_feed)
        if i%1000==0:
            train_acc = sess.run(accuracy, feed_dict = {x:xs,y_:ys})
            validate_acc = sess.run(accuracy, feed_dict = validate_feed)
            train_cross = sess.run(cross_entropy, feed_dict = train_feed)
            validate_cross = sess.run(cross_entropy, feed_dict = validate_feed)
            train_accuracy.append(train_acc)
            validate_accuracy.append(validate_acc)
            lr = sess.run(LR2)
            train_losses.append(train_cross)
            test_losses.append(validate_cross)
            steps.append(i)
            LRs.append(lr)
            print ('after traing %d steps, train: %g, validation accuracy: %g'
                   %(i,train_acc,validate_acc))
            print ('global step: ',sess.run(global_step))
    test_acc = sess.run(accuracy, feed_dict = test_feed)
    print ('the accuracy: ', test_acc)

fig, ax1 = plt.subplots(1)
ax1.plot(steps, train_accuracy, 'o-', color = 'g',label = 'train_acc')
ax1.plot(steps, validate_accuracy, 'o-', color = 'r', label = 'validate_acc')
ax1.set_ylabel('ACC')
plt.legend(loc = 'best')
ax2 = ax1.twinx()
ax2.plot(steps, LRs, 'o-', color = 'blue')
ax2.set_ylabel('LR')
plt.show()