Aquinius

Multi_GPU_CNN_test

Dec 13th, 2019
from __future__ import print_function

import numpy as np
import tensorflow as tf
import time

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
num_gpus = 4
num_steps = 400000
learning_rate = 0.001
batch_size = 1024
display_step = 10

# Network Parameters
num_input = 784  # MNIST data input (img shape: 28*28)
num_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout, probability to drop a unit (the 'rate' passed to tf.layers.dropout)

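# Note: batch_size is the per-GPU batch. Each training step below feeds
# batch_size * num_gpus = 4096 examples and slices one batch_size-sized
# chunk per GPU.
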
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
        # Reshape to match picture format [Height x Width x Channel]
        # Tensor input becomes 4-D: [Batch Size, Height, Width, Channel]
        x = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Convolution Layer with 64 filters and a kernel size of 5
        x = tf.layers.conv2d(x, 64, 5, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Convolution Layer with 256 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 256, 3, activation=tf.nn.relu)
        # Convolution Layer with 512 filters and a kernel size of 3
        x = tf.layers.conv2d(x, 512, 3, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        x = tf.layers.max_pooling2d(x, 2, 2)

        # Flatten the data to a 1-D vector for the fully connected layers
        x = tf.contrib.layers.flatten(x)

        # Fully connected layer
        x = tf.layers.dense(x, 2048)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = tf.layers.dropout(x, rate=dropout, training=is_training)

        # Fully connected layer
        x = tf.layers.dense(x, 1024)
        # Apply Dropout (if is_training is False, dropout is not applied)
        x = tf.layers.dropout(x, rate=dropout, training=is_training)

        # Output layer, class prediction
        out = tf.layers.dense(x, n_classes)
        # Because 'softmax_cross_entropy_with_logits' already applies softmax,
        # we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out

    return out

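# Shapes through conv_net for a 28x28x1 input (derived from the layer
# parameters above, with the default 'valid' padding): 24x24x64 -> 12x12x64
# -> 10x10x256 -> 8x8x512 -> 4x4x512 -> flatten to 8192 -> 2048 -> 1024
# -> n_classes.
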
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add a 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers, so we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

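# Illustration (shapes are hypothetical): with 2 towers and 2 variables,
# tower_grads is
#   [[(g0_gpu0, v0), (g1_gpu0, v1)],
#    [(g0_gpu1, v0), (g1_gpu1, v1)]]
# zip(*tower_grads) regroups the pairs per variable, so each variable's
# gradients can be stacked with tf.concat and averaged with tf.reduce_mean.
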
# By default, all variables would be placed on '/gpu:0', so we need a custom
# device function to assign all variables to '/cpu:0' instead.
# Note: if the GPUs are peered, '/gpu:0' can be a faster option.
PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']

def assign_to_device(device, ps_device='/cpu:0'):
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in PS_OPS:
            return ps_device
        else:
            return device

    return _assign

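# Sketch of the intended use (the real call appears in the GPU loop below):
#     with tf.device(assign_to_device('/gpu:1')):
#         ...build ops...
# Variable-creating ops match PS_OPS and land on ps_device ('/cpu:0'),
# while every other op is pinned to '/gpu:1'.
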
# Place all ops on CPU by default
with tf.device('/cpu:0'):
    tower_grads = []
    reuse_vars = False

    # tf Graph input
    X = tf.placeholder(tf.float32, [None, num_input])
    Y = tf.placeholder(tf.float32, [None, num_classes])

    # Loop over all GPUs and construct a computation graph for each of them
    for i in range(num_gpus):
        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):

            # Split data between GPUs
            _x = X[i * batch_size: (i + 1) * batch_size]
            _y = Y[i * batch_size: (i + 1) * batch_size]

            # Because Dropout has different behavior at training and prediction time, we
            # need to create 2 distinct computation graphs that share the same weights.

            # Create a graph for training
            logits_train = conv_net(_x, num_classes, dropout,
                                    reuse=reuse_vars, is_training=True)
            # Create another graph for testing that reuses the same weights
            logits_test = conv_net(_x, num_classes, dropout,
                                   reuse=True, is_training=False)

            # Define loss and optimizer (with train logits, for dropout to take effect)
            loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=logits_train, labels=_y))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            grads = optimizer.compute_gradients(loss_op)

            # Only the first GPU computes accuracy
            if i == 0:
                # Evaluate model (with test logits, for dropout to be disabled)
                correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            reuse_vars = True
            tower_grads.append(grads)

    tower_grads = average_gradients(tower_grads)
    train_op = optimizer.apply_gradients(tower_grads)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        # Keep training until the max number of steps is reached
        for step in range(1, num_steps + 1):
            # Get a batch for each GPU
            batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
            # Run optimization op (backprop) and time it
            ts = time.time()
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            te = time.time() - ts
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                # (loss comes from the last tower, accuracy from the first)
                loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                     Y: batch_y})
                print("Step " + str(step) + ": Minibatch Loss= " +
                      "{:.4f}".format(loss) + ", Training Accuracy= " +
                      "{:.3f}".format(acc) +
                      ", %i Examples/sec" % int(len(batch_x) / te))
        print("Optimization Finished!")

        # Calculate accuracy over the full MNIST test set, in batches of batch_size
        print("Testing Accuracy:",
              np.mean([sess.run(accuracy,
                                feed_dict={X: mnist.test.images[i:i + batch_size],
                                           Y: mnist.test.labels[i:i + batch_size]})
                       for i in range(0, len(mnist.test.images), batch_size)]))
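
The script above assumes four visible GPUs. A minimal, hypothetical adjustment for trying it on a different machine (not part of the original paste, and it would go near the top of the script, before the graph is built): derive num_gpus from CUDA_VISIBLE_DEVICES and enable soft placement so any op without a GPU kernel falls back to the CPU. It reuses the tensorflow import from the script.

import os

# Hypothetical tweak: infer the GPU count from CUDA_VISIBLE_DEVICES
# (defaults to 1 GPU if the variable is unset or empty).
visible = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
num_gpus = max(1, len([d for d in visible.split(",") if d.strip()]))

# allow_soft_placement lets ops without a GPU kernel fall back to the CPU;
# log_device_placement prints where every op was placed, which is handy for
# checking that the variables really live on /cpu:0.
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
# ...and then create the session with: tf.Session(config=config)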