import numpy as np
import tensorflow as tf

params = {}

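# `utils` is an external module that is not part of this paste; it is assumed
# to provide get_available_gpu_names, which main() uses to pick the GPU
# devices the towers run on (assumed behaviour: take a list of GPU indices and
# return device name strings). The fallback below is only a hypothetical
# sketch built on tensorflow.python.client.device_lib, so the script can still
# run when utils is unavailable; drop it if you have the real module.
try:
    from utils import get_available_gpu_names
except ImportError:
    from tensorflow.python.client import device_lib

    def get_available_gpu_names(gpu_ids=None):
        # Collect the names of the local GPU devices, keep only the requested
        # indices if given, and fall back to the CPU when no GPU is visible.
        names = [d.name for d in device_lib.list_local_devices()
                 if d.device_type == "GPU"]
        if gpu_ids is not None:
            names = [name for i, name in enumerate(names) if i in gpu_ids]
        return names or ["/cpu:0"]
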
def create_variable(scope, name, shape, trainable=True, on_cpu=True, **kwargs) -> tf.Variable:
    """Create a variable under `scope`, optionally pinned to the CPU, and cache it in `params`."""
    def _create_variable():
        with tf.variable_scope(scope):
            _w = tf.get_variable(name, shape, trainable=trainable, **kwargs)
            params[_w.name] = _w
            return _w

    if on_cpu:
        # Keeping the shared weights on the CPU lets every GPU tower reuse them.
        with tf.device("/cpu:0"):
            w = _create_variable()
    else:
        w = _create_variable()

    return w

def get_variable(scope, name, trainable=True) -> tf.Variable:
    """Fetch an already-created variable from `scope` (reuse mode) and cache it in `params`."""
    with tf.variable_scope(scope, reuse=True):
        w = tf.get_variable(name, trainable=trainable)
        params[w.name] = w
        return w

def get_toy_data(n, xd):
    """Build a toy two-class set: n points per class, class 0 in [0, 0.5)^xd, class 1 in [0.5, 1)^xd."""
    xs = np.concatenate([np.random.random((n, xd)) / 2, np.random.random((n, xd)) / 2 + 0.5])
    ys = np.concatenate([np.zeros((n,), dtype=int), np.ones((n,), dtype=int)])
    permut = np.random.permutation(len(xs))
    xs = xs[permut]
    ys = ys[permut]
    return xs, np.eye(2)[ys]  # features and one-hot labels

def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer
            list is over the individual towers; each inner list holds the
            (gradient, variable) pairs computed by that tower.

    Returns:
        List of pairs of (gradient, variable) where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

def main():
    n = 6000          # examples per class (the toy set below has 2 * n rows)
    xd = 14 * 14      # input dimension
    hd = 100          # hidden dimension

    xs, ys = get_toy_data(n, xd)

    X = tf.placeholder(tf.float32, [None, xd], name="X")
    Y = tf.placeholder(tf.float32, [None, 2], name="Y")

    # Three-layer MLP; the weights live on the CPU (see create_variable) and
    # are shared by every GPU tower.
    w1 = create_variable("layer1", "weight", (xd, hd))
    h = tf.nn.relu(tf.matmul(X, w1))

    w2 = create_variable("layer2", "weight", (hd, hd))
    h = tf.nn.relu(tf.matmul(h, w2))

    w3 = create_variable("layer3", "weight", (hd, 2))
    h = tf.matmul(h, w3)
    hhat = tf.nn.softmax(h)  # class probabilities (not used during training)

    opt = tf.train.AdamOptimizer(learning_rate=0.001, name="opt")

    gpu_names = get_available_gpu_names([1])

    batch_size = 300
    batch_size_per_gpu = batch_size // len(gpu_names)

    grad_list = []
    loss_list = []

    with tf.variable_scope(tf.get_variable_scope()):
        for i, gpu_name in enumerate(gpu_names):
            with tf.device(gpu_name):
                # Each GPU tower computes the loss and gradients for its own
                # slice of the fed batch.
                idx_start = i * batch_size_per_gpu
                idx_end = (i + 1) * batch_size_per_gpu

                # One-hot labels over two mutually exclusive classes, so use
                # softmax cross-entropy (matching the softmax prediction hhat).
                loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=h[idx_start:idx_end], labels=Y[idx_start:idx_end],
                ))
                # The variables were created above; reuse them in every tower.
                tf.get_variable_scope().reuse_variables()
                grad = opt.compute_gradients(loss)

                loss_list.append(loss)
                grad_list.append(grad)

    # Average the per-tower gradients and apply one synchronous update.
    grads = average_gradients(grad_list)
    train_op = opt.apply_gradients(grads)

    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=True))
    sess.run(tf.global_variables_initializer())

    # Each epoch visits the first n of the 2 * n shuffled examples, in
    # mini-batches that get split across the towers.
    num_batch = n // batch_size
    for epoch in range(100):
        total_loss = 0
        for batch_idx in range(num_batch):
            idx_start = batch_idx * batch_size
            idx_end = (batch_idx + 1) * batch_size

            xs_b = xs[idx_start:idx_end]
            ys_b = ys[idx_start:idx_end]

            # Run one synchronous step; loss_list holds the per-tower losses.
            _, loss_value = sess.run([train_op, loss_list], feed_dict={
                X: xs_b,
                Y: ys_b,
            })
            total_loss += np.mean(loss_value)

        print(total_loss)


if __name__ == '__main__':
    main()