import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.variable_scope, tf.placeholder, tf.Session, ...)

from utils import *  # assumed to provide get_available_gpu_names and any other project helpers

# Registry of every variable created through create_variable / get_variable below.
params = {}

def create_variable(scope, name, shape, trainable=True, on_cpu=True, **kwargs) -> tf.Variable:
    """Create a variable under `scope` and register it in the global `params` dict."""
    def _create_variable():
        with tf.variable_scope(scope):
            _w = tf.get_variable(name, shape, trainable=trainable, **kwargs)
            params[_w.name] = _w
        return _w

    # Placing variables on the CPU lets every GPU tower share a single copy.
    if on_cpu:
        with tf.device("/cpu:0"):
            w = _create_variable()
    else:
        w = _create_variable()

    return w


def get_variable(scope, name, trainable=True) -> tf.Variable:
    """Fetch an already-created variable by reusing its variable scope."""
    with tf.variable_scope(scope, reuse=True):
        w = tf.get_variable(name, trainable=trainable)
        params[w.name] = w
    return w


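# Usage sketch (illustrative; assumes a fresh default graph): a weight is created
# once, on the CPU by default, and can later be re-fetched by scope/name via reuse:
#
#   w = create_variable("layer1", "weight", (784, 100))  # creates "layer1/weight"
#   w_again = get_variable("layer1", "weight")           # returns the same tf.Variable
#   assert w is w_again

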
def get_toy_data(n, xd):
    """Two linearly separable clusters in [0, 1)^xd with one-hot labels."""
    xs = np.concatenate([np.random.random((n, xd)) / 2, np.random.random((n, xd)) / 2 + 0.5])
    ys = np.concatenate([np.zeros((n,), dtype=np.int64), np.ones((n,), dtype=np.int64)])
    permut = np.random.permutation(len(xs))
    xs = xs[permut]
    ys = ys[permut]
    return xs, np.eye(2)[ys]


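# Shape sketch: get_toy_data(n, xd) returns 2*n shuffled samples; the first cluster
# lies in [0, 0.5)^xd (class 0), the second in [0.5, 1)^xd (class 1), and the labels
# come back one-hot:
#
#   xs, ys = get_toy_data(3, 2)   # xs.shape == (6, 2), ys.shape == (6, 2)

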
def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over towers; the inner list is over the (gradient, variable) pairs
            computed by that tower.
    Returns:
        List of (gradient, variable) pairs where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers, so we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads


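# Usage sketch (hypothetical gradient tensors g_a, g_b for one shared variable v):
#
#   tower_grads = [[(g_a, v)],   # (grad, var) pairs from tower / GPU 0
#                  [(g_b, v)]]   # (grad, var) pairs from tower / GPU 1
#   averaged = average_gradients(tower_grads)   # -> [((g_a + g_b) / 2, v)]

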
def main():
    n = 6000
    xd = 14 * 14
    hd = 100

    xs, ys = get_toy_data(n, xd)

    X = tf.placeholder(tf.float32, [None, xd], name="X")
    Y = tf.placeholder(tf.float32, [None, 2], name="Y")

    # Three-layer MLP; all weights live on the CPU so every GPU tower shares them.
    w1 = create_variable("layer1", "weight", (xd, hd))
    h = tf.nn.relu(tf.matmul(X, w1))

    w2 = create_variable("layer2", "weight", (hd, hd))
    h = tf.nn.relu(tf.matmul(h, w2))

    w3 = create_variable("layer3", "weight", (hd, 2))
    h = tf.matmul(h, w3)
    hhat = tf.nn.softmax(h)  # class probabilities (not used by the training loss below)

    opt = tf.train.AdamOptimizer(learning_rate=0.001, name="opt")

    # Helper from utils; assumed to return device strings such as "/gpu:1".
    gpu_names = get_available_gpu_names([1])

    batch_size = 300
    batch_size_per_gpu = batch_size // len(gpu_names)  # split each batch evenly across GPUs

    grad_list = []
    loss_list = []

    with tf.variable_scope(tf.get_variable_scope()):
        for i, gpu_name in enumerate(gpu_names):
            with tf.device(gpu_name):
                # Each tower computes the loss on its own slice of the batch.
                idx_start = i * batch_size_per_gpu
                idx_end = (i + 1) * batch_size_per_gpu

                loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=h[idx_start:idx_end], labels=Y[idx_start:idx_end],
                ))
                tf.get_variable_scope().reuse_variables()
                grad = opt.compute_gradients(loss)

                loss_list.append(loss)
                grad_list.append(grad)

    # Average the per-tower gradients and apply one synchronized update.
    grads = average_gradients(grad_list)
    train_op = opt.apply_gradients(grads)

    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=True))
    sess.run(tf.global_variables_initializer())

    num_batch = len(xs) // batch_size  # get_toy_data returns 2*n samples in total
    for epoch in range(100):
        total_loss = 0
        for batch_idx in range(num_batch):
            idx_start = batch_idx * batch_size
            idx_end = (batch_idx + 1) * batch_size

            xs_b = xs[idx_start:idx_end]
            ys_b = ys[idx_start:idx_end]

            _, loss_value = sess.run([train_op, loss_list], feed_dict={
                X: xs_b,
                Y: ys_b
            })
            total_loss += np.mean(loss_value)

        print("epoch {}: total loss {:.4f}".format(epoch, total_loss))


if __name__ == '__main__':
    main()