
# Untitled

from utils import *  # assumed to provide get_available_gpu_names (and any other helpers)

import numpy as np
import tensorflow as tf

# Registry of every variable created below, keyed by its full TensorFlow name.
params = {}


def create_variable(scope, name, shape, trainable=True, on_cpu=True, **kwargs) -> tf.Variable:
    """Create a variable under `scope`, optionally pinned to the CPU so all GPU towers share it."""
    def _create_variable():
        with tf.variable_scope(scope):
            _w = tf.get_variable(name, shape, trainable=trainable, **kwargs)
            params[_w.name] = _w
        return _w

    if on_cpu:
        with tf.device("/cpu:0"):
            w = _create_variable()
    else:
        w = _create_variable()

    return w


def get_variable(scope, name, trainable=True) -> tf.Variable:
    """Fetch an existing variable from `scope` (reuse=True) and record it in `params`."""
    with tf.variable_scope(scope, reuse=True):
        w = tf.get_variable(name, trainable=trainable)
        params[w.name] = w
    return w


def get_toy_data(n, xd):
    """Two linearly separable clusters: class 0 in [0, 0.5)^xd, class 1 in [0.5, 1)^xd."""
    xs = np.concatenate([np.random.random((n, xd)) / 2, np.random.random((n, xd)) / 2 + 0.5])
    ys = np.concatenate([np.zeros((n,), dtype=np.int64), np.ones((n,), dtype=np.int64)])
    permut = np.random.permutation(len(xs))
    xs = xs[permut]
    ys = ys[permut]
    return xs, np.eye(2)[ys]  # one-hot labels


def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over individual gradients. The inner list is over the gradient
            calculation for each tower.
    Returns:
        List of pairs of (gradient, variable) where the gradient has been averaged
        across all towers.
    """
    # The `def` line and parts of the body were dropped from the paste; the function is
    # restored here following the standard multi-GPU "tower" averaging pattern that the
    # surviving docstring and comments describe.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So we just return the first tower's pointer to the Variable.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads


def main():
    n = 6000
    xd = 14 * 14
    hd = 100

    xs, ys = get_toy_data(n, xd)

    X = tf.placeholder(tf.float32, [None, xd], name="X")
    Y = tf.placeholder(tf.float32, [None, 2], name="Y")

    # Three fully connected layers; the weights live on the CPU (see create_variable)
    # so that every GPU tower shares the same parameters.
    w1 = create_variable("layer1", "weight", (xd, hd))
    h = tf.nn.relu(tf.matmul(X, w1))

    w2 = create_variable("layer2", "weight", (hd, hd))
    h = tf.nn.relu(tf.matmul(h, w2))

    w3 = create_variable("layer3", "weight", (hd, 2))
    h = tf.matmul(h, w3)
    hhat = tf.nn.softmax(h)

    # NOTE: several lines (the optimizer definition and the per-tower gradient plumbing)
    # were missing from the paste. They are reconstructed below following the usual
    # multi-GPU tower pattern; the optimizer type and learning rate are assumptions.
    opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    gpu_names = get_available_gpu_names([1])

    batch_size = 300
    batch_size_per_gpu = batch_size // len(gpu_names)

    tower_grads = []
    loss_list = []

    with tf.variable_scope(tf.get_variable_scope()):
        for i, gpu_name in enumerate(gpu_names):
            with tf.device(gpu_name):
                # Each tower processes its own slice of the batch.
                idx_start = i * batch_size_per_gpu
                idx_end = (i + 1) * batch_size_per_gpu

                loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=h[idx_start:idx_end], labels=Y[idx_start:idx_end],
                ))
                tf.get_variable_scope().reuse_variables()

                tower_grads.append(opt.compute_gradients(loss))
                loss_list.append(loss)

    # Average the per-tower gradients and apply them once, on the shared (CPU) variables.
    grads = average_gradients(tower_grads)
    train_op = opt.apply_gradients(grads)

    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=True))
    sess.run(tf.global_variables_initializer())

    num_batch = n // batch_size
    for epoch in range(100):
        total_loss = 0
        for batch_idx in range(num_batch):
            idx_start = batch_idx * batch_size
            idx_end = (batch_idx + 1) * batch_size

            xs_b = xs[idx_start:idx_end]
            ys_b = ys[idx_start:idx_end]

            _, loss_value = sess.run([train_op, loss_list], feed_dict={
                X: xs_b,
                Y: ys_b
            })
            total_loss += np.mean(loss_value)

        print(total_loss)


if __name__ == '__main__':
    main()