Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Benchmark transferring data from TF into Python runtime
- # requirement: tf 0.12 (for var.read_value(), ones_initializer())
- #
- # On Linux the default malloc is slow; install tcmalloc and preload it:
- #   sudo apt-get install google-perftools
- #   export LD_PRELOAD="/usr/lib/libtcmalloc.so.4"
- #
- # 2014 MacBook:
- # 128MB -- 3.56 GB/s
- # 1024MB -- 1.96 GB/s
- #
- # Xeon E5-2630 v3 @ 2.40GHz:
- # 128 MB -- 0.43 GB/s (default malloc)
- # 128 MB -- 4-6.2 GB/s (tcmalloc)
- # 1024 MB -- 4-5.97 GB/s (tcmalloc)
- import gc
- import os
- import subprocess
- import sys
- import tensorflow as tf
- import threading
- import time
# Command-line flags controlling the benchmark.
flags = tf.flags
# Number of timed fetches; benchmark() reports throughput over these only.
flags.DEFINE_integer("iters", 10, "number of timed iterations")
# Untimed fetches performed before the timer starts.
flags.DEFINE_integer("warmup_iters", 5, "warmup iterations")
# benchmark() allocates 250*1000 int32 elements per unit of this flag.
flags.DEFINE_integer("data_mb", 128, "size of vector in MBs")
# NOTE(review): "verbose" is not read anywhere in the visible script.
flags.DEFINE_boolean("verbose", False, "extra logging")
flags.DEFINE_boolean("sanity_check", False, "run sanity check on results")
FLAGS = flags.FLAGS
def default_config():
    """Build the session configuration used by the benchmark.

    Returns:
        A ``tf.ConfigProto`` whose graph optimizer level is set to
        ``tf.OptimizerOptions.L0`` and whose ``log_device_placement`` and
        ``allow_soft_placement`` fields are both ``False``.
    """
    graph_opts = tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0))
    # Proto fields are passed as constructor keywords rather than assigned
    # after construction; the resulting message is the same.
    return tf.ConfigProto(
        graph_options=graph_opts,
        log_device_placement=False,
        allow_soft_placement=False)
def benchmark():
    """Time repeated fetches of a large TF variable into the Python runtime.

    Creates an int32 variable of ``250*1000*FLAGS.data_mb`` elements filled
    with ones, fetches it ``FLAGS.warmup_iters`` times untimed and then
    ``FLAGS.iters`` times timed, and prints the resulting rate as
    "<rate> MB per second".  When ``FLAGS.sanity_check`` is set, every fetched
    array is summed into a running total and the total divided by the vector
    length is printed.

    The garbage collector is disabled while the benchmark runs and restored
    to its previous state afterwards; the session is closed on exit.

    Raises:
        ValueError: if ``FLAGS.iters`` is less than 1 or
            ``FLAGS.warmup_iters`` is negative.  In both cases the loop would
            never reach the iteration that starts the timer.
    """
    if FLAGS.iters < 1:
        raise ValueError("--iters must be at least 1, got %d" % FLAGS.iters)
    if FLAGS.warmup_iters < 0:
        raise ValueError(
            "--warmup_iters must be non-negative, got %d" % FLAGS.warmup_iters)

    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        dtype = tf.int32
        params_size = 250 * 1000 * FLAGS.data_mb  # 1MB is 250k integers
        params = tf.get_variable("params", [params_size], dtype,
                                 initializer=tf.ones_initializer())
        params_read = params.read_value()  # prevent caching
        init_op = tf.global_variables_initializer()

        # The context manager closes the session even if a fetch fails.
        with tf.Session(config=default_config()) as sess:
            sess.run(init_op)

            total = 0
            start_time = None
            for i in range(FLAGS.iters + FLAGS.warmup_iters):
                # Start the clock only once the warmup fetches are done.
                if i == FLAGS.warmup_iters:
                    start_time = time.time()
                # fetch value into Python runtime, and discard value
                # immediately
                result = sess.run(params_read)
                if FLAGS.sanity_check:
                    total += result.sum()
                    # NOTE(review): the original indentation was lost; this
                    # print is assumed to belong to the sanity-check branch.
                    print(float(total) / params_size)
            elapsed_time = time.time() - start_time
    finally:
        # Leave the interpreter's GC setting as the caller had it.
        if gc_was_enabled:
            gc.enable()

    rate = float(FLAGS.iters) * FLAGS.data_mb / elapsed_time
    print("%.2f MB per second" % (rate))
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    benchmark()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement