Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2017
226
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.85 KB | None | 0 0
  1. 2017-07-23 16:16:17.281414: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:316] Started server with target: grpc://localhost:2225
  2. Process Process-3:
  3. Traceback (most recent call last):
  4. File "/home/skay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  5. self.run()
  6. File "/home/skay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
  7. self._target(*self._args, **self._kwargs)
  8. File "/home/skay/.PyCharmCE2017.1/config/scratches/scratch_6.py", line 32, in cifar10
  9. serv = tf.train.Server(cluster, job_name= params.job_name,task_index=params.task_index)
  10. File "/home/skay/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/server_lib.py", line 145, in __init__
  11. self._server_def.SerializeToString(), status)
  12. File "/home/skay/anaconda2/lib/python2.7/contextlib.py", line 24, in __exit__
  13. self.gen.next()
  14. File "/home/skay/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
  15. pywrap_tensorflow.TF_GetCode(status)) UnknownError: Could not start gRPC server
  16.  
  17. > `2017-07-23 16:27:48.605617: I tensorflow/core/distributed_runtime/master_session.cc:999] Start master session fe9fd6a338e2c9a7 with config:
  18.  
  19. 2017-07-23 16:27:48.607126: I tensorflow/core/distributed_runtime/master_session.cc:999] Start master session 3560417f98b00dea with config:
  20.  
  21. [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
  22. Process-3
  23. [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
  24. Process-3
  25. [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.]
  26. Process-3
  27.  
  28. ERROR:tensorflow:==================================
  29. Object was never used (type <class 'tensorflow.python.framework.ops.Operation'>):
  30. <tf.Operation 'worker_0/init' type=NoOp>
  31. If you want to mark it as used call its "mark_used()" method.
  32. It was originally created here:
  33. ['File "/home/skay/.PyCharmCE2017.1/config/scratches/scratch_6.py", line 83, in <module>n proc.start()', 'File "/home/skay/anaconda2/lib/python2.7/multiprocessing/process.py", line 130, in startn self._popen = Popen(self)', 'File "/home/skay/anaconda2/lib/python2.7/multiprocessing/forking.py", line 126, in __init__n code = process_obj._bootstrap()', 'File "/home/skay/anaconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrapn self.run()', 'File "/home/skay/anaconda2/lib/python2.7/multiprocessing/process.py", line 114, in runn self._target(*self._args, **self._kwargs)', 'File "/home/skay/.PyCharmCE2017.1/config/scratches/scratch_6.py", line 49, in cifar10n init_op=tf.initialize_all_variables(),logdir='/tmp/mydir')', 'File "/home/skay/anaconda2/lib/python2.7/site-packages/tensorflow/python/util/tf_should_use.py", line 170, in wrappedn return _add_should_use_warning(fn(*args, **kwargs))', 'File "/home/skay/anaconda2/lib/python2.7/site-packages/tensorflow/python/util/tf_should_use.py", line 139, in _add_should_use_warningn wrapped = TFShouldUseWarningWrapper(x)', 'File "/home/skay/anaconda2/lib/python2.7/site-packages/tensorflow/python/util/tf_should_use.py", line 96, in __init__n stack = [s.strip() for s in traceback.format_stack()]']
  34. ==================================
  35. 2017-07-23 16:28:28.646871: I tensorflow/core/distributed_runtime/master.cc:209] CreateSession still waiting for response from worker: /job:worker/replica:0/task:0
  36. 2017-07-23 16:28:38.647276: I tensorflow/core/distributed_runtime/master.cc:209] CreateSession still waiting for response from worker: /job:worker/replica:0/task:0
  37. 2017-07-23 16:28:48.647526: I tensorflow/core/distributed_runtime/master.cc:209] CreateSession still waiting for response from worker: /job:worker/replica:
  38.  
  39. # build a python multiprocess.py
  40. import multiprocessing
  41. import time
  42. import tensorflow as tf
  43. from tensorflow.contrib.training import HParams
  44. import os
  45. import psutil
  46. import numpy as np
  47. from tensorflow.python.client import device_lib
  48. from resnet import *
  49. import Queue
  50.  
  51. cluster_spec ={"ps": ["localhost:2226"
  52. ],
  53. "worker": [
  54. "localhost:2227",
  55. "localhost:2228"]}
  56.  
  57. cluster = tf.train.ClusterSpec(cluster_spec)
  58. im_Test = np.linspace(1,10,10)
  59.  
  60. def model_fun(input):
  61. print multiprocessing.current_process().name
  62. return input
  63.  
  64. def cifar10(device,return_dict,result_t):
  65. params = HParams(cluster=cluster,
  66. job_name = device[0],
  67. task_index = device[1])
  68.  
  69. serv = tf.train.Server(cluster, job_name= params.job_name,task_index=params.task_index)
  70. input_img=[]
  71. true_lab=[]
  72.  
  73. if params.job_name == "ps":
  74. ##try and wait for all the wokers t
  75. serv.join()
  76. elif params.job_name == "worker":
  77. with tf.device(tf.train.replica_device_setter(worker_device="/job:worker/replica:0/task:%d" % params.task_index,
  78. cluster=cluster)):
  79. # with tf.Graph().as_default(), tf.device('/cpu:%d' % params.task_index):
  80. # with tf.container('%s %d' % ('batchname', params.task_index)) as scope:
  81. input_img = tf.placeholder(dtype=tf.float32, shape=[10,])
  82. with tf.name_scope('%s_%d' % (params.job_name, params.task_index)) as scope:
  83. hess_op = model_fun(input_img)
  84. global_step = tf.contrib.framework.get_or_create_global_step()
  85. sv = tf.train.Supervisor(is_chief=(params.task_index == 0),
  86. global_step=global_step,
  87. init_op=tf.initialize_all_variables(),logdir='/tmp/mydir')
  88. with sv.prepare_or_wait_for_session(serv.target) as sess:
  89. step = 0
  90. while not sv.should_stop() :
  91. hess = sess.run(hess_op, feed_dict={input_img:im_Test })
  92. print(np.array(hess))
  93. print multiprocessing.current_process().name
  94. step += 1
  95. if(step==3):
  96. return_dict[params.job_name] = params.task_index
  97. result_t.put(return_dict)
  98. break
  99. sv.stop()
  100. sess.close()
  101.  
  102.  
  103. return
  104.  
  105. if __name__ == '__main__':
  106.  
  107. logger = multiprocessing.log_to_stderr()
  108. manager = multiprocessing.Manager()
  109. result = manager.Queue()
  110. return_dict = manager.dict()
  111. processes = []
  112. devices = [['ps', 0],
  113. ['worker', 0],
  114. ['worker', 1]
  115. ]
  116.  
  117. for i in (devices):
  118. start_time = time.time()
  119. proc = multiprocessing.Process(target=cifar10,args=(i,return_dict,result))
  120. processes.append(proc)
  121. proc.start()
  122.  
  123. for p in processes:
  124. p.join()
  125.  
  126. # print return_dict.values()
  127. kill = []
  128. while True:
  129. if result.empty() == True:
  130. break
  131. kill.append(result.get())
  132. print kill
  133.  
  134.  
  135. print("time taken = %d" % (start_time - time.time()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement