Advertisement
Guest User

Untitled

a guest
Aug 16th, 2017
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.08 KB | None | 0 0
  1. InvalidArgumentError (see above for traceback): Cannot assign a device to node 'save/RestoreV2_14': Could not satisfy explicit device specification '/job:ps/task:0/device:CPU:0' because no devices matching that specification are registered in this process; available devices: /job:local/replica:0/task:0/cpu:0, /job:local/replica:0/task:1/cpu:0, /job:worker/replica:0/task:1/cpu:0
  2. [[Node: save/RestoreV2_14 = RestoreV2[dtypes=[DT_INT32], _device="/job:ps/task:0/device:CPU:0"](save/Const, save/RestoreV2_14/tensor_names, save/RestoreV2_14/shape_and_slices)]]
  3.  
  # Distributed TensorFlow training script (between-graph replication).
  #
  # Run once per task, e.g.:
  #   --job_name=ps     --task_index=0
  #   --job_name=worker --task_index=0   (chief)
  #   --job_name=worker --task_index=1
  #
  # NOTE(review): `argparse`, `tf`, `train_x`, `train_y`,
  # `neural_network_model`, `n_nodes_hl1`, `n_nodes_hl2`,
  # `first_layer_activation` and `second_layer_activation` are not defined in
  # this excerpt -- presumably they are imported/defined earlier in the full
  # script; confirm.
  # NOTE(review): the original indentation was lost in the paste; the nesting
  # below follows the usual replica_device_setter / Supervisor layout --
  # confirm it matches the intended structure.
  parser = argparse.ArgumentParser(description='tensorflow')
  parser.add_argument('--job_name', dest='job_name')
  # type=int makes argparse reject a non-numeric index with a usage error
  # instead of failing later inside int().
  parser.add_argument('--task_index', dest='task_index', type=int, default=0)
  args = parser.parse_args()

  ps_hosts = ['localhost:2222']
  worker_hosts = ['localhost:2223', 'localhost:2224']
  job_name = args.job_name
  task_index = args.task_index

  # Upper bound on global steps before a worker leaves the training loop.
  max_steps = 1000000
  # Directory the Supervisor uses for checkpoints and logs.
  train_logdir = "/tmp/train_logs"

  # Create a cluster from the parameter server and worker hosts.
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

  # Create and start a server for the local task.
  server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

  if job_name == "ps":
      # Parameter servers only host variables; block here serving requests.
      # NOTE(review): variables (and the Saver's restore ops) are pinned to
      # /job:ps, so this task presumably has to be running with this same
      # cluster spec before any worker opens its session -- confirm.
      server.join()

  elif job_name == "worker":
      # Worker task 0 acts as chief (initialization / checkpointing).
      is_chief = (task_index == 0)

      # Variables are assigned to the ps job, remaining ops to this worker.
      with tf.device(tf.train.replica_device_setter(
              worker_device="/job:worker/task:%d" % task_index,
              cluster=cluster)):
          total_input_features = len(train_x[0])
          x = tf.placeholder('float', [None, total_input_features])
          y = tf.placeholder('float')
          global_step = tf.Variable(0, name="global_step", trainable=False)

          prediction = neural_network_model(x, total_input_features,
                                            n_nodes_hl1,
                                            first_layer_activation,
                                            n_nodes_hl2,
                                            second_layer_activation)
          # Mean squared error between the prediction and the target.
          total_loss = tf.reduce_mean(tf.square(prediction - y))
          optimizer = tf.train.AdamOptimizer()
          train_op = optimizer.minimize(total_loss, global_step=global_step)

          # tf.initialize_all_variables() is deprecated; this is its
          # direct replacement.
          init_op = tf.global_variables_initializer()

      sv = tf.train.Supervisor(
          is_chief=is_chief,
          logdir=train_logdir,
          init_op=init_op,
          global_step=global_step)

      print('******** ALL CREATED ********')

      # The supervisor takes care of session initialization, restoring from
      # a checkpoint, and closing when done or an error occurs.
      with sv.managed_session(server.target) as sess:
          # Loop until the supervisor shuts down or max_steps have completed.
          step = 0
          while not sv.should_stop() and step < max_steps:
              # Run a training step asynchronously.
              # See `tf.train.SyncReplicasOptimizer` for additional details
              # on how to perform *synchronous* training.
              train_feed = {x: train_x, y: train_y}
              _, step = sess.run([train_op, global_step],
                                 feed_dict=train_feed)
              if step % 100 == 0:
                  print("Done step %d" % step)

      # Ask for all the services to stop.
      sv.stop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement