InvalidArgumentError (see above for traceback): Cannot assign a device to node 'save/RestoreV2_14': Could not satisfy explicit device specification '/job:ps/task:0/device:CPU:0' because no devices matching that specification are registered in this process; available devices: /job:local/replica:0/task:0/cpu:0, /job:local/replica:0/task:1/cpu:0, /job:worker/replica:0/task:1/cpu:0
    [[Node: save/RestoreV2_14 = RestoreV2[dtypes=[DT_INT32], _device="/job:ps/task:0/device:CPU:0"](save/Const, save/RestoreV2_14/tensor_names, save/RestoreV2_14/shape_and_slices)]]
import argparse

import tensorflow as tf

parser = argparse.ArgumentParser(description='tensorflow')
parser.add_argument('--job_name', dest='job_name')
parser.add_argument('--task_index', dest='task_index', default=0)
args = parser.parse_args()

ps_hosts = ['localhost:2222']
worker_hosts = ['localhost:2223', 'localhost:2224']
job_name = args.job_name
task_index = int(args.task_index)

# Create a cluster from the parameter server and worker hosts.
cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

# Create and start a server for the local task.
server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

if job_name == "ps":
    server.join()
elif job_name == "worker":
    # Pin variables to the parameter server and ops to this worker.
    with tf.device(tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % task_index,
            cluster=cluster)):
        # train_x, train_y, neural_network_model and the layer-size /
        # activation settings are defined elsewhere in the script.
        total_input_features = len(train_x[0])
        x = tf.placeholder('float', [None, total_input_features])
        y = tf.placeholder('float')
        global_step = tf.Variable(0, name="global_step", trainable=False)
        is_chief = (task_index == 0)

        prediction = neural_network_model(x, total_input_features, n_nodes_hl1,
                                           first_layer_activation,
                                           n_nodes_hl2,
                                           second_layer_activation)
        total_loss = tf.reduce_mean(tf.square(prediction - y))
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(total_loss, global_step=global_step)
        init_op = tf.global_variables_initializer()

    sv = tf.train.Supervisor(
        is_chief=is_chief,
        logdir="/tmp/train_logs",
        init_op=init_op,
        global_step=global_step)
    print('******** ALL CREATED ********')

    # The supervisor takes care of session initialization, restoring from
    # a checkpoint, and closing when done or an error occurs.
    with sv.managed_session(server.target) as sess:
        # Loop until the supervisor shuts down or 1000000 steps have completed.
        step = 0
        while not sv.should_stop() and step < 1000000:
            # Run a training step asynchronously.
            # See `tf.train.SyncReplicasOptimizer` for additional details on how to
            # perform *synchronous* training.
            train_feed = {x: train_x, y: train_y}
            _, step = sess.run([train_op, global_step], feed_dict=train_feed)
            if step % 100 == 0:
                print("Done step %d" % step)
    sv.stop()
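
A minimal launch sketch (the filename trainer.py is an assumption; the flags and ports come from the script above). The "Could not satisfy explicit device specification '/job:ps/task:0'" error typically means the tf.train.Server processes that are actually running were created with a ClusterSpec whose job names do not match the {"ps", "worker"} cluster used by the graph (the available devices listed in the error belong to jobs "local" and "worker" only), so every process should be started from the same script with matching flags:

python trainer.py --job_name=ps --task_index=0
python trainer.py --job_name=worker --task_index=0
python trainer.py --job_name=worker --task_index=1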