# Untitled

from random import randint

from tensorforce.agents import PPOAgent

if __name__ == "__main__":
    # Toy task: the state is a pair of random bits and the rewarded action
    # is their sum (0, 1 or 2), hence three discrete actions.
    state_spec = {'type': 'float', 'shape': (2,)}
    action_spec = {'type': 'int', 'num_actions': 3}
    optimizer_spec = {'type': 'adam', 'learning_rate': 1e-2}

    agent = PPOAgent(
        states=state_spec,
        actions=action_spec,
        # Empty layer list -- presumably means no hidden layers between the
        # state input and the policy output; confirm against Tensorforce docs.
        network=[],
        batching_capacity=1000,
        step_optimizer=optimizer_spec,
    )

    # Runs until the process is interrupted; there is no exit condition.
    while True:
        # Draw the two bits in a fixed order (a first, then b).
        a, b = randint(0, 1), randint(0, 1)
        expected = a + b

        action = agent.act((a, b))

        # +1.0 for choosing the sum, -1.0 otherwise. Every step is reported
        # as terminal, so each act/observe pair is its own episode.
        if action == expected:
            reward = 1.0
        else:
            reward = -1.0
        agent.observe(reward=reward, terminal=True)

        # Printed as "<action taken> - <action expected>".
        print(action, '-', expected)