Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from random import randint

from tensorforce.agents import PPOAgent

if __name__ == "__main__":
    # Toy task: the state is a pair of random bits and the rewarded action
    # is their sum, so the agent picks one of three integer actions (0, 1, 2).
    state_spec = {'type': 'float', 'shape': (2,)}
    action_spec = {'type': 'int', 'num_actions': 3}
    optimizer_spec = {'type': 'adam', 'learning_rate': 1e-2}

    agent = PPOAgent(
        states=state_spec,
        actions=action_spec,
        # NOTE(review): no layers are listed here; confirm this is intended
        # and that layer specs were not lost when the snippet was pasted.
        network=[],
        batching_capacity=1000,
        step_optimizer=optimizer_spec,
    )

    # Endless loop: there is no exit condition, so it runs until the
    # process is interrupted.
    while True:
        # randint(0, 1) is inclusive on both ends, so each value is 0 or 1.
        first_bit, second_bit = randint(0, 1), randint(0, 1)
        expected = first_bit + second_bit

        action = agent.act((first_bit, second_bit))

        # +1 for choosing the sum, -1 otherwise.
        if action == expected:
            reward = 1.0
        else:
            reward = -1.0

        # Every step is reported as terminal, i.e. as a one-step episode.
        agent.observe(reward=reward, terminal=True)

        print(' - '.join((str(action), str(expected))))  # action taken vs action expected
Add Comment
Please, Sign In to add comment