Guest User

Untitled

a guest
Jul 21st, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.63 KB | None | 0 0
  1. from random import randint
  2.  
  3. from tensorforce.agents import PPOAgent
  4.  
  5. if __name__ == "__main__":
  6. agent = PPOAgent(
  7. states=dict(type='float', shape=(2,)),
  8. actions=dict(type='int', num_actions=3),
  9. network=[
  10. ],
  11. batching_capacity=1000,
  12. step_optimizer=dict(
  13. type='adam',
  14. learning_rate=1e-2
  15. )
  16. )
  17.  
  18. while True:
  19. a = randint(0, 1)
  20. b = randint(0, 1)
  21. action = agent.act((a, b))
  22. agent.observe(reward=(1.0 if action == a + b else -1.0), terminal=True)
  23. print(str(action) + ' - ' + str(a + b)) # action taken vs action expected
Add Comment
Please, Sign In to add comment