Guest User

Untitled

a guest
Jul 21st, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.55 KB | None | 0 0
  1. from random import randint
  2. import numpy as np
  3. from tensorforce.agents import PPOAgent
  4. from tensorforce.environments import Environment
  5. from tensorforce.execution import Runner
  6.  
  7. class SimpleEnvironment(Environment):
  8. def __init__(self):
  9. self.counter = 0
  10.  
  11. @property
  12. def states(self):
  13. print "getting states"
  14. return dict(type='float', shape=(2,))
  15.  
  16. @property
  17. def actions(self):
  18. return dict(type='float', shape=1)
  19.  
  20. def execute(self, actions):
  21. self.counter += 1
  22. reward = -np.linalg.norm(actions)
  23. done = (self.counter % 30 == 0)
  24. return np.array([2,2]), done, reward
  25.  
  26. def reset(self):
  27. return np.array([2, 2])
  28.  
  29.  
  30. if __name__ == "__main__":
  31. environment = SimpleEnvironment()
  32.  
  33.  
  34. agent = PPOAgent(
  35. states=environment.states,
  36. actions=environment.actions,
  37. network=[dict(type='dense', size=2),
  38. dict(type='dense', size=2)
  39. ],
  40. batching_capacity=1000,
  41. step_optimizer=dict(
  42. type='adam',
  43. learning_rate=1e-3
  44. )
  45. )
  46. runner = Runner(agent=agent, environment=environment)
  47.  
  48. def episode_finished(r):
  49. print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep,
  50. reward=r.episode_rewards[-1]))
  51. return True
  52. # Start learning
  53. runner.run(episodes=300000, max_episode_timesteps=300, episode_finished=episode_finished, deterministic=False)
  54. runner.close()
Add Comment
Please, Sign In to add comment