Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.63 KB | None | 0 0
  1. #num_eval_rollouts=1000
  2. num_epochs = 200
  3. # loop over runouts
  4. returns=[]
  5. for i in range(1,num_epochs, 30):
  6. # reset variables in each iteration
  7. totalr=0
  8. done=False
  9. obs=env.reset()
  10. print(i)
  11. cloner.train(epochs = i)
  12. print(i)
  13. # loop until the end of the episode
  14. while not done:
  15. # get an action from the trained model (output for specific observation as input)
  16. action=cloner.sample(obs)
  17. # implement the action
  18. obs,r,done,_=env.step(action)
  19. env.render()
  20. totalr+=r
  21. returns.append(totalr)
  22. print('mean return', np.mean(returns))
  23. print('std of return', np.std(returns))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement