Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Evaluate a behavior-cloning policy under increasing training budgets.
# For each epoch count i in 1, 31, 61, ..., < num_epochs, the cloner is
# trained for i epochs, then a single episode is rolled out in `env`
# (rendered as it runs) and its total reward appended to `returns`;
# the running mean/std of returns is printed after each rollout.
#
# NOTE(review): assumes `env` (gym-style: reset/step/render), `cloner`
# (train/sample), and `np` (numpy) are defined earlier in this file —
# confirm against the surrounding code.

num_epochs = 200

# loop over rollouts, one per training budget
returns = []
for i in range(1, num_epochs, 30):
    # reset per-rollout state
    totalr = 0
    done = False
    obs = env.reset()
    # progress marker: show which training budget is being evaluated
    print(i)
    # (re)train the cloner for i epochs before this evaluation rollout
    cloner.train(epochs=i)
    # roll out one full episode until the environment signals termination
    while not done:
        # query the trained model for an action given the current observation
        action = cloner.sample(obs)
        # apply the action and collect the reward
        obs, r, done, _ = env.step(action)
        env.render()
        totalr += r
    returns.append(totalr)
    print('mean return', np.mean(returns))
    print('std of return', np.std(returns))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement