Advertisement
Guest User

Untitled

a guest
Jan 24th, 2020
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.73 KB | None | 0 0
  1.     aql = Agent()
  2.     env = make("MountainCar-v0")
  3.     episodes = 20
  4.     rewards = []
  5.     for i in range(episodes):
  6.         eps = 0.1
  7.         state = transform_state(env.reset())
  8.         total_reward = 0
  9.         steps = 0
  10.         done = False
  11.         while not done:
  12.             if steps % 10 == 0:
  13.                 eps*=0.01
  14.             #env.render()
  15.             action = aql.act(state)
  16.             next_state, reward, done, _ = env.step(action)
  17.             next_state = transform_state(next_state)
  18.             total_reward += reward
  19.             steps += 1      
  20.             state = next_state
  21.         rewards.append(total_reward)
  22.         print(f"Done episode number {i}, max reward: {total_reward}")
  23.     print(np.mean(rewards))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement