Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Evaluation script: run the agent on "MountainCar-v0" for a fixed number of
# episodes, print each episode's accumulated reward, then print the mean
# over all episodes.
# NOTE(review): `Agent`, `make`, `transform_state` and `np` are not defined in
# this snippet; they must be imported/defined earlier in the file.
aql = Agent()
env = make("MountainCar-v0")
episodes = 20
rewards = []

for i in range(episodes):
    # Every raw observation goes through transform_state before the agent
    # sees it, both after reset and after each step.
    state = transform_state(env.reset())
    total_reward = 0
    done = False
    while not done:
        # env.render()  # enable to watch the episode
        action = aql.act(state)
        next_state, reward, done, _ = env.step(action)
        state = transform_state(next_state)
        total_reward += reward
    rewards.append(total_reward)
    # The printed value is the sum of rewards for this episode.
    print(f"Done episode number {i}, total reward: {total_reward}")

# Mean of the per-episode totals.
print(np.mean(rewards))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement