Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# For practice purposes, as part of a Reinforcement Learning course.
#
# Replays a fixed, pre-computed policy on the FrozenLake 8x8 environment for
# many episodes, with the environment wrapped in gym's Monitor.
import gym
from gym import wrappers

# Optimized policy via Genetic Algorithm.
# One action per state: the list is indexed by the integer observation that
# the environment returns from reset()/step().
best_policy = [3, 2, 3, 2, 3, 2, 2, 0, 3, 3, 3, 3, 2, 3, 2, 1, 2, 2, 0, 2, 2, 2, 2, 2, 0, 3, 1, 1, 0, 2, 2, 2, 0, 3, 3, 0, 2, 1, 3, 2, 1, 2, 0, 0, 1, 0, 3, 2, 2, 0, 0, 0, 3, 2, 0, 2, 0, 1, 2, 1, 3, 3, 0, 0]

# Rollout settings, named so they can be tuned in one place.
NUM_EPISODES = 4000
MAX_STEPS_PER_EPISODE = 400
MONITOR_DIR = '/tmp/frozenlake-experiment-2'

env = gym.make('FrozenLake8x8-v0')
env = wrappers.Monitor(env, MONITOR_DIR)
try:
    for i_episode in range(NUM_EPISODES):
        observation = env.reset()
        for t in range(MAX_STEPS_PER_EPISODE):
            env.render()
            # Look up the action for the current state and apply it.
            observation, reward, done, info = env.step(best_policy[observation])
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Close the environment even if an episode raises or the run is
    # interrupted, so the wrapped environment is always shut down cleanly.
    env.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement