Advertisement
Guest User

Untitled

a guest
Jul 25th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import numpy as np
  4.  
  5. import agent
  6. from utils import *
  7.  
  8. class QLearning(agent.Agent):
  9. def __init__(self, n_states, n_actions, lr = 0.03, discount = 0.9):
  10. self.v = np.random.rand(n_states, n_actions) * 0.1 - 0.05
  11. self.last = None
  12. self.lr = lr
  13. self.discount = discount
  14.  
  15. def step(self, obs, reward, done):
  16. if done:
  17. reward = 1.0 if reward > 0.0 else -1.0
  18. else:
  19. reward = 0.0
  20.  
  21. if self.last is not None:
  22. self.v[self.last] *= 1.0 - self.lr
  23. self.v[self.last] += self.lr * (
  24. reward + self.discount * np.max(self.v[obs])
  25. )
  26.  
  27. self.last = obs, self.policy(obs)
  28. return self.last[1]
  29.  
  30. def policy(self, obs):
  31. if np.random.rand() < 0.001:
  32. return np.random.randint(self.v.shape[1])
  33. return np.argmax(self.v[obs])
  34.  
  35. def __str__(self):
  36. return str(np.round(self.v, 2))
  37.  
  38. def run():
  39. import gym
  40. env = monitored(gym.make('FrozenLake-v0'))
  41. agent = QLearning(env.observation_space.n, env.action_space.n)
  42. agent.train(env, 100000)
  43.  
  44. if __name__ == "__main__":
  45. run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement