Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
class Agent_QLearning(object):  # Keep the class name!
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy.

    The agent keeps a ``(state_space, action_space)`` table of action values
    and remembers the most recent ``(observation, action)`` pair chosen by
    :meth:`act` so that :meth:`observe` can update that entry once the
    outcome of the action is known.
    """

    def __init__(self, state_space, action_space, eps, gamma, alpha):
        """Create an agent with an all-zero Q-table.

        Args:
            state_space: Number of discrete states (integer).
            action_space: Number of discrete actions (integer).
            eps: Probability of picking a uniformly random action.
            gamma: Discount factor applied to the bootstrapped value.
            alpha: Learning rate (step size) of the Q update.
        """
        self.action_space = action_space  # integer
        self.state_space = state_space  # integer
        self.eps = eps
        self.gamma = gamma
        self.alpha = alpha
        self.Q = np.zeros((state_space, action_space))
        # -1 is a sentinel meaning "act() has not been called yet".
        self.lastAct = -1
        self.lastObs = -1

    def observe(self, observation, reward, done):
        """Update Q for the last (observation, action) pair stored by act().

        Args:
            observation: State index reached after the last action.
            reward: Reward received for the last action.
            done: True when ``observation`` ends the episode.

        Returns:
            Always 0.
        """
        # Without a preceding act() the sentinels would be used as indices
        # and silently modify Q[-1, -1] (the last row/column of the table).
        if self.lastObs < 0 or self.lastAct < 0:
            return 0

        # A terminal transition has no future return to bootstrap from.
        if done:
            target = reward
        else:
            target = reward + self.gamma * np.max(self.Q[observation, :])

        delta = target - self.Q[self.lastObs, self.lastAct]
        self.Q[self.lastObs, self.lastAct] += self.alpha * delta
        return 0

    def act(self, observation):
        """Choose an action for ``observation`` with an epsilon-greedy policy.

        Args:
            observation: Current state index.

        Returns:
            The chosen action index; it is also stored in ``self.lastAct``
            and ``observation`` is stored in ``self.lastObs``.
        """
        # rand() draws from [0, 1), so a strict comparison guarantees that
        # eps == 0 never explores while eps == 1 always does.
        if np.random.rand() < self.eps:
            self.lastAct = np.random.randint(self.action_space)
        else:
            q = self.Q[observation, :]
            # Break ties between equally valued greedy actions at random.
            self.lastAct = np.random.choice(np.flatnonzero(q == q.max()))
        self.lastObs = observation
        return self.lastAct
Advertisement
Add Comment
Please, Sign In to add comment