Advertisement
-TesseracT-

Untitled

Apr 22nd, 2021
760
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. import numpy as np
  2.  
  3. class Agent_QLearning(object): #Keep the class name!
  4.     """The world's simplest agent!"""
  5.     def __init__(self, state_space, action_space, eps, gamma, alpha):
  6.         self.action_space = action_space # integer
  7.         self.state_space = state_space # integer
  8.         self.eps = eps
  9.         self.gamma = gamma
  10.         self.alpha = alpha
  11.         self.Q = np.zeros((state_space, action_space))
  12.         self.lastAct = -1
  13.         self.lastObs = -1
  14.  
  15.     def observe(self, observation, reward, done):
  16.         # update Q(s, a)
  17.         delta = reward + self.gamma * np.max(self.Q[observation, :]) - self.Q[self.lastObs, self.lastAct]
  18.         self.Q[self.lastObs, self.lastAct] += self.alpha * delta
  19.  
  20.         return 0
  21.  
  22.  
  23.     def act(self, observation):
  24.         # epsilon greedy policy
  25.         if np.random.rand() <= self.eps:
  26.             self.lastAct = np.random.randint(self.action_space)
  27.         else:
  28.             q = self.Q[observation, :]
  29.             self.lastAct = np.random.choice(np.flatnonzero(q == q.max()))
  30.         self.lastObs = observation
  31.         return self.lastAct
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement