Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def __init__(self, width=4, height=4):
    """Build a ``width`` x ``height`` grid-world environment.

    Generalized: the original hard-coded ``np.arange(16)`` while reading
    ``width``/``height`` from module globals not defined in this file
    (they must have been 4 and 4 for the reshape to succeed — TODO confirm).
    Defaults keep the original 4x4 behavior for existing ``Env()`` callers.

    Args:
        width: number of grid rows.
        height: number of grid columns.
    """
    self.CanDoAction = [0, 1, 2, 3]  # up, down, left, right
    # Cell ids double as agent states: field[i][j] == i * height + j.
    self.field = np.arange(width * height).reshape(width, height)
def reset(self):
    """Return the agent to the start cell (top-left) and clear the episode flag."""
    self.done = False
    self.state = self.field[0][0]  # start position
def move(self, action):
    """Translate *action* into a signed state offset, clamped at grid edges.

    Actions (per ``CanDoAction``): 0 = up, 1 = down, 2 = left, 3 = right.
    Returns the offset to add to ``self.state``; 0 (``field[0][0]``) when
    the move would leave the grid.

    Bug fixed: the original boundary tests were written as
    ``self.state is not field[0][0] or field[0][1] or ...`` which Python
    parses as ``(state is not x) or truthy or truthy`` — always true — so
    the agent could walk off the board.  Proper membership tests against
    the border row/column are used instead.
    """
    row_step = self.field[1][0]   # offset of one row down (the grid width)
    col_step = self.field[0][1]   # offset of one column right (== 1)
    blocked = self.field[0][0]    # == 0: "stay in place"
    if self.CanDoAction[action] == 0:    # up: blocked on the top row
        k = -row_step if self.state not in self.field[0] else blocked
    elif self.CanDoAction[action] == 1:  # down: blocked on the bottom row
        k = row_step if self.state not in self.field[-1] else blocked
    elif self.CanDoAction[action] == 2:  # left: blocked in the first column
        k = -col_step if self.state not in self.field[:, 0] else blocked
    else:                                # right: blocked in the last column
        k = col_step if self.state not in self.field[:, -1] else blocked
    return k
def step(self, action):
    """Apply *action* and return ``(next_state, reward, done)``.

    Reward table (4x4 grid): cells ``field[0][1]`` and ``field[1][3]`` are
    traps (-10, episode ends); ``field[3][3]`` is the goal (+100, episode
    ends); every other cell gives 0 and the episode continues.
    """
    next_state = self.state + self.move(action)
    traps = (self.field[0][1], self.field[1][3])
    goal = self.field[3][3]
    if next_state == goal:
        reward, done = 100, True
    elif next_state in traps:
        reward, done = -10, True
    else:
        reward, done = 0, False
    return next_state, reward, done
def __init__(self, actions):
    """Store the action set and the Q-learning hyper-parameters.

    Args:
        actions: list of discrete action indices the agent may pick from.
    """
    self.actions = actions
    self.learning_rate = 0.01
    self.discount_factor = 0.9
    # Exploration rate (attribute name kept as-is — other methods read it).
    self.eplision = 0.1
    # 16 states x 4 actions, every Q-value starts at zero.
    self.q_tabel = [[0] * 4 for _ in range(16)]
def learn(self, state, action, reward, next_state):
    """One tabular Q-learning update:
    ``Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a))``.

    Args:
        state, next_state: numpy scalar cell ids; ``.tolist()`` turns them
            into plain ints usable as list indices.
        action: column index into the Q-table row.
        reward: scalar reward observed for the transition.

    Fixed: removed the leftover debug ``print`` of the TD target, which
    spammed stdout on every single update.
    """
    s = state.tolist()
    s_next = next_state.tolist()
    q_current = self.q_tabel[s][action]
    q_target = reward + self.discount_factor * max(self.q_tabel[s_next])
    self.q_tabel[s][action] += self.learning_rate * (q_target - q_current)
def get_action(self, state):
    """Epsilon-greedy action selection for *state*.

    With probability ``eplision`` pick a uniformly random action; otherwise
    pick the (tie-broken) argmax of the Q-table row for this state.
    """
    if np.random.rand() < self.eplision:
        return np.random.choice(self.actions)  # explore
    q_row = self.q_tabel[state.tolist()]
    return self.arg_max(q_row)                 # exploit
- @staticmethod ##
- def arg_max(state_action):
- max_index_list = []
- max_value = state_action[0]
- for index, value in enumerate(state_action):
- if value > max_value:
- max_index_list.clear()
- max_value = value
- max_index_list.append(index)
- elif value == max_value:
- max_index_list.append(index)
- return random.choice(max_index_list)
# Train the tabular agent on the grid world for a handful of episodes.
env = Env()
agent = QlearningAgent(actions=list(range(len(env.CanDoAction))))

for episode in range(10):
    env.reset()
    state = env.state
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        agent.learn(state, action, reward, next_state)
        state = next_state
        print(agent.q_tabel)  # dump the full Q-table after every update
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement