import random

import numpy as np

width, height = 4, 4  # 4x4 grid world


class Env:
    def __init__(self):
        self.CanDoAction = [0, 1, 2, 3]  # up, down, left, right
        self.field = np.arange(16).reshape(width, height)  # environment: states 0..15

    def reset(self):
        self.state = self.field[0][0]  # agent start position (top-left corner)
        self.done = False

    def move(self, action):  # state offset for an action, with boundary conditions
        if self.CanDoAction[action] == 0:  # up
            if self.state not in self.field[0]:  # not on the top row
                k = -self.field[1][0]  # one row up (-width)
            else:
                k = self.field[0][0]  # blocked by the boundary: stay (offset 0)
        elif self.CanDoAction[action] == 1:  # down
            if self.state not in self.field[3]:  # not on the bottom row
                k = self.field[1][0]  # one row down (+width)
            else:
                k = self.field[0][0]
        elif self.CanDoAction[action] == 2:  # left
            if self.state not in self.field[:, 0]:  # not in the left column
                k = -self.field[0][1]  # one column left (-1)
            else:
                k = self.field[0][0]
        else:  # right
            if self.state not in self.field[:, 3]:  # not in the right column
                k = self.field[0][1]  # one column right (+1)
            else:
                k = self.field[0][0]
        return k

    def step(self, action):
        next_state = self.state + self.move(action)

        if next_state == self.field[0][1]:  # trap cell
            reward = -10
            done = True
        elif next_state == self.field[1][3]:  # trap cell
            reward = -10
            done = True
        elif next_state == self.field[3][3]:  # goal cell
            reward = 100
            done = True
        else:
            reward = 0
            done = False

        self.state = next_state  # keep the environment's own position in sync
        return next_state, reward, done


class QlearningAgent:
    def __init__(self, actions):
        self.actions = actions
        self.learning_rate = 0.01
        self.discount_factor = 0.9
        self.epsilon = 0.1
        self.q_table = [[0 for col in range(4)] for row in range(16)]  # 16 states x 4 actions

    def learn(self, state, action, reward, next_state):
        a = int(state)
        b = int(next_state)
        q_value = self.q_table[a][action]
        # Q-learning target: immediate reward plus discounted best next-state value
        q_new = reward + self.discount_factor * max(self.q_table[b])
        print(q_new)
        self.q_table[a][action] += self.learning_rate * (q_new - q_value)

    def get_action(self, state):
        if np.random.rand() < self.epsilon:  # explore
            action = np.random.choice(self.actions)
        else:  # exploit
            state_action = self.q_table[int(state)]
            action = self.arg_max(state_action)
        return action

    @staticmethod
    def arg_max(state_action):
        # Index of the maximum Q-value, breaking ties at random
        max_index_list = []
        max_value = state_action[0]
        for index, value in enumerate(state_action):
            if value > max_value:
                max_index_list.clear()
                max_value = value
                max_index_list.append(index)
            elif value == max_value:
                max_index_list.append(index)
        return random.choice(max_index_list)


env = Env()
agent = QlearningAgent(actions=list(range(len(env.CanDoAction))))

for episode in range(10):
    env.reset()
    state = env.state

    while True:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)

        agent.learn(state, action, reward, next_state)

        state = next_state
        print(agent.q_table)

        if done:
            break
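
After training, the learned values can be sanity-checked by rolling out greedy actions from the Q-table. A minimal sketch, assuming the env and agent instances from the script above; the 20-step cap is an arbitrary safeguard added here, not part of the original:

# Sketch: greedy rollout from the learned Q-table (uses env/agent defined above).
env.reset()
state = env.state
path = [int(state)]
for _ in range(20):  # arbitrary cap so a looping policy still terminates
    action = agent.arg_max(agent.q_table[int(state)])  # best-known action, ties broken at random
    state, reward, done = env.step(action)
    path.append(int(state))
    if done:
        break
print("greedy path:", path)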