Advertisement
Guest User

Untitled

a guest
Jan 31st, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.38 KB | None | 0 0
  1. #######################################################################
  2. # Copyright (C) #
  3. # 2016 - 2018 Shangtong Zhang(zhangshangtong.cpp@gmail.com) #
  4. # 2016 Jan Hakenberg(jan.hakenberg@gmail.com) #
  5. # 2016 Tian Jun(tianjun.cpp@gmail.com) #
  6. # 2016 Kenta Shimada(hyperkentakun@gmail.com) #
  7. # Permission given to modify the code as long as you keep this #
  8. # declaration at the top #
  9. #######################################################################
  10.  
  11. import numpy as np
  12. import pickle
  13.  
# Board dimensions: the game is played on a BOARD_ROWS x BOARD_COLS grid.
# NOTE(review): the win/diagonal logic below assumes a square board.
BOARD_ROWS = 4
BOARD_COLS = 4
# Total number of cells on the board.
BOARD_SIZE = BOARD_ROWS * BOARD_COLS
  17.  
  18. class State:
  19. def __init__(self):
  20. # the board is represented by an n * n array,
  21. # 1 represents a chessman of the player who moves first,
  22. # -1 represents a chessman of another player
  23. # 0 represents an empty position
  24. self.data = np.zeros((BOARD_ROWS, BOARD_COLS))
  25. self.winner = None
  26. self.hash_val = None
  27. self.end = None
  28.  
  29. # compute the hash value for one state, it's unique
  30. def hash(self):
  31. if self.hash_val is None:
  32. self.hash_val = 0
  33. for i in self.data.reshape(BOARD_ROWS * BOARD_COLS):
  34. if i == -1:
  35. i = 2
  36. self.hash_val = self.hash_val * 3 + i
  37. return int(self.hash_val)
  38.  
  39. # check whether a player has won the game, or it's a tie
  40. def is_end(self):
  41. if self.end is not None:
  42. return self.end
  43. results = []
  44. # check row
  45. for i in range(0, BOARD_ROWS):
  46. results.append(np.sum(self.data[i, :]))
  47. # check columns
  48. for i in range(0, BOARD_COLS):
  49. results.append(np.sum(self.data[:, i]))
  50.  
  51. # check diagonals
  52. results.append(0)
  53. for i in range(0, BOARD_ROWS):
  54. results[-1] += self.data[i, i]
  55. results.append(0)
  56. for i in range(0, BOARD_ROWS):
  57. results[-1] += self.data[i, BOARD_ROWS - 1 - i]
  58.  
  59. for result in results:
  60. if result == 4:
  61. self.winner = 1
  62. self.end = True
  63. return self.end
  64. if result == -4:
  65. self.winner = -1
  66. self.end = True
  67. return self.end
  68.  
  69. # whether it's a tie
  70. sum = np.sum(np.abs(self.data))
  71. if sum == BOARD_ROWS * BOARD_COLS:
  72. self.winner = 0
  73. self.end = True
  74. return self.end
  75.  
  76. # game is still going on
  77. self.end = False
  78. return self.end
  79.  
  80. # @symbol: 1 or -1
  81. # put chessman symbol in position (i, j)
  82. def next_state(self, i, j, symbol):
  83. new_state = State()
  84. new_state.data = np.copy(self.data)
  85. new_state.data[i, j] = symbol
  86. return new_state
  87.  
  88. # print the board
  89. def print_state(self):
  90. for i in range(0, BOARD_ROWS):
  91. print('-------------')
  92. out = '| '
  93. for j in range(0, BOARD_COLS):
  94. if self.data[i, j] == 1:
  95. token = '*'
  96. if self.data[i, j] == 0:
  97. token = '0'
  98. if self.data[i, j] == -1:
  99. token = 'x'
  100. out += token + ' | '
  101. print(out)
  102. print('-------------')
  103.  
  104. def get_all_states_impl(current_state, current_symbol, all_states):
  105. for i in range(0, BOARD_ROWS):
  106. for j in range(0, BOARD_COLS):
  107. if current_state.data[i][j] == 0:
  108. newState = current_state.next_state(i, j, current_symbol)
  109. newHash = newState.hash()
  110. if newHash not in all_states.keys():
  111. isEnd = newState.is_end()
  112. all_states[newHash] = (newState, isEnd)
  113. if not isEnd:
  114. get_all_states_impl(newState, -current_symbol, all_states)
  115.  
  116. def get_all_states():
  117. current_symbol = 1
  118. current_state = State()
  119. all_states = dict()
  120. all_states[current_state.hash()] = (current_state, current_state.is_end())
  121. get_all_states_impl(current_state, current_symbol, all_states)
  122. return all_states
  123.  
# all possible board configurations
# NOTE(review): on a 4x4 board this enumeration is enormous (up to 3^16
# configurations) -- confirm it terminates in acceptable time/memory.
all_states = get_all_states()
  126.  
  127. class Judger:
  128. # @player1: the player who will move first, its chessman will be 1
  129. # @player2: another player with a chessman -1
  130. def __init__(self, player1, player2):
  131. self.p1 = player1
  132. self.p2 = player2
  133. self.current_player = None
  134. self.p1_symbol = 1
  135. self.p2_symbol = -1
  136. self.p1.set_symbol(self.p1_symbol)
  137. self.p2.set_symbol(self.p2_symbol)
  138. self.current_state = State()
  139.  
  140. def reset(self):
  141. self.p1.reset()
  142. self.p2.reset()
  143.  
  144. def alternate(self):
  145. while True:
  146. yield self.p1
  147. yield self.p2
  148.  
  149. # @print_state: if True, print each board during the game
  150. def play(self, print_state=False):
  151. alternator = self.alternate()
  152. self.reset()
  153. current_state = State()
  154. self.p1.set_state(current_state)
  155. self.p2.set_state(current_state)
  156. while True:
  157. player = next(alternator)
  158. if print_state:
  159. current_state.print_state()
  160. [i, j, symbol] = player.act()
  161. next_state_hash = current_state.next_state(i, j, symbol).hash()
  162. current_state, is_end = all_states[next_state_hash]
  163. self.p1.set_state(current_state)
  164. self.p2.set_state(current_state)
  165. if is_end:
  166. if print_state:
  167. current_state.print_state()
  168. return current_state.winner
  169.  
  170. # AI player
  171. class Player:
  172. # @step_size: the step size to update estimations
  173. # @epsilon: the probability to explore
  174. def __init__(self, step_size=0.1, epsilon=0.1):
  175. self.estimations = dict()
  176. self.step_size = step_size
  177. self.epsilon = epsilon
  178. self.states = []
  179. self.greedy = []
  180.  
  181. def reset(self):
  182. self.states = []
  183. self.greedy = []
  184.  
  185. def set_state(self, state):
  186. self.states.append(state)
  187. self.greedy.append(True)
  188.  
  189. def set_symbol(self, symbol):
  190. self.symbol = symbol
  191. for hash_val in all_states.keys():
  192. (state, is_end) = all_states[hash_val]
  193. if is_end:
  194. if state.winner == self.symbol:
  195. self.estimations[hash_val] = 1.0
  196. elif state.winner == 0:
  197. # we need to distinguish between a tie and a lose
  198. self.estimations[hash_val] = 0.5
  199. else:
  200. self.estimations[hash_val] = 0
  201. else:
  202. self.estimations[hash_val] = 0.5
  203.  
  204. # update value estimation
  205. def backup(self):
  206. # for debug
  207. # print('player trajectory')
  208. # for state in self.states:
  209. # state.print_state()
  210.  
  211. self.states = [state.hash() for state in self.states]
  212.  
  213. for i in reversed(range(len(self.states) - 1)):
  214. state = self.states[i]
  215. td_error = self.greedy[i] * (self.estimations[self.states[i + 1]] - self.estimations[state])
  216. self.estimations[state] += self.step_size * td_error
  217.  
  218. # choose an action based on the state
  219. def act(self):
  220. state = self.states[-1]
  221. next_states = []
  222. next_positions = []
  223. for i in range(BOARD_ROWS):
  224. for j in range(BOARD_COLS):
  225. if state.data[i, j] == 0:
  226. next_positions.append([i, j])
  227. next_states.append(state.next_state(i, j, self.symbol).hash())
  228.  
  229. if np.random.rand() < self.epsilon:
  230. action = next_positions[np.random.randint(len(next_positions))]
  231. action.append(self.symbol)
  232. self.greedy[-1] = False
  233. return action
  234.  
  235. values = []
  236. for hash, pos in zip(next_states, next_positions):
  237. values.append((self.estimations[hash], pos))
  238. # to select one of the actions of equal value at random
  239. np.random.shuffle(values)
  240. values.sort(key=lambda x: x[0], reverse=True)
  241. action = values[0][1]
  242. action.append(self.symbol)
  243. return action
  244.  
  245. def save_policy(self):
  246. with open('policy_%s.bin' % ('first' if self.symbol == 1 else 'second'), 'wb') as f:
  247. pickle.dump(self.estimations, f)
  248.  
  249. def load_policy(self):
  250. with open('policy_%s.bin' % ('first' if self.symbol == 1 else 'second'), 'rb') as f:
  251. self.estimations = pickle.load(f)
  252.  
  253. # human interface
  254. # input a number to put a chessman
  255. # | a | b | c | d |
  256. # | e | f | g | h |
  257. # | i | j | k | l |
  258. # | m | n | o | p |
  259. class HumanPlayer:
  260. def __init__(self, **kwargs):
  261. self.symbol = None
  262. self.keys = [chr(i+97) for i in range(0, 16)]
  263. self.state = None
  264. return
  265.  
  266. def reset(self):
  267. return
  268.  
  269. def set_state(self, state):
  270. self.state = state
  271.  
  272. def set_symbol(self, symbol):
  273. self.symbol = symbol
  274. return
  275.  
  276. def backup(self, _):
  277. return
  278.  
  279. def act(self):
  280. self.state.print_state()
  281. key = input("Input your position:")
  282. data = self.keys.index(key)
  283. i = data // int(BOARD_COLS)
  284. j = data % BOARD_COLS
  285. return (i, j, self.symbol)
  286.  
  287. def train(epochs, print_every_n=500):
  288. player1 = Player(epsilon=0.01)
  289. player2 = Player(epsilon=0.01)
  290. judger = Judger(player1, player2)
  291. player1_win = 0.0
  292. player2_win = 0.0
  293. for i in range(1, epochs + 1):
  294. winner = judger.play(print_state=False)
  295. if winner == 1:
  296. player1_win += 1
  297. if winner == -1:
  298. player2_win += 1
  299. if i % print_every_n == 0:
  300. print('Epoch %d, player 1 winrate: %.02f, player 2 winrate: %.02f' % (i, player1_win / i, player2_win / i))
  301. player1.backup()
  302. player2.backup()
  303. judger.reset()
  304. player1.save_policy()
  305. player2.save_policy()
  306.  
  307. def compete(turns):
  308. player1 = Player(epsilon=0)
  309. player2 = Player(epsilon=0)
  310. judger = Judger(player1, player2)
  311. player1.load_policy()
  312. player2.load_policy()
  313. player1_win = 0.0
  314. player2_win = 0.0
  315. for _ in range(0, turns):
  316. winner = judger.play()
  317. if winner == 1:
  318. player1_win += 1
  319. if winner == -1:
  320. player2_win += 1
  321. judger.reset()
  322. print('%d turns, player 1 win %.02f, player 2 win %.02f' % (turns, player1_win / turns, player2_win / turns))
  323.  
  324. # The game is a zero sum game. If both players are playing with an optimal strategy, every game will end in a tie.
  325. # So we test whether the AI can guarantee at least a tie if it goes second.
  326. def play():
  327. while True:
  328. player1 = HumanPlayer()
  329. player2 = Player(epsilon=0)
  330. judger = Judger(player1, player2)
  331. player2.load_policy()
  332. winner = judger.play()
  333. if winner == player2.symbol:
  334. print("You lose!")
  335. elif winner == player1.symbol:
  336. print("You win!")
  337. else:
  338. print("It is a tie!")
  339.  
if __name__ == '__main__':
    # train via self-play, then evaluate the greedy policies, then
    # let a human play against the trained AI interactively
    train(int(1e5))
    compete(int(1e3))
    play()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement