Guest User

Untitled

a guest
Mar 23rd, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.15 KB | None | 0 0
  1. class Board:
  2. '''
  3. class handling state of the board
  4. '''
  5.  
  6. def __init__(self):
  7. self.state = np.zeros([2,3,3])
  8. self.player = 0 # current player's turn
  9.  
  10. def copy(self):
  11. '''
  12. make copy of the board
  13. '''
  14. copy = Board()
  15. copy.player = self.player
  16. copy.state = np.copy(self.state)
  17. return copy
  18.  
  19. def move(self, move):
  20. '''
  21. take move of form [x,y] and play
  22. the move for the current player
  23. '''
  24. if np.any(self.state[:,move[0],move[1]]): return
  25. self.state[self.player][move[0],move[1]] = 1
  26. self.player ^= 1
  27.  
  28. def get_moves(self):
  29. '''
  30. return remaining possible board moves
  31. (ie where there are no O's or X's)
  32. '''
  33. return np.argwhere(self.state[0]+self.state[1]==0).tolist()
  34.  
  35. def result(self):
  36. '''
  37. check rows, columns, and diagonals
  38. for sequence of 3 X's or 3 O's
  39. '''
  40. board = self.state[self.player^1]
  41. col_sum = np.any(np.sum(board,axis=0)==3)
  42. row_sum = np.any(np.sum(board,axis=1)==3)
  43. d1_sum = np.any(np.trace(board)==3)
  44. d2_sum = np.any(np.trace(np.flip(board,1))==3)
  45. return col_sum or row_sum or d1_sum or d2_sum
  46.  
  47. class Node:
  48. '''
  49. maintains state of nodes in
  50. the monte carlo search tree
  51. '''
  52. def __init__(self, parent=None, action=None, board=None):
  53. self.parent = parent
  54. self.board = board
  55. self.children = []
  56. self.wins = 0
  57. self.visits = 0
  58. self.untried_actions = board.get_moves()
  59. self.action = action
  60.  
  61. def select(self):
  62. '''
  63. select child of node with
  64. highest UCB1 value
  65. '''
  66. s = sorted(self.children, key=lambda c:c.wins/c.visits+0.2*sqrt(2*log(self.visits)/c.visits))
  67. return s[-1]
  68.  
  69. def expand(self, action, board):
  70. '''
  71. expand parent node (self) by adding child
  72. node with given action and state
  73. '''
  74. child = Node(parent=self, action=action, board=board)
  75. self.untried_actions.remove(action)
  76. self.children.append(child)
  77. return child
  78.  
  79. def update(self, result):
  80. self.visits += 1
  81. self.wins += result
  82.  
  83. def UCT(rootstate, maxiters):
  84.  
  85. root = Node(board=rootstate)
  86.  
  87. for i in range(maxiters):
  88. node = root
  89. board = rootstate.copy()
  90.  
  91. # selection - select best child if parent fully expanded and not terminal
  92. while node.untried_actions == [] and node.children != []:
  93. node = node.select()
  94. board.move(node.action)
  95.  
  96. # expansion - expand parent to a random untried action
  97. if node.untried_actions != []:
  98. a = random.choice(node.untried_actions)
  99. board.move(a)
  100. node = node.expand(a, board.copy())
  101.  
  102. # simulation - rollout to terminal state from current
  103. # state using random actions
  104. while board.get_moves() != [] and not board.result():
  105. board.move(random.choice(board.get_moves()))
  106.  
  107. # backpropagation - propagate result of rollout game up the tree
  108. # reverse the result if player at the node lost the rollout game
  109. while node != None:
  110. result = board.result()
  111. if result:
  112. if node.board.player==board.player:
  113. result = 1
  114. else: result = -1
  115. else: result = 0
  116. node.update(result)
  117. node = node.parent
  118.  
  119. s = sorted(root.children, key=lambda c:c.wins/c.visits)
  120. return s[-1].action
Add Comment
Please, Sign In to add comment