Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # This is a very simple implementation of the UCT Monte Carlo Tree Search algorithm in Python 2.7.
- # The function UCT(rootstate, itermax, verbose = False) is towards the bottom of the code.
- # It aims to have the clearest and simplest possible code, and for the sake of clarity, the code
- # is orders of magnitude less efficient than it could be made, particularly by using a
- # state.GetRandomMove() or state.DoRandomRollout() function.
- #
- # Example GameState classes for Nim, OXO and Othello are included to give some idea of how you
- # can write your own GameState use UCT in your 2-player game. Change the game to be played in
- # the UCTPlayGame() function at the bottom of the code.
- #
- # Written by Peter Cowling, Ed Powley, Daniel Whitehouse (University of York, UK) September 2012.
- #
- # Licence is granted to freely use and distribute for any sensible/legal purpose so long as this comment
- # remains in any distributed code.
- #
- # For more information about Monte Carlo Tree Search check out our web site at www.mcts.ai
- from math import log, sqrt
- import random
- import numpy as np
- from copy import deepcopy
- class BigGameState:
- def __init__(self):
- self.board = np.zeros((10, 10), dtype="int8")
- self.curr = 1
- self.playerJustMoved = 2 # At the root pretend the player just moved is player 2 - player 1 has the first move
- def Clone(self):
- """ Create a deep clone of this game state.
- """
- st = BigGameState()
- st.playerJustMoved = self.playerJustMoved
- st.curr = self.curr
- st.board = deepcopy(self.board)
- return st
- def DoMove(self, move):
- """ Update a state by carrying out the given move.
- Must update playerJustMoved.
- """
- self.playerJustMoved = 3 - self.playerJustMoved
- if move >= 1 and move <= 9 and move == int(move) and self.board[self.curr][move] == 0:
- self.board[self.curr][move] = self.playerJustMoved
- self.curr = move
- def GetMoves(self):
- """ Get all possible moves from this state.
- """
- return [i for i in range(1, 10) if self.board[self.curr][i] == 0]
- def GetResult(self, playerjm):
- """ Get the game result from the viewpoint of playerjm.
- """
- for bo in self.board:
- for (x,y,z) in [(1,2,3),(4,5,6),(7,8,9),(1,4,7),(2,5,8),(3,6,9),(1,5,9),(3,5,7)]:
- if bo[x] == [y] == bo[z]:
- if bo[x] == playerjm:
- return 1.0
- else:
- return 0.0
- if self.GetMoves() == []: return 0.5 # draw
- def drawboard(self):
- print_board_row(self.board, 1, 2, 3, 1, 2, 3)
- print_board_row(self.board, 1, 2, 3, 4, 5, 6)
- print_board_row(self.board, 1, 2, 3, 7, 8, 9)
- print(" ------+-------+------")
- print_board_row(self.board, 4, 5, 6, 1, 2, 3)
- print_board_row(self.board, 4, 5, 6, 4, 5, 6)
- print_board_row(self.board, 4, 5, 6, 7, 8, 9)
- print(" ------+-------+------")
- print_board_row(self.board, 7, 8, 9, 1, 2, 3)
- print_board_row(self.board, 7, 8, 9, 4, 5, 6)
- print_board_row(self.board, 7, 8, 9, 7, 8, 9)
- print()
- def print_board_row(board, a, b, c, i, j, k):
- # The marking script doesn't seem to like this either, so just take it out to submit
- print("", board[a][i], board[a][j], board[a][k], end = " | ")
- print(board[b][i], board[b][j], board[b][k], end = " | ")
- print(board[c][i], board[c][j], board[c][k])
- class Node:
- """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
- Crashes if state not specified.
- """
- def __init__(self, move = None, parent = None, state = None):
- self.move = move # the move that got us to this node - "None" for the root node
- self.parentNode = parent # "None" for the root node
- self.childNodes = []
- self.wins = 0
- self.visits = 0
- self.untriedMoves = state.GetMoves() # future child nodes
- self.playerJustMoved = state.playerJustMoved # the only part of the state that the Node needs later
- def UCTSelectChild(self):
- """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
- lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
- exploration versus exploitation.
- """
- s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + 0.2 * sqrt(2*log(self.visits)/c.visits))[-1]
- return s
- def AddChild(self, m, s):
- """ Remove m from untriedMoves and add a new child node for this move.
- Return the added child node
- """
- n = Node(move = m, parent = self, state = s)
- self.untriedMoves.remove(m)
- self.childNodes.append(n)
- return n
- def Update(self, result):
- """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
- """
- self.visits += 1
- self.wins += result
- def __repr__(self):
- return "[M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"
- def TreeToString(self, indent):
- s = self.IndentString(indent) + str(self)
- for c in self.childNodes:
- s += c.TreeToString(indent+1)
- return s
- def IndentString(self,indent):
- s = "\n"
- for i in range (1,indent+1):
- s += "| "
- return s
- def ChildrenToString(self):
- s = ""
- for c in self.childNodes:
- s += str(c) + "\n"
- return s
- def UCT(rootstate, itermax, verbose = False):
- """ Conduct a UCT search for itermax iterations starting from rootstate.
- Return the best move from the rootstate.
- Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""
- rootnode = Node(state = rootstate)
- for i in range(itermax):
- node = rootnode
- state = rootstate.Clone()
- # Select
- while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
- node = node.UCTSelectChild()
- state.DoMove(node.move)
- # Expand
- if node.untriedMoves != []: # if we can expand (i.e. state/node is non-terminal)
- m = random.choice(node.untriedMoves)
- state.DoMove(m)
- node = node.AddChild(m,state) # add child and descend tree
- # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
- while state.GetMoves() != []: # while state is non-terminal
- state.DoMove(random.choice(state.GetMoves()))
- # Backpropagate
- while node != None: # backpropagate from the expanded node and work back to the root node
- node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
- node = node.parentNode
- # Output some information about the tree - can be omitted
- if (verbose): print(rootnode.TreeToString(0))
- else: print(rootnode.ChildrenToString())
- return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
- def UCTPlayGame():
- """ Play a sample game between two UCT players where each player gets a different number
- of UCT iterations (= simulations = tree nodes).
- """
- state = BigGameState() # uncomment to play OXO
- while (state.GetMoves() != []):
- state.drawboard()
- m = UCT(rootstate = state, itermax = 1000, verbose = False) # play with values for itermax and verbose = True
- print("Best Move: " + str(m) + "\n")
- state.DoMove(m)
- if state.GetResult(state.playerJustMoved) == 1.0:
- print("Player " + str(state.playerJustMoved) + " wins!")
- elif state.GetResult(state.playerJustMoved) == 0.0:
- print("Player " + str(3 - state.playerJustMoved) + " wins!")
- else: print("Nobody wins!")
- if __name__ == "__main__":
- """ Play a single game to the end using UCT for both players.
- """
- UCTPlayGame()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement