Advertisement
SciQuest_csp

MCTS-AI, Main

Mar 23rd, 2012
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 9.28 KB | None | 0 0
  1. -- README / INTRODUCTION --
  2. -- Welcome! Monte-Carlo Tree Search (MCTS) is a probabilistic technique for finding a
  3. -- probably approximately correct solution to a generally intractable problem.
  4. -- What that MEANS is that it is good at finding 'good enough' solutions to
  5. -- problems that would else wise be a borderline impossible to solve (like AI!).
  6. -- Monte-Carlo references a city known for its gambling, and tree search just
  7. -- means that it is searching a tree data structure. (note, no actual explicit tree
  8. -- data structure is in this program)
  9.  
  10. -- WHAT IT ACTUAL DOES (in layman's terms)
  11. -- Here, MCTS is used as an AI. On the AI's turn, it will start running simulations.
  12. -- It will create a list of moves it is capable of making, then make the first move
  13. -- on a copy of the actual game board (this copy is called 'sim') and from there it
  14. -- will play a random game, making random moves, and remember who won. It will do this
  15. -- up to thousands of times per potential move. These random games are called playouts.
  16. -- It will then calculate which move had the highest percentage of wins for the AI,
  17. -- and assume that this is the best move to make, then make that move.
  18.  
  19. -- WHAT THE OPTIONS DO
  20. -- Game: controls which game you are making. 0 is intended for instructions.
  21. -- AIonline: turns the AI on & off. When it is off you can play against another human.
  22. -- AIstarts: decides who starts, red (player) or blue (computer).
  23. -- Difficulty: controls how many playouts are made per potential move.
  24. -- -- It is an EXPONENTIAL scale! 10^n
  25. -- -- This means it is entirely possible to CRASH CODEA by turning up the difficulty!!
  26. -- MaxSearchDepth: number of ply deep in the search tree the playout will simulate.
  27. -- -- If the playout ends before the search horizon, board:positionStrength() is
  28. -- -- called and returns a rough estimate of how the game is going. Higher numbers
  29. -- -- are better for the AI.
  30. -- Timeout: planned, not implement. creates a hard-cap on the time-complexity of mcts().
  31.  
  32. -- IMPORTANT VARIABLES
  33. -- board: the actual game board. an instance of one of many possible classes.
  34. -- -- each of these classes (ConnectFour, etc) must implement certain functions.
  35. -- -- specifically, those functions called by MCTS and playout. (amongst others)
  36. -- sim: another instance of the same class as board. used for playouts.
  37. -- player, ai: currently stored in each class individual. store values of 0 and 1.
  38. -- -- these could/should be global variables though.
  39.  
  40. -- NOTE
  41. -- parameters are saved so that they persist through resets. this could potentially
  42. -- get you in trouble. (ie, AIstarts==1, Difficulty==4, MaxSearchDepth==0, Game~=0, etc)
  43.  
  44. -- END OF INTRODUCTION --
  45.  
  46. -- Just some colors, for convenience & consistency
  47.  red = color(255, 0, 0, 255)
  48.  blue = color(0, 0, 255, 255)
  49.  black = color(0, 0, 0, 255)
  50.  white = color(255, 255, 255, 255)
  51.  gray = color(127, 127, 127, 255)
  52.  
  53. -- For parameters common to all games
  54.  function setupParams()
  55.      iparameter("Game", 0, 3)
  56.      iparameter("AIonline", 0, 1)
  57.      iparameter("AIstarts", 0, 1)
  58.      parameter("Difficulty", 0, 4)
  59.      iparameter("MaxSearchDepth", 0, 20)
  60.      
  61.      -- reads persistent data.
  62.      -- how i am keeping the parameters persistent is clunky.
  63.      Game = readProjectData("game", 0)
  64.      AIonline = readProjectData("aiOnline", 1)
  65.      AIstarts = readProjectData("aiStarts", 0)
  66.      Difficulty = readProjectData("difficulty", 2)
  67.      MaxSearchDepth = readProjectData("maxSearchDepth", 6)
  68.  end
  69.  
  70. -- invoked to save parameters common to all games
  71.  function saveParams()
  72.      saveProjectData("game", Game)
  73.      saveProjectData("aiOnline", AIonline)
  74.      saveProjectData("aiStarts", AIstarts)
  75.      saveProjectData("difficulty", Difficulty)
  76.      saveProjectData("maxSearchDepth", MaxSearchDepth)
  77.  end
  78.  
  79. -- called once (and first) at start up.
  80.  function setup()
  81.      wait = true
  82.      setupParams()
  83.      strokeWidth(5)
  84.      prevGame = 0
  85.  end
  86.  
  87. -- updates the important global variables board and sim to reflect the
  88. -- correct game. I dont like how this is handled either, as it violates
  89. -- the 0-1-infinity rule.
  90.  function startGame()
  91.      clearOutput()
  92.      -- print("Game 0: Tutorial")
  93.      print("Game 1: Connect")
  94.      print("Game 2: Mancala")
  95.      print("Game 3: TicTacToe")
  96.      if Game == 0 then
  97.          -- display tutorial stuff.
  98.          board = nil
  99.          sim = nil
  100.      elseif Game == 1 then
  101.          print("loading Connect")
  102.          ConnectFour:setupParams()
  103.          board = ConnectFour()
  104.          sim = ConnectFour()
  105.      elseif Game == 2 then
  106.          print("loading Mancala")
  107.          Mancala:setupParams()
  108.          board = Mancala()
  109.          sim = Mancala()
  110.      elseif Game == 3 then
  111.          print("loading TicTacToe")
  112.          -- no params to set up
  113.          board = TicTacToe()
  114.          sim = TicTacToe()
  115.      end
  116.  end
  117.  
  118. -- the main game loop!
  119.  function draw()
  120.      background(40, 40, 50)
  121.      saveParams()
  122.      
  123.      -- what to do if the game has changed
  124.      if Game ~= prevGame then
  125.          fin = false
  126.          clearParameters()
  127.          clearOutput()
  128.          setupParams()
  129.          startGame()
  130.          prevGame = Game
  131.      end
  132.      
  133.      -- this should only happen if game == 0
  134.      if board==nil then return end
  135.  
  136.      -- yet again, i dont like this, but it works.
  137.      -- i should theoretically at least make this check to see if
  138.      -- a param has changed before saving it.
  139.      board:saveParams()
  140.      board:draw()
  141.      
  142.      -- this gets reset if the game changes.
  143.      if fin == true then return end
  144.  
  145.      if board.gameWon == board.ai then
  146.          print("ai wins!")
  147.          fin = true
  148.          return
  149.      elseif board.gameWon == board.player then
  150.          fin = true
  151.          print("player wins!")
  152.          return
  153.      end
  154.      
  155.      -- the purpose of the wait variable is to add a 1 frame delay before calling MCTS.
  156.      -- this allows the board to be rendered before calling the (potentially slow) AI
  157.      if wait == true then
  158.          wait = false
  159.      elseif board.turn == board.ai and AIonline == 1 and board.gameWon == nil then
  160.          mcts()
  161.      end
  162.  end
  163.  
  164. -- accepts player input!
  165.  function touched(t)
  166.      if t.state == ENDED and board:gameOver() == nil then
  167.          print("move acknowledged. please wait...")
  168.          -- all the heavy lifting is handled by each game individually
  169.          board:touched(t)
  170.          wait = true
  171.      end
  172.  end
  173.  
  174. -- where the magic happens! (aka, the AI)
  175. -- this could be split into smaller functions, but i like it together.
  176.  function mcts()
  177.      -- initial setup and building list of all potential moves
  178.      wait = true
  179.      local playouts = math.floor(10^Difficulty)
  180.      possibleMoves = {}
  181.      searchResults = {}
  182.      local count = 0
  183.      interval = board:possibleMovesInterval()
  184.      for i = interval.x, interval.y do
  185.          if board:validMove(i) == true then
  186.              count = count + 1
  187.              table.insert(possibleMoves, count, i)
  188.              table.insert(searchResults, count, 0)
  189.          end
  190.      end
  191.  
  192.      -- if there are no possible moves, the game ends.
  193.      print("number of possible moves is "..count)
  194.      if count == 0 then
  195.          fin = true
  196.          return
  197.      end
  198.  
  199.      -- runs the playouts for each potential move, stores results, and
  200.      for i = 1, count do
  201.          local sum = 0
  202.          for j = 1, playouts do
  203.              sim:copy(board)
  204.              sim:move(possibleMoves[i])
  205.              local playoutResult = playout(sim)
  206.              sum = sum + playoutResult
  207.          end
  208.          sum = sum / playouts
  209.          searchResults[i] = sum
  210.          print("probability for move "..possibleMoves[i].." is "..sum)
  211.      end
  212.  
  213.      -- we want to maximize the search result. 0=player, 1=ai
  214.      -- actually, couldnt this stuff be done in the outer loop above? ...ill fix that.
  215.      bestResult = -99
  216.      bestIndex = -99
  217.      for i,r in pairs(searchResults) do
  218.          if bestResult < r then
  219.              bestResult = r
  220.              bestIndex = i
  221.          end
  222.      end
  223.  
  224.      -- actually make the move.
  225.      print("bestResult is "..bestResult)
  226.      print("bestMove is "..possibleMoves[bestIndex])
  227.      if bestIndex == -99 then
  228.          -- never had this get called. hopeful that means everything works!
  229.          print("oops! mcts failed? (thats not good)")
  230.      else
  231.          board:move(possibleMoves[bestIndex])
  232.      end
  233.  end
  234.  
  235. -- simulates a random game!
  236. -- **THIS IS THE PERFORMANCE BOTTLENECK**
  237. -- AI's effective strength can increase with a more efficient playout.
  238.  function playout(b)
  239.      local result = b:gameOver()
  240.      local searchDepth = 0
  241.      while (result == nil) do
  242.          local movesLeft = b:numberOfMoves()
  243.          if movesLeft == 0 or MaxSearchDepth <= searchDepth and MaxSearchDepth ~= 0 then
  244.              return b:positionStrength()
  245.          end
  246.          b:randomMove( movesLeft )
  247.          searchDepth = searchDepth + 1
  248.          result = b:gameOver()
  249.      end
  250.      return result
  251.  end
  252.  
  253. -- individual boards can each call this function like: flipTurn(self)
  254.  function flipTurn(b)
  255.      if b.turn == b.player then
  256.          b.turn = b.ai
  257.      else
  258.          b.turn = b.player
  259.      end
  260.  end
  261.  
  262. -- should return a random truth value. anyone know a better way to do this?
  263.  function randBool()
  264.      return math.random(2) == 1
  265.  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement