Advertisement
Guest User

AI

a guest
Mar 18th, 2012
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.89 KB | None | 0 0
  1. import sys
  2.  
  3. class Board(object):
  4.     def __init__(self):
  5.         self.b = {i:None for i in range(1,10)}
  6.         self.games_played = 0
  7.    
  8.     def get_available(self):
  9.         return [pos for (pos, taken) in self.b.items() if taken is None]
  10.    
  11.     def get_played_positions(self, piece):
  12.         return [pos for (pos, taken) in self.b.items() if taken == piece]
  13.        
  14.     def place_piece(self, position, piece):
  15.         if position not in self.get_available() or not 1 <= position < 10:
  16.             raise ValueError('%d is not a valid position' % position)
  17.         self.b[position] = piece
  18.    
  19.     def clear(self):
  20.         self.b = {i:None for i in range(1,10)}
  21.    
  22.     def print_board(self):
  23.         for next_char in self._yield_board():
  24.             sys.stdout.write(next_char)
  25.    
  26.     def add_games_played(self):
  27.         self.games_played += 1
  28.         self.clear()
  29.    
  30.     def _yield_board(self):
  31.         order = [7, 8, 9, 4, 5, 6, 1, 2, 3]
  32.         for value in order:
  33.             if self.b[value]:
  34.                 yield " " + self.b[value] + " "
  35.             else: yield ' _ '
  36.             if value in (3, 6, 9):
  37.                 yield '\n'
  38.                
  39. class Memory(object):
  40.     def __init__(self):
  41.         self.m = {i:[1, 0] for i in range(1,10)}
  42.         self.total_loss = 0
  43.         self.total_win = 0
  44.    
  45.     def add_loss(self, positions):
  46.         for pos in positions:
  47.             self.m[pos][1] += 1
  48.         self.total_loss += 1
  49.    
  50.     def add_win(self, positions):
  51.         for pos in positions:
  52.             self.m[pos][0] += 1
  53.         self.total_win += 1
  54.            
  55.     def get_win_ratio_all(self):
  56.         return self.get_win_ratio([i for i in range(1,10)])
  57.    
  58.     def get_win_ratio(self, positions):
  59.         ratio = []
  60.         for pos in positions:
  61.             if self.m[pos][1] == 0:
  62.                 ratio.append((pos, 1))
  63.             else: ratio.append((pos, self.m[pos][0] / self.m[pos][1]))
  64.         return ratio
  65.  
  66.  
  67. import random
  68. import operator
  69.  
  70. winning_pos = set([(1,2,3), (1,4,7), (1,5,9), (2,5,8), (3,5,7), (3,6,9), (4,5,6), (7,8,9)])
  71. pieces = set(('O', 'X'))
  72. DIAGNOSTIC = 0
  73.  
  74. class AI(object):
  75.     def __init__(self, piece):
  76.         '''An AI object which makes decisions based on some inbuilt memory of previous
  77.        win/loss ratios based on played positions. The decision tree can be seen at
  78.        a high level in play_next_move.'''
  79.         self.mem = Memory()
  80.         if piece not in pieces:
  81.             raise ValueError('%s must be one of ("X", "O")' % piece)
  82.         self.piece = piece
  83.         self.other_piece = (pieces - set(piece)).pop()
  84.         self.played = []
  85.         self.set_win = False
  86.         self.set_draw = False
  87.    
  88.     def play_next_move(self, board):
  89.         '''We look to see if there is a winning move, if there isn't, we look to see if our
  90.        opponent has a winning move, blocking them if they do. Finally, if neither of these
  91.        options are possible, we randomly pick a move, which is weighted based on memories
  92.        of past wins/losses stored in mem.'''
  93.         if DIAGNOSTIC:
  94.             board.print_board()
  95.             print('\n')
  96.         possible = board.get_available()
  97.         self.set_win = self._win_move(possible, board, self.played)
  98.         if self.set_win:
  99.             self.mem.add_win(self.played)
  100.         elif self._block_move(possible, board):
  101.             pass
  102.         else:
  103.             self._decide_best_move(board, possible)
  104.              
  105.     def signal_loss(self, board):
  106.         self.mem.add_loss(self.played)
  107.         self.clear()
  108.        
  109.     def clear(self):
  110.         self.played = []
  111.         self.set_win = self.set_draw = False
  112.    
  113.     def _decide_best_move(self, board, possible):
  114.         ratio = self.mem.get_win_ratio(possible)
  115.         acc = accumulate(j for (i, j) in ratio)
  116.         cum_sum = {pos:p for (pos, p) in zip(possible, acc)}
  117.         if cum_sum == {}:
  118.             self.set_draw = True
  119.             return None
  120.         for i in range(1,10):
  121.             if i not in cum_sum:
  122.                 cum_sum[i] = 0
  123.         rv = random.random() * max(cum_sum.values())
  124.         j = 1
  125.         while cum_sum[j] < rv and j < 9:
  126.             j += 1
  127.         self.played.append(j)
  128.         board.place_piece(j, self.piece)
  129.            
  130.     def _win_move(self, possible, board, played):
  131.         '''See if we have a possible winning move. This is done in a pretty crappy brute
  132.        force way: we look at each pair of played moves, and see if the set difference
  133.        between our played moves and each of our winning positions is a single position.
  134.        If it is, we check if that position is free. This is an O(n^2) method, which
  135.        could really use a much better way of calculating possible winning moves.'''
  136.         for pair in self._generate_pairs(played):
  137.             sd = ((set(wp) - pair) for wp in winning_pos if len(set(wp) - pair) == 1)
  138.             for s in sd:
  139.                 val = s.pop()
  140.                 if val in possible:
  141.                     board.place_piece(val, self.piece)
  142.                     self.played.append(val)
  143.                     return True
  144.         return False
  145.    
  146.     def _block_move(self, possible, board):
  147.         '''This is perhaps the only clever bit of code here. To search for moves to block,
  148.        we pretend to be our opponent looking for a winning move. If we find one, we place
  149.        a piece there, which has the effect of blocking them'''
  150.         other_played = board.get_played_positions(self.other_piece)
  151.         return self._win_move(possible, board, other_played)
  152.  
  153.     def _generate_pairs(self, played):
  154.         for i in range(len(played) - 1):
  155.             first = played[i]
  156.             for second in played[i+1:]:
  157.                 yield set((first, second))
  158.  
  159. def accumulate(iterable, func=operator.add):
  160.     '''Unashamedly stolen from python itertools documentation:
  161.    http://docs.python.org/dev/library/itertools.html#itertools.accumulate'''
  162.     it = iter(iterable)
  163.     total = next(it)
  164.     yield total
  165.     for element in it:
  166.         total = func(total, element)
  167.         yield total
  168.     print(m.get_win_ratio_all())
  169.  
  170.  
  171. class Manager(object):
  172.     def __init__(self, player0, player1, board):
  173.         self.player0 = player0
  174.         self.player1 = player1
  175.         self.board = board
  176.         self.draws = 0
  177.        
  178.     def play_game(self):
  179.         while 1:
  180.             self.player0.play_next_move(self.board)
  181.            
  182.             if self.player0.set_win:
  183.                 self.player1.signal_loss(self.board)
  184.                 self.player0.clear()
  185.                 self.board.add_games_played()
  186.                 return 0
  187.             elif self.player0.set_draw:
  188.                 self.board.add_games_played()
  189.                 self.draws += 1
  190.                 self.player0.clear()
  191.                 self.player1.clear()
  192.                 self.board.add_games_played()
  193.                 return -1
  194.                
  195.             self.player1.play_next_move(self.board)
  196.            
  197.             if self.player1.set_win:
  198.                 self.player0.signal_loss(self.board)
  199.                 self.player1.clear()
  200.                 self.board.add_games_played()
  201.                 return 1
  202.             elif self.player0.set_draw:
  203.                 self.draws += 1
  204.                 self.board.add_games_played()
  205.                 self.player0.clear()
  206.                 self.player1.clear()
  207.                 return -1
  208.    
  209.     def play_games(self, n):
  210.         for i in range(n):
  211.             self.play_game()
  212.        
  213. if __name__ == '__main__':
  214.     p1 = AI('X')
  215.     p2 = AI('O')
  216.     b = Board()
  217.     man = Manager(p1, p2, b)
  218.     man.play_games(10000)
  219.  
  220.     print(p1.mem.get_win_ratio_all())
  221.     print(p2.mem.get_win_ratio_all())
  222.    
  223.     print('Draws: %d' % man.draws)
  224.     print('p1 wins: %d' % p1.mem.total_win)
  225.     print('p2 wins: %d ' % p2.mem.total_win)
  226.     print('win ratio: %f' % (p1.mem.total_win / p2.mem.total_win))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement