Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- class Board(object):
- def __init__(self):
- self.b = {i:None for i in range(1,10)}
- self.games_played = 0
- def get_available(self):
- return [pos for (pos, taken) in self.b.items() if taken is None]
- def get_played_positions(self, piece):
- return [pos for (pos, taken) in self.b.items() if taken == piece]
- def place_piece(self, position, piece):
- if position not in self.get_available() or not 1 <= position < 10:
- raise ValueError('%d is not a valid position' % position)
- self.b[position] = piece
- def clear(self):
- self.b = {i:None for i in range(1,10)}
- def print_board(self):
- for next_char in self._yield_board():
- sys.stdout.write(next_char)
- def add_games_played(self):
- self.games_played += 1
- self.clear()
- def _yield_board(self):
- order = [7, 8, 9, 4, 5, 6, 1, 2, 3]
- for value in order:
- if self.b[value]:
- yield " " + self.b[value] + " "
- else: yield ' _ '
- if value in (3, 6, 9):
- yield '\n'
- class Memory(object):
- def __init__(self):
- self.m = {i:[1, 0] for i in range(1,10)}
- self.total_loss = 0
- self.total_win = 0
- def add_loss(self, positions):
- for pos in positions:
- self.m[pos][1] += 1
- self.total_loss += 1
- def add_win(self, positions):
- for pos in positions:
- self.m[pos][0] += 1
- self.total_win += 1
- def get_win_ratio_all(self):
- return self.get_win_ratio([i for i in range(1,10)])
- def get_win_ratio(self, positions):
- ratio = []
- for pos in positions:
- if self.m[pos][1] == 0:
- ratio.append((pos, 1))
- else: ratio.append((pos, self.m[pos][0] / self.m[pos][1]))
- return ratio
- import random
- import operator
- winning_pos = set([(1,2,3), (1,4,7), (1,5,9), (2,5,8), (3,5,7), (3,6,9), (4,5,6), (7,8,9)])
- pieces = set(('O', 'X'))
- DIAGNOSTIC = 0
- class AI(object):
- def __init__(self, piece):
- '''An AI object which makes decisions based on some inbuilt memory of previous
- win/loss ratios based on played positions. The decision tree can be seen at
- a high level in play_next_move.'''
- self.mem = Memory()
- if piece not in pieces:
- raise ValueError('%s must be one of ("X", "O")' % piece)
- self.piece = piece
- self.other_piece = (pieces - set(piece)).pop()
- self.played = []
- self.set_win = False
- self.set_draw = False
- def play_next_move(self, board):
- '''We look to see if there is a winning move, if there isn't, we look to see if our
- opponent has a winning move, blocking them if they do. Finally, if neither of these
- options are possible, we randomly pick a move, which is weighted based on memories
- of past wins/losses stored in mem.'''
- if DIAGNOSTIC:
- board.print_board()
- print('\n')
- possible = board.get_available()
- self.set_win = self._win_move(possible, board, self.played)
- if self.set_win:
- self.mem.add_win(self.played)
- elif self._block_move(possible, board):
- pass
- else:
- self._decide_best_move(board, possible)
- def signal_loss(self, board):
- self.mem.add_loss(self.played)
- self.clear()
- def clear(self):
- self.played = []
- self.set_win = self.set_draw = False
- def _decide_best_move(self, board, possible):
- ratio = self.mem.get_win_ratio(possible)
- acc = accumulate(j for (i, j) in ratio)
- cum_sum = {pos:p for (pos, p) in zip(possible, acc)}
- if cum_sum == {}:
- self.set_draw = True
- return None
- for i in range(1,10):
- if i not in cum_sum:
- cum_sum[i] = 0
- rv = random.random() * max(cum_sum.values())
- j = 1
- while cum_sum[j] < rv and j < 9:
- j += 1
- self.played.append(j)
- board.place_piece(j, self.piece)
- def _win_move(self, possible, board, played):
- '''See if we have a possible winning move. This is done in a pretty crappy brute
- force way: we look at each pair of played moves, and see if the set difference
- between our played moves and each of our winning positions is a single position.
- If it is, we check if that position is free. This is an O(n^2) method, which
- could really use a much better way of calculating possible winning moves.'''
- for pair in self._generate_pairs(played):
- sd = ((set(wp) - pair) for wp in winning_pos if len(set(wp) - pair) == 1)
- for s in sd:
- val = s.pop()
- if val in possible:
- board.place_piece(val, self.piece)
- self.played.append(val)
- return True
- return False
- def _block_move(self, possible, board):
- '''This is perhaps the only clever bit of code here. To search for moves to block,
- we pretend to be our opponent looking for a winning move. If we find one, we place
- a piece there, which has the effect of blocking them'''
- other_played = board.get_played_positions(self.other_piece)
- return self._win_move(possible, board, other_played)
- def _generate_pairs(self, played):
- for i in range(len(played) - 1):
- first = played[i]
- for second in played[i+1:]:
- yield set((first, second))
- def accumulate(iterable, func=operator.add):
- '''Unashamedly stolen from python itertools documentation:
- http://docs.python.org/dev/library/itertools.html#itertools.accumulate'''
- it = iter(iterable)
- total = next(it)
- yield total
- for element in it:
- total = func(total, element)
- yield total
- print(m.get_win_ratio_all())
- class Manager(object):
- def __init__(self, player0, player1, board):
- self.player0 = player0
- self.player1 = player1
- self.board = board
- self.draws = 0
- def play_game(self):
- while 1:
- self.player0.play_next_move(self.board)
- if self.player0.set_win:
- self.player1.signal_loss(self.board)
- self.player0.clear()
- self.board.add_games_played()
- return 0
- elif self.player0.set_draw:
- self.board.add_games_played()
- self.draws += 1
- self.player0.clear()
- self.player1.clear()
- self.board.add_games_played()
- return -1
- self.player1.play_next_move(self.board)
- if self.player1.set_win:
- self.player0.signal_loss(self.board)
- self.player1.clear()
- self.board.add_games_played()
- return 1
- elif self.player0.set_draw:
- self.draws += 1
- self.board.add_games_played()
- self.player0.clear()
- self.player1.clear()
- return -1
- def play_games(self, n):
- for i in range(n):
- self.play_game()
- if __name__ == '__main__':
- p1 = AI('X')
- p2 = AI('O')
- b = Board()
- man = Manager(p1, p2, b)
- man.play_games(10000)
- print(p1.mem.get_win_ratio_all())
- print(p2.mem.get_win_ratio_all())
- print('Draws: %d' % man.draws)
- print('p1 wins: %d' % p1.mem.total_win)
- print('p2 wins: %d ' % p2.mem.total_win)
- print('win ratio: %f' % (p1.mem.total_win / p2.mem.total_win))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement