Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from functools import lru_cache
- def get_position_mask_bitmap(board, player):
- """Convert the game board numpy representation
- to a bitmap (encoded as integer). Store a bitmap
- for the positions of player his tokens and a
- bitmap for occupied cells. The order of
- the bits is the following (0-bit is least significant):
- . . . . . . .
- 5 12 19 26 33 40 47
- 4 11 18 25 32 39 46
- 3 10 17 24 31 38 45
- 2 9 16 23 30 37 44
- 1 8 15 22 29 36 43
- 0 7 14 21 28 35 42
- Args:
- board (numpy array)
- player (int (1 or 2))
- Return:
- position (int): token bitmap
- mask (int): occupied cell bitmap
- """
- position, mask = '', ''
- for j in range(6, -1, -1):
- mask += '0'
- position += '0'
- for i in range(0, 6):
- mask += ['0', '1'][board[i, j] != 0]
- position += ['0', '1'][board[i, j] == player]
- return int(position, 2), int(mask, 2)
- def print_bitmap(bitmap):
- """Print out the bitmap (7x7), encoded as int"""
- bitstring = bin(bitmap)[2:]
- bitstring = '0'*(49 - len(bitstring)) + bitstring
- for i in range(7):
- print(bitstring[i::7][::-1])
- @lru_cache(maxsize=None)
- def is_legal_move(mask, col):
- """If there is no 1-bit in the highest row
- of column `col`, then we can move there"""
- return (mask & (1 << 5 << (col*7))) == 0
- @lru_cache(maxsize=None)
- def make_move(position, mask, col):
- """Flip the highest zero-bit in column `col`
- of the mask bitmap and flip the bits in the
- position bitmap for the opponent to make a move"""
- new_position = position ^ mask
- new_mask = mask | (mask + (1 << (col*7)))
- return new_position, new_mask
- @lru_cache(maxsize=None)
- def connected_four(position):
- # Horizontal check
- m = position & (position >> 7)
- if m & (m >> 14):
- return True
- # Diagonal \
- m = position & (position >> 6)
- if m & (m >> 12):
- return True
- # Diagonal /
- m = position & (position >> 8)
- if m & (m >> 16):
- return True
- # Vertical
- m = position & (position >> 1)
- if m & (m >> 2):
- return True
- # Nothing found
- return False
- @lru_cache(maxsize=None)
- def possible(mask):
- bottom_mask = 4432676798593
- board_mask = bottom_mask * 63
- return (mask + bottom_mask) & board_mask
- @lru_cache(maxsize=None)
- def compute_winning_position(position, mask):
- # Vertical
- r = (position << 1) & (position << 2) & (position << 3)
- # Horizontal
- p = (position << 7) & (position << 14)
- r |= p & (position << 21)
- r |= p & (position >> 7)
- p >>= 21
- r |= p & (position << 7)
- r |= p & (position >> 21)
- # Diagonal \
- p = (position << 6) & (position << 12)
- r |= p & (position << 18)
- r |= p & (position >> 6)
- p >>= 18
- r |= p & (position << 6)
- r |= p & (position >> 18)
- # Diagonal /
- p = (position << 8) & (position << 16)
- r |= p & (position << 24)
- r |= p & (position >> 8)
- p >>= 24
- r |= p & (position << 8)
- r |= p & (position >> 24)
- return r & (4432676798593 * 63 ^ mask)
- @lru_cache(maxsize=None)
- def get_columns_with_bit(bmp):
- cols = [63, 8064, 1032192, 132120576, 16911433728,
- 2164663517184, 277076930199552]
- return [i for i in range(len(cols)) if cols[i] & bmp]
- @lru_cache(maxsize=None)
- def possible_non_losing_moves(position, mask, possible_mask):
- opponent_win = compute_winning_position(position ^ mask, mask)
- forced_moves = possible_mask & opponent_win
- if forced_moves:
- if forced_moves & (forced_moves - 1):
- return []
- else:
- possible_mask = forced_moves
- return get_columns_with_bit(possible_mask & ~(opponent_win >> 1))
- @lru_cache(maxsize=None)
- def popcount(m):
- c = 0
- while m:
- m &= (m - 1)
- c += 1
- return c
- @lru_cache(maxsize=2048)
- def insertion(M):
- idx = list(range(len(M)))
- M = list(M)
- for i in range(1, len(M)):
- if M[i] >= M[i-1]:
- continue
- for j in range(i):
- if M[i] < M[j]:
- M[j], M[j+1:i+1] = M[i], M[j:i]
- idx[j], idx[j+1:i+1] = idx[i], idx[j:i]
- break
- return M, idx
- def alphabeta_search(position, mask, ub_cache=None,
- lb_cache=None, valid_actions=[]):
- """Search game to determine best action; use alpha-beta pruning.
- this version searches all the way to the leaves."""
- # First explore columns in the center
- actions = [3, 2, 4, 1, 5, 0, 6]
- # Initialize our cache (dynamic programming)
- if ub_cache is None:
- ub_cache = {}
- if lb_cache is None:
- lb_cache = {}
- # Functions used by alphabeta
- def max_value(position, mask, alpha, beta, moves):
- possible_mask = possible(mask)
- # Check if game terminates
- if compute_winning_position(position, mask) & possible_mask:
- return int(np.ceil((42 - moves) / 2))
- key = position + mask
- # Try to get an upper bound for this position from cache
- try:
- v = ub_cache[key]
- if beta >= v:
- beta = v
- except KeyError as e:
- pass
- v = max(alpha, (-42 + moves)//2)
- if v >= alpha:
- alpha = v
- if alpha >= beta:
- return beta
- if moves < 30:
- # This helps for positions close to the end
- _actions = possible_non_losing_moves(position, mask, possible_mask)
- if not len(_actions):
- return -int(np.ceil((41 - moves) / 2))
- nr_winning_actions_per_col = [0]*7
- for col in actions:
- new_position, new_mask = make_move(position, mask, col)
- nr_winning_actions_per_col[col] = popcount(
- compute_winning_position(new_position ^ new_mask, new_mask)
- )
- # This can possibly be replaced with np.argsort
- _actions = insertion(tuple(nr_winning_actions_per_col))[1][::-1]
- else:
- _actions = actions
- for col in _actions:
- if is_legal_move(mask, col):
- new_position, new_mask = make_move(position, mask, col)
- v = max(v, min_value(
- new_position, new_mask,
- alpha, beta, moves + 1
- )
- )
- if v >= beta:
- return v
- if v > 0: # We found a move towards a win (not optimal)
- return v
- alpha = max(alpha, v)
- # Put our new upper bound in the cache
- ub_cache[key] = alpha
- return v
- def min_value(position, mask, alpha, beta, moves):
- possible_mask = possible(mask)
- # Check if game terminates
- if compute_winning_position(position, mask) & possible_mask:
- return -int(np.ceil((42 - moves) / 2))
- key = position + mask
- try:
- v = lb_cache[key]
- if alpha <= v:
- alpha = v
- except KeyError as e:
- pass
- v = min(beta, (42 - moves)//2)
- if v <= beta:
- beta = v
- if alpha >= beta:
- return alpha
- if moves < 30:
- # This helps for positions close to the end
- _actions = possible_non_losing_moves(position, mask, possible_mask)
- if not len(_actions):
- return int(np.ceil((41 - moves) / 2))
- nr_winning_actions_per_col = [0]*7
- for col in actions:
- new_position, new_mask = make_move(position, mask, col)
- nr_winning_actions_per_col[col] = popcount(
- compute_winning_position(new_position ^ new_mask, new_mask)
- )
- # This can possibly be replaced with np.argsort
- _actions = insertion(tuple(nr_winning_actions_per_col))[1][::-1]
- else:
- _actions = actions
- for col in _actions:
- if is_legal_move(mask, col):
- new_position, new_mask = make_move(position, mask, col)
- v = min(v, max_value(
- new_position, new_mask,
- alpha, beta, moves + 1
- )
- )
- if v <= alpha:
- return v
- if v < 0: # We found a move towards a win (not optimal)
- return v
- beta = min(beta, v)
- lb_cache[key] = beta
- return v
- # Body of alphabeta_cutoff_search:
- n_bits = popcount(mask)
- best_score = -(42 - n_bits)//2
- beta = (42 - n_bits)//2
- best_action = None
- nr_winning_actions_per_col = [0]*7
- for col in actions:
- if col in valid_actions and is_legal_move(mask, col):
- new_position, new_mask = make_move(position, mask, col)
- if connected_four(new_position ^ new_mask):
- return col, int(np.ceil((43 - n_bits - 1) / 2))
- nr_winning_actions_per_col[col] = popcount(
- compute_winning_position(new_position ^ new_mask, new_mask)
- )
- actions = insertion(tuple(nr_winning_actions_per_col))[1][::-1]
- for col in actions:
- if is_legal_move(mask, col):
- new_position, new_mask = make_move(position, mask, col)
- v = min_value(new_position, new_mask, best_score, beta, n_bits + 1)
- if v > best_score:
- best_score = v
- best_action = col
- return best_action, best_score
- def generate_move(board, player, saved_state):
- """Contains all code required to generate a move,
- given a current game state (board & player)
- Args:
- board (2D np.array): game board (element is 0, 1 or 2)
- player (int): your player number (token to place: 1 or 2)
- saved_state (object): returned value from previous call
- Returns:
- action (int): number in [0, 6]
- saved_state (optional, object): will be returned to you the
- next time your function is called
- """
- position, mask = get_position_mask_bitmap(board, player)
- return alphabeta_search(position, mask)
Add Comment
Please, Sign In to add comment