Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <string>
- #include <algorithm>
- #include <iostream>
- #include <vector>
- #include <cstdio>
- #include <cstring>
- #include <sstream>
- #include <cmath>
- #include <cassert>
- #include <queue>
- #include <bitset>
- #include <map>
- #include <set>
- #define pb push_back
- #define mp make_pair
- #define sz(v) ((int)(v).size())
- #define all(v) (v).begin(),(v).end()
- using namespace std;
- typedef long long int64;
- typedef pair<int, int> ii;
- typedef vector<int> vi;
- const int N = 10;
- const int N2 = N * N;
- struct Move {
- int x0, y0;
- int x1, y1;
- int x2, y2;
- Move() : x0(-1) {}
- Move(int x0, int y0, int x1, int y1, int x2, int y2)
- : x0(x0), y0(y0), x1(x1), y1(y1), x2(x2), y2(y2) {}
- Move(int cell0, int cell1, int cell2)
- : x0(cell0 / N), y0(cell0 % N),
- x1(cell1 / N), y1(cell1 % N),
- x2(cell2 / N), y2(cell2 % N) {}
- bool operator<(const Move& other) const {
- return false;
- }
- bool IsValid() const {
- return x0 != -1;
- }
- int Cell0() const {
- return x0 * N + y0;
- }
- int Cell1() const {
- return x1 * N + y1;
- }
- int Cell2() const {
- return x2 * N + y2;
- }
- };
- struct ScoringConfig {
- int own;
- int both;
- ScoringConfig() : own(16), both(1) {}
- ScoringConfig(int own, int both)
- : own(own), both(both) {}
- };
- struct Board {
- // TODO: reverse index for queens.
- char field[N2];
- int player;
- int move_index;
- bool has_score;
- int score;
- Board() {
- memset(field, 0, sizeof(field));
- player = 0;
- move_index = 0;
- has_score = false;
- score = 0;
- field[0 * N + 3] = -1;
- field[0 * N + 6] = -1;
- field[3 * N + 0] = -1;
- field[3 * N + 9] = -1;
- field[6 * N + 0] = 1;
- field[6 * N + 9] = 1;
- field[9 * N + 3] = 1;
- field[9 * N + 6] = 1;
- }
- Board(const Board& other) {
- memcpy(field, other.field, sizeof(field));
- player = other.player;
- move_index = other.move_index;
- has_score = other.has_score;
- score = other.score;
- }
- vector<Move> GetAllMoves() {
- vector<Move> res;
- int p = player == 0 ? 1 : -1;
- for (int x = 0; x < N; ++x)
- for (int y = 0; y < N; ++y) {
- if (field[x * N + y] != p)
- continue;
- vector<int> cells = QueenMoves(x, y);
- for (int i = 0; i < sz(cells); ++i) {
- int x1 = cells[i] / N, y1 = cells[i] % N;
- swap(field[x * N + y], field[cells[i]]);
- vector<int> cells1 = QueenMoves(x1, y1);
- swap(field[x * N + y], field[cells[i]]);
- for (int j = 0; j < sz(cells1); ++j) {
- res.pb(Move(x, y, x1, y1, cells1[j] / N, cells1[j] % N));
- }
- }
- }
- return res;
- }
- Move GetRandomMove() {
- vi queens;
- int p = player == 0 ? 1 : -1;
- for (int x = 0; x < N; ++x)
- for (int y = 0; y < N; ++y) {
- if (field[x * N + y] != p) continue;
- queens.pb(x * N + y);
- }
- random_shuffle(all(queens));
- vector<vector<int> > queen_moves;
- int total_moves = 0;
- for (int i = 0; i < sz(queens); ++i) {
- int x = queens[i] / N, y = queens[i] % N;
- vector<int> cells = QueenMoves(x, y);
- queen_moves.pb(cells);
- total_moves += sz(cells);
- }
- if (total_moves == 0)
- return Move();
- int r = rand() % total_moves;
- for (int i = 0; i < sz(queens); ++i) {
- r -= sz(queen_moves[i]);
- if (r >= 0) continue;
- int x = queens[i] / N, y = queens[i] % N;
- vector<int> cells = queen_moves[i];
- random_shuffle(all(cells));
- for (int j = 0; j < sz(cells); ++j) {
- int x1 = cells[j] / N, y1 = cells[j] % N;
- swap(field[x * N + y], field[cells[j]]);
- vector<int> cells1 = QueenMoves(x1, y1);
- swap(field[x * N + y], field[cells[j]]);
- random_shuffle(all(cells1));
- assert(sz(cells1) > 0);
- return Move(x, y, x1, y1, cells1[0] / N, cells1[0] % N);
- }
- }
- return Move();
- }
- void ApplyMove(const Move& move) {
- int cell0 = move.Cell0();
- int cell1 = move.Cell1();
- int cell2 = move.Cell2();
- int p = player == 0 ? 1 : -1;
- assert(field[cell0] == p);
- assert(field[cell1] == 0);
- assert(field[cell2] == 0 || cell2 == cell0);
- field[cell0] = 0;
- field[cell1] = p;
- field[cell2] = p * 2;
- player = 1 - player;
- ++move_index;
- has_score = false;
- score = 0;
- }
- void UndoMove(const Move& move) {
- int cell0 = move.Cell0();
- int cell1 = move.Cell1();
- int cell2 = move.Cell2();
- int p = player == 0 ? -1 : 1;
- assert(field[cell0] == 0 || cell2 == cell0);
- assert(field[cell1] == p);
- assert(field[cell2] == 2 * p);
- field[cell1] = 0;
- field[cell2] = 0;
- field[cell0] = p;
- player = 1 - player;
- --move_index;
- has_score = false;
- score = 0;
- }
- vector<int> QueenMoves(int x, int y) const {
- vector<int> res;
- for (int dx = -1; dx <= 1; ++dx)
- for (int dy = -1; dy <= 1; ++dy) {
- if (dx == 0 && dy == 0) continue;
- int cx = x + dx, cy = y + dy;
- while (cx >= 0 && cx < N && cy >= 0 && cy < N &&
- field[cx * N + cy] == 0) {
- res.pb(cx * N + cy);
- cx += dx;
- cy += dy;
- }
- }
- return res;
- }
- int Score(const ScoringConfig& config) {
- if (has_score)
- return score;
- char u[N2];
- memset(u, 0, sizeof(u));
- char q[N2 + 4];
- bitset<N2> borders;
- int head = 0, tail = 0;
- int p = player == 0 ? 1 : -1;
- for (int s = -1; s <= 1; s += 2)
- for (int x = 0; x < N; ++x)
- for (int y = 0; y < N; ++y) {
- if (field[x * N + y] != -s * p) continue;
- q[tail++] = x * N + y;
- u[x * N + y] = field[x * N + y];
- }
- score = 0;
- while (tail > head) {
- int cell = q[head++];
- int nd = u[cell];
- if (nd < 0) --nd;
- else ++nd;
- int x = cell / N, y = cell % N;
- vector<int> cells = QueenMoves(x, y);
- for (int i = 0; i < sz(cells); ++i) {
- int c = cells[i];
- if (u[c]) {
- if (u[c] * p > 0 && u[c] == -nd && !borders[c]) {
- borders[c] = true;
- score += (config.both - config.own) * p;
- }
- continue;
- }
- u[c] = nd;
- if (u[c] < 0){
- score -= config.own;
- } else {
- score += config.own;
- }
- q[tail++] = c;
- }
- }
- has_score = true;
- return score;
- }
- string ToString() {
- ostringstream res;
- res << "Player: " << player << " (move: " << move_index << ", score: "
- << Score(ScoringConfig()) << ")\n";
- for (int i = 0; i < N; ++i) {
- for (int j = 0; j < N; ++j) {
- char c;
- if (field[i * N + j] == 0)
- c = '.';
- else if (field[i * N + j] == 1)
- c = 'x';
- else if (field[i * N + j] == -1)
- c = 'o';
- else
- c = '#';
- res << c;
- }
- res << "\n";
- }
- res << "\n";
- return res.str();
- }
- };
- class Strategy {
- public:
- virtual Move GetMove(Board board) = 0;
- };
- class RandomStrategy : public Strategy {
- public:
- virtual Move GetMove(Board board) {
- return board.GetRandomMove();
- }
- };
- class GreedyDepth1 : public Strategy {
- public:
- GreedyDepth1() {}
- GreedyDepth1(ScoringConfig scoring_config)
- : scoring_config(scoring_config) {}
- virtual Move GetMove(Board board) {
- vector<Move> moves = board.GetAllMoves();
- Move res;
- int res_score = -1000 * 1000;
- for (int i = 0; i < sz(moves); ++i) {
- Move move = moves[i];
- board.ApplyMove(move);
- int cur = board.Score(scoring_config);
- board.UndoMove(move);
- if (board.player == 1)
- cur *= -1;
- if (cur > res_score) {
- res_score = cur;
- res = move;
- }
- }
- return res;
- }
- private:
- ScoringConfig scoring_config;
- };
- const int inf = 1000 * 1000 * 1000;
- class AlphaBeta : public Strategy {
- public:
- AlphaBeta(ScoringConfig scoring_config, int max_depth, double tl, int max_moves)
- : max_depth(max_depth), tl(tl), max_moves(max_moves) {}
- virtual Move GetMove(Board board) {
- start_time = clock();
- Move res = Move();
- for (int depth = 1; depth <= max_depth; ++depth) {
- best_move = Move();
- Search(board, depth, -inf, inf, true /* is_root */);
- if (TLExceeded())
- break;
- res = best_move;
- }
- return res;
- }
- private:
- bool TLExceeded() {
- return clock() - start_time > tl * CLOCKS_PER_SEC;
- }
- int Search(Board& board, int depth, int alpha, int beta, bool is_root=false) {
- if (depth == 0)
- return board.Score(scoring_config);
- if (TLExceeded())
- return board.player == 0 ? -inf : inf;
- vector<Move> moves;
- vector<Move> raw_moves = board.GetAllMoves();
- vector<pair<double, Move> > scored_moves;
- for (int i = 0; i < sz(raw_moves); ++i) {
- Move move = raw_moves[i];
- board.ApplyMove(move);
- double score = board.Score(scoring_config);
- board.UndoMove(move);
- if (board.player == 0)
- score = -score;
- scored_moves.pb(mp(score, move));
- }
- sort(all(scored_moves));
- for (int i = 0; i < sz(scored_moves) && i < max_moves; ++i)
- moves.pb(scored_moves[i].second);
- if (sz(moves) == 0)
- return board.Score(scoring_config);
- if (depth == 1 && !is_root) {
- return scored_moves[0].first * (board.player == 0 ? -1 : 1);
- }
- if (board.player == 0) {
- int res = -inf;
- for (int i = 0; i < sz(moves); ++i) {
- board.ApplyMove(moves[i]);
- int cur = Search(board, depth - 1, alpha, beta);
- board.UndoMove(moves[i]);
- if (res < cur) {
- res = cur;
- if (is_root)
- best_move = moves[i];
- }
- if (res > alpha)
- alpha = res;
- if (beta <= alpha)
- break;
- }
- return res;
- } else {
- int res = inf;
- for (int i = 0; i < sz(moves); ++i) {
- board.ApplyMove(moves[i]);
- int cur = Search(board, depth - 1, alpha, beta);
- board.UndoMove(moves[i]);
- if (res > cur) {
- res = cur;
- if (is_root)
- best_move = moves[i];
- }
- if (res < beta)
- beta = res;
- if (beta <= alpha)
- break;
- }
- return res;
- }
- }
- ScoringConfig scoring_config;
- int max_depth;
- double tl;
- int max_moves;
- time_t start_time;
- Move best_move;
- };
- class MonteCarlo : public Strategy {
- public:
- struct Config {
- ScoringConfig scoring_config;
- double tl;
- int playout_depth;
- double exploration_coeff;
- int expansion_threshold;
- vector<int> playout_thresholds;
- vector<int> num_moves;
- Config() : tl(0.9), playout_depth(5), exploration_coeff(0.3) {
- expansion_threshold = 30;
- playout_thresholds.pb(expansion_threshold - 1);
- num_moves.pb(40);
- playout_thresholds.pb(1000);
- num_moves.pb(50);
- playout_thresholds.pb(2000);
- num_moves.pb(60);
- playout_thresholds.pb(3000);
- num_moves.pb(70);
- playout_thresholds.pb(6000);
- num_moves.pb(100);
- playout_thresholds.pb(8000);
- num_moves.pb(200);
- playout_thresholds.pb(10000);
- num_moves.pb(400);
- }
- };
- MonteCarlo() {}
- MonteCarlo(Config config) : config(config) {}
- virtual Move GetMove(Board board) {
- start_time = clock();
- root = new Node(board, config);
- int num_expands = 0;
- while (clock() - start_time < config.tl * CLOCKS_PER_SEC) {
- ExpandTree();
- ++num_expands;
- }
- Move res;
- double res_val = -1E100;
- for (int i = 0; i < sz(root->kids); ++i) {
- if (root->kids[i]->num_playouts == 0) continue;
- double val = root->kids[i]->sum_scores / root->kids[i]->num_playouts;
- if (root->board.player == 1) val = -val;
- val -= sqrt(log(root->num_playouts) / root->kids[i]->num_playouts) * config.exploration_coeff;
- if (val > res_val) {
- res_val = val;
- res = root->moves[i];
- }
- }
- delete root;
- return res;
- }
- private:
- struct Node {
- Board board;
- const Config& config;
- bool initialized;
- vector<Move> moves; // sorted
- vector<Node*> kids;
- double num_playouts;
- double sum_scores;
- Node(Board board, const Config& config)
- : board(board), config(config), num_playouts(0), sum_scores(0),
- initialized(false) {}
- ~Node() {
- for (int i = 0; i < sz(kids); ++i)
- delete kids[i];
- }
- void Initialize() {
- if (initialized)
- return;
- initialized = true;
- vector<Move> raw_moves = board.GetAllMoves();
- vector<pair<double, Move> > scored_moves;
- for (int i = 0; i < sz(raw_moves); ++i) {
- Move move = raw_moves[i];
- board.ApplyMove(move);
- double score = board.Score(config.scoring_config);
- board.UndoMove(move);
- if (board.player == 0)
- score = -score;
- scored_moves.pb(mp(score, move));
- }
- sort(all(scored_moves));
- for (int i = 0; i < sz(scored_moves); ++i)
- moves.pb(scored_moves[i].second);
- }
- void Expand() {
- Initialize();
- int ind = 0;
- while (ind < sz(config.playout_thresholds) &&
- config.playout_thresholds[ind] <= num_playouts)
- ++ind;
- if (ind > 0)
- --ind;
- int cnt = config.num_moves[ind];
- while (sz(kids) < cnt && sz(kids) < sz(moves)) {
- Board new_board = board;
- Move move = moves[sz(kids)];
- new_board.ApplyMove(move);
- kids.pb(new Node(new_board, config));
- }
- }
- double ExplorationExploitationScore(double total_playouts) {
- double exploitation = sum_scores * (board.player == 0 ? -1 : 1) * 1.0 / num_playouts;
- double exploration = sqrt(log(total_playouts) / num_playouts);
- return exploration * config.exploration_coeff + exploitation;
- }
- };
- void ExpandTree() {
- Node* cur_node = root;
- vector<Node*> path;
- path.pb(cur_node);
- while (cur_node->num_playouts >= config.expansion_threshold) {
- cur_node->Expand();
- double total = 0.0;
- for (int i = 0; i < sz(cur_node->kids); ++i)
- total += cur_node->kids[i]->num_playouts;
- Node* kid = NULL;
- double kid_val = -1E100;
- for (int i = 0; i < sz(cur_node->kids); ++i) {
- Node* cur = cur_node->kids[i];
- if (cur->num_playouts == 0) {
- kid = cur;
- kid_val = 1E100;
- break;
- }
- double val = cur->ExplorationExploitationScore(total);
- if (val > kid_val) {
- kid = cur;
- kid_val = val;
- }
- }
- if (kid == NULL)
- break;
- cur_node = kid;
- path.pb(cur_node);
- }
- int val = PlayOut(cur_node->board);
- val = 1 - 2 * val;
- for (int i = 0; i < sz(path); ++i) {
- path[i]->num_playouts += 1;
- path[i]->sum_scores += val;
- }
- }
- int PlayOut(Board board) {
- for (int it = 0; it < config.playout_depth || board.player != 0; ++it) {
- Move move = board.GetRandomMove();
- if (!move.IsValid())
- return 1 - board.player;
- board.ApplyMove(move);
- }
- // Return player index.
- return board.Score(config.scoring_config) > 0 ? 0 : 1;
- }
- time_t start_time;
- Node* root;
- Config config;
- };
- int PlayGame(Strategy* strategy1, Strategy* strategy2) {
- Board board;
- while (board.GetAllMoves().size() > 0) {
- Move move = board.player == 0 ?
- strategy1->GetMove(board) :
- strategy2->GetMove(board);
- board.ApplyMove(move);
- cout << board.ToString() << "\n";
- }
- int res = 1 - board.player;
- cout << "Winner: " << res << "\n";
- return res;
- }
- double Evaluate(Strategy* strategy1, Strategy* strategy2) {
- double num = 0, den = 0;
- RandomStrategy initial_strategy;
- for (int it = 0; it < 100; ++it) {
- Board board;
- while (board.GetAllMoves().size() > 0) {
- // Make the first several moves randomly.
- if (board.move_index < 2) {
- Move move = initial_strategy.GetMove(board);
- board.ApplyMove(move);
- } else {
- Move move = (board.player + it * 1) % 2 == 0 ?
- strategy1->GetMove(board) :
- strategy2->GetMove(board);
- board.ApplyMove(move);
- }
- }
- int res = 1 - board.player;
- if (it % 2 == 1) res = 1 - res;
- num += 1.0 - res;
- den += 1.0;
- cerr << it << " " << res << "\n";
- if ((it + 1) % 10 == 0)
- cerr << "Iteration " << it << ": " << num / den << "\n";
- }
- return num / den;
- }
- void StdinMove(Strategy* strategy) {
- Board board;
- for (int i = 0; i < N; ++i)
- for (int j = 0; j < N; ++j) {
- int c = i * N + j;
- int x;
- cin >> x;
- if (x == 0)
- board.field[c] = 0;
- else if (x == 1)
- board.field[c] = 1;
- else if (x == 2)
- board.field[c] = -1;
- else
- board.field[c] = -2;
- }
- int player;
- cin >> player;
- board.player = player == 1 ? 0 : 1;
- Move move = strategy->GetMove(board);
- cout << move.x0 << " " << move.y0 << "\n";
- cout << move.x1 << " " << move.y1 << "\n";
- cout << move.x2 << " " << move.y2 << "\n";
- }
- int main() {
- srand(time(0));
- RandomStrategy random_strategy;
- ScoringConfig default_config;
- GreedyDepth1 greedy(default_config);
- AlphaBeta alpha_beta(default_config, 12, 0.5, 400);
- MonteCarlo monte_carlo;
- //PlayGame(&monte_carlo, &alpha_beta);
- StdinMove(&monte_carlo);
- //ScoringConfig old_config(1, 1);
- //GreedyDepth1 greedy_old(old_config);
- //MonteCarlo::Config new_config;
- //new_config.scoring_config = ScoringConfig(16, 1);
- //new_config.tl = 0.9;
- //MonteCarlo monte_carlo_new(new_config);
- //double res = Evaluate(&monte_carlo, &monte_carlo_new);
- //cerr << "Val: " << res << "\n";
- return 0;
- }
Add Comment
Please, Sign In to add comment