Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <fstream>
- #include <vector>
- #include <deque>
- #include <queue>
- #include <random>
- #include <cassert>
- #include <ctime>
- constexpr int SIZE = 10;
- constexpr int AMAZONS_CNT = 4;
- constexpr int INF = 1000*1000*1000;
- constexpr int WIN = 10000;
- using namespace std;
- typedef pair<int, int> pos_t;
- typedef pair<pair<pos_t, pos_t>, pos_t> move_t;
- typedef double score_t;
- class GameState {
- int board[SIZE][SIZE];
- int turn;
- pos_t amazons[2][AMAZONS_CNT];
- public:
- GameState(const int B[SIZE][SIZE], int t,
- vector<pos_t>& fst, vector<pos_t>& snd) {
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j)
- board[i][j] = B[i][j];
- }
- turn = t - 1;
- assert(fst.size() == AMAZONS_CNT);
- assert(snd.size() == AMAZONS_CNT);
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- amazons[0][i] = fst[i];
- int x = amazons[0][i].first;
- int y = amazons[0][i].second;
- assert(board[x][y] == 1);
- }
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- amazons[1][i] = snd[i];
- int x = amazons[1][i].first;
- int y = amazons[1][i].second;
- assert(board[x][y] == 2);
- }
- }
- GameState(const GameState& state) {
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j)
- board[i][j] = state.board[i][j];
- }
- turn = state.turn;
- for (int i = 0; i < 2; ++i) {
- for (int j = 0; j < AMAZONS_CNT; ++j) {
- amazons[i][j] = state.amazons[i][j];
- }
- }
- }
- inline const int* operator[](int index) { return board[index]; }
- inline const int get_turn() { return turn; }
- inline const pos_t* get_amazons(int player) { return amazons[player]; }
- void make_move(move_t move) {
- int amazon_x = move.first.first.first;
- int amazon_y = move.first.first.second;
- int target_x = move.first.second.first;
- int target_y = move.first.second.second;
- board[target_x][target_y] = board[amazon_x][amazon_y];
- board[amazon_x][amazon_y] = 0;
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- if (amazons[turn][i] == make_pair(amazon_x, amazon_y)) {
- amazons[turn][i] = make_pair(target_x, target_y);
- break;
- }
- }
- int arrow_x = move.second.first;
- int arrow_y = move.second.second;
- board[arrow_x][arrow_y] = -1;
- turn = 1 - turn;
- }
- deque<move_t> get_moves() {
- constexpr int DIR_CNT = 8;
- int dx[DIR_CNT] = {+1, +1, +1, 0, -1, -1, -1, 0};
- int dy[DIR_CNT] = {+1, 0, -1, -1, -1, 0, +1, +1};
- deque<move_t> res;
- for (int piece = 0; piece < AMAZONS_CNT; ++piece) {
- int x = amazons[turn][piece].first;
- int y = amazons[turn][piece].second;
- pos_t init_pos = make_pair(x, y);
- for (int dir = 0; dir < DIR_CNT; ++dir) {
- int cur_x = x;
- int cur_y = y;
- for (int len = 0; ; ++len) {
- cur_x += dx[dir];
- cur_y += dy[dir];
- if (cur_x < 0 || cur_x >= SIZE) break;
- else if (cur_y < 0 || cur_y >= SIZE) break;
- else if (board[cur_x][cur_y] != 0) break;
- else {
- pos_t target = make_pair(cur_x, cur_y);
- int tmp = board[x][y];
- board[x][y] = 0;
- for (int arrow_dir = 0;
- arrow_dir < DIR_CNT; ++arrow_dir) {
- int cur_arr_x = cur_x;
- int cur_arr_y = cur_y;
- for (int arrow_len = 0; ; ++len) {
- cur_arr_x += dx[arrow_dir];
- cur_arr_y += dy[arrow_dir];
- if (cur_arr_x < 0 || cur_arr_x >= SIZE)
- break;
- else if (cur_arr_y < 0 || cur_arr_y >= SIZE)
- break;
- else if (board[cur_arr_x][cur_arr_y] != 0)
- break;
- else {
- pos_t arrow =
- make_pair(cur_arr_x, cur_arr_y);
- res.push_back(
- make_pair(
- make_pair(init_pos, target),
- arrow));
- }
- }
- }
- board[x][y] = tmp;
- }
- }
- }
- }
- return res;
- }
- int get_winner() {
- int result = 1 - turn;
- constexpr int DIR_CNT = 8;
- int dx[DIR_CNT] = {+1, +1, +1, 0, -1, -1, -1, 0};
- int dy[DIR_CNT] = {+1, 0, -1, -1, -1, 0, +1, +1};
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- int x = amazons[turn][i].first;
- int y = amazons[turn][i].second;
- for (int dir = 0; dir < DIR_CNT; ++dir) {
- int new_x = x + dx[dir];
- int new_y = y + dy[dir];
- if (new_x < 0 || new_x >= SIZE) continue;
- if (new_y < 0 || new_y >= SIZE) continue;
- if (board[new_x][new_y] == 0) {
- result = -1;
- break;
- }
- }
- if (result == -1) break;
- }
- return result;
- }
- void print(ostream& out) {
- out << "Board:\n";
- out << " | ";
- for (int i = 0; i < SIZE; ++i) {
- out << i << " ";
- }
- out << "\n";
- out << "----";
- for (int i = 0; i < SIZE; ++i) {
- out << "--";
- }
- out << "\n";
- for (int i = 0; i < SIZE; ++i) {
- out << i << " | ";
- for (int j = 0; j < SIZE; ++j) {
- if (board[i][j] == -1) out << "* ";
- else out << board[i][j] << " ";
- }
- out << "\n";
- }
- out << "Turn: " << turn+1 << "\n";
- out << "Amazon positions:\n";
- out << "White (1): ";
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- out << "(" << amazons[0][i].first << ", "
- << amazons[0][i].second << ") ";
- }
- out << "\n";
- out << "Black (2): ";
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- out << "(" << amazons[1][i].first << ", "
- << amazons[1][i].second << ") ";
- }
- out << "\n";
- }
- };
- typedef vector<vector<double>> Matrix;
- class NeuralNetwork {
- vector<Matrix> weights, traces;
- double learning_rate, decay_rate;
- int cnt = 0;
- public:
- NeuralNetwork() {
- learning_rate = 0.1;
- decay_rate = 0.0;
- }
- NeuralNetwork(vector<int> input_sizes, double eta, double lambda) {
- assert(input_sizes[input_sizes.size() - 1] == 1);
- learning_rate = eta;
- decay_rate = lambda;
- std::uniform_real_distribution<double> unif{-1, +1};
- std::default_random_engine re(std::random_device{}());
- for (int i = 0; i < input_sizes.size() - 1; ++i) {
- Matrix w; Matrix t;
- int rows = input_sizes[i + 1];
- int cols = input_sizes[i] + 1;
- for (int j = 0; j < rows; ++j) {
- vector<double> line;
- t.push_back(line);
- for (int k = 0; k < cols; ++k) {
- line.push_back(unif(re));
- t[j].push_back(0);
- }
- w.push_back(line);
- }
- weights.push_back(w);
- traces.push_back(t);
- }
- }
- NeuralNetwork(vector<Matrix> w, double eta, double lambda) {
- weights = w;
- learning_rate = eta;
- decay_rate = lambda;
- for (int i = 0; i < weights.size(); ++i) {
- Matrix t;
- int rows = weights[i].size();
- int cols = weights[i][0].size();
- for (int j = 0; j < rows; ++j) {
- vector<double> line;
- t.push_back(line);
- for (int k = 0; k < cols; ++k) {
- t[j].push_back(0);
- }
- }
- traces.push_back(t);
- }
- }
- inline vector<double> feedforward(const vector<double>& inputs) const {
- auto signals = calc_outputs(inputs);
- return signals[signals.size() - 1];
- }
- void train(const vector<double>& inputs,
- const vector<double>& target) {
- vector<vector<double>> outputs = calc_outputs(inputs);
- vector<double> prev_deltas, deltas;
- for (int layer = outputs.size() - 1; layer >= 0; --layer) {
- for (int i = 1; i < outputs[layer].size(); ++i) {
- double o = outputs[layer][i];
- double delta = 0;
- if (layer == outputs.size() - 1) {
- delta = o * (1 - o) * (target[i-1] - o);
- }
- else {
- for (int k = 0;
- k < prev_deltas.size(); k++) {
- delta += prev_deltas[k] * weights[layer+1][k][i];
- }
- delta *= o * (1 - o);
- }
- deltas.push_back(delta);
- for (int j = 0; j < weights[layer][i-1].size();++j) {
- double grad = 0;
- if (layer > 0) {
- grad = delta * outputs[layer-1][j];
- }
- else {
- if (j > 0) {
- grad = delta * inputs[j-1];
- }
- else {
- grad = delta;
- }
- }
- traces[layer][i-1][j] *= decay_rate;
- traces[layer][i-1][j] += grad;
- weights[layer][i-1][j] +=
- learning_rate * traces[layer][i-1][j];
- }
- }
- prev_deltas = deltas;
- deltas.clear();
- }
- cnt += 1;
- if (cnt % 5000000 == 0) {
- learning_rate *= 0.99;
- learning_rate = max(0.05, learning_rate);
- cnt = 0;
- cerr << "*** ATTENTION ***\n";
- cerr << "New learning rate = " << learning_rate << "\n";
- }
- }
- inline vector<double> operator()(const vector<double>& inputs) const {
- return feedforward(inputs);
- }
- void print(ostream& out) {
- out << "{\n";
- for (int i = 0; i < weights.size(); ++i) {
- out << "{" << "\n";
- for (int j = 0; j < weights[i].size(); ++j) {
- auto v = weights[i][j];
- out << "{";
- for (int k = 0; k < weights[i][j].size(); ++k) {
- auto l = weights[i][j][k];
- out << l;
- if (k < weights[i][j].size() - 1) out << ", ";
- }
- out << "}";
- if (j < weights[i].size() - 1) out << ", ";
- out << "\n";
- }
- out << "}";
- if (i < weights.size() - 1) out << ", ";
- out << "\n";
- }
- out << "};\n";
- }
- private:
- inline double sigmoid(double x) const {
- return 1.0 / (1.0 + exp(-x));
- }
- inline double sigmoid_prime(double x) const {
- return sigmoid(x) * (1 - sigmoid(x));
- }
- vector<vector<double>> calc_outputs(const vector<double>& inputs) const {
- vector<vector<double>> res;
- vector<double> signals{1};
- for (auto i : inputs) {
- signals.push_back(i);
- }
- for (Matrix w : weights) {
- assert(w[0].size() == signals.size());
- vector<double> new_signals(w.size() + 1, 0);
- new_signals[0] = 1;
- for (int i = 1; i <= w.size(); ++i) {
- double c0, c1, c2, c3;
- size_t sz = w[i-1].size();
- for (int j = 0; j < sz; j += 4) {
- c0 = c1 = c2 = c3 = 0.0;
- c0 = signals[j] * w[i - 1][j];
- if (j+1 < sz)
- c1 = signals[j+1] * w[i - 1][j+1];
- if (j+2 < sz)
- c2 = signals[j+2] * w[i - 1][j+2];
- if (j+3 < sz)
- c3 = signals[j+3] * w[i - 1][j+3];
- new_signals[i] += c0+c1+c2+c3;
- }
- new_signals[i] = sigmoid(new_signals[i]);
- }
- signals = new_signals;
- res.push_back(signals);
- }
- return res;
- }
- };
- inline vector<double> get_vector(GameState& state) {
- constexpr int DIR_CNT = 8;
- constexpr int dx[DIR_CNT] = {+1, +1, +1, 0, -1, -1, -1, 0};
- constexpr int dy[DIR_CNT] = {+1, 0, -1, -1, -1, 0, +1, +1};
- vector<double> inputs{0};
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j) {
- if (state[i][j] == -1)
- inputs[0]++;
- }
- }
- inputs[0] /= 100;
- int turn = state.get_turn();
- // vector<vector<pos_t>> amazons(2, vector<pos_t>(AMAZONS_CNT));
- pos_t amazons[2][AMAZONS_CNT];
- constexpr int FEATURES_CNT = 13;
- constexpr int X_INDEX = 0, Y_INDEX = 1, DIR_MIN_INDEX = 2;
- constexpr int ONE_PLY_INDEX = DIR_MIN_INDEX + DIR_CNT;
- constexpr int TWO_PLY_INDEX = 1 + ONE_PLY_INDEX;
- constexpr int FAR_INDEX = 1 + TWO_PLY_INDEX;
- static_assert(FAR_INDEX + 1 == FEATURES_CNT, "Indices goin\' berzerk :(");
- double amazons_features_c[2*AMAZONS_CNT][FEATURES_CNT];
- for (int stage = 0; stage < 2; ++stage) {
- for (int i = 0; i < AMAZONS_CNT; ++i) {
- amazons[stage][i] = state.get_amazons(turn)[i];
- int x = amazons[stage][i].first;
- int y = amazons[stage][i].second;
- amazons_features_c[stage*AMAZONS_CNT+i][X_INDEX] = 1.0*x/10;
- amazons_features_c[stage*AMAZONS_CNT+i][Y_INDEX] = 1.0*y/10;
- for (int dir = 0; dir < DIR_CNT; ++dir) {
- int cur_x = x;
- int cur_y = y;
- int cnt = 0;
- while (true) {
- cur_x += dx[dir];
- cur_y += dy[dir];
- if (cur_x >= SIZE || cur_x < 0) break;
- if (cur_y >= SIZE || cur_y < 0) break;
- if (state[cur_x][cur_y] != 0) break;
- else ++cnt;
- }
- amazons_features_c[stage*AMAZONS_CNT+i][DIR_MIN_INDEX+dir] = 1.0*cnt/9;
- }
- }
- turn = 1 - turn;
- }
- int dist_map[2*AMAZONS_CNT][SIZE][SIZE];
- for (int i = 0; i < 2*AMAZONS_CNT; ++i)
- for (int j = 0; j < SIZE; ++j)
- for (int k = 0; k < SIZE; ++k)
- dist_map[i][j][k] = +INF;
- int control_map[2][SIZE][SIZE];
- for (int i = 0; i < 2; ++i)
- for (int j = 0; j < SIZE; ++j)
- for (int k = 0; k < SIZE; ++k)
- control_map[i][j][k] = +INF;
- int index = 0;
- for (int turn = 0; turn <= 1; ++turn) {
- queue<pos_t> Q;
- for (pos_t a : amazons[turn]) {
- dist_map[index][a.first][a.second] = 0;
- control_map[turn][a.first][a.second] = 0;
- Q.push(a);
- while (!Q.empty()) {
- pos_t a = Q.front(); Q.pop();
- int x = a.first, y = a.second;
- for (int dir = 0; dir < DIR_CNT; ++dir) {
- int cur_x = x;
- int cur_y = y;
- while (true) {
- cur_x += dx[dir];
- cur_y += dy[dir];
- if (cur_x >= SIZE || cur_x < 0) break;
- if (cur_y >= SIZE || cur_y < 0) break;
- if (state[cur_x][cur_y] != 0) break;
- if (dist_map[index][cur_x][cur_y] == INF) {
- dist_map[index][cur_x][cur_y] =
- dist_map[index][x][y] + 1;
- control_map[turn][cur_x][cur_y] =
- min(control_map[turn][cur_x][cur_y],
- dist_map[index][cur_x][cur_y]);
- Q.push(make_pair(cur_x, cur_y));
- }
- }
- }
- }
- ++index;
- }
- }
- int w_amazon_1ply[AMAZONS_CNT] = {0};
- int w_amazon_2ply[AMAZONS_CNT] = {0};
- int w_amazon_far[AMAZONS_CNT] = {0};
- for (int num = 0; num < AMAZONS_CNT; ++num) {
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j) {
- if (dist_map[num][i][j] < control_map[1][i][j] &&
- dist_map[num][i][j] > 0) {
- if (dist_map[num][i][j] == 1)
- w_amazon_1ply[num]++;
- else if (dist_map[num][i][j] == 2)
- w_amazon_2ply[num]++;
- else
- w_amazon_far[num]++;
- }
- }
- }
- amazons_features_c[num][ONE_PLY_INDEX] = (w_amazon_1ply[num] * 0.05);
- amazons_features_c[num][TWO_PLY_INDEX] = (w_amazon_2ply[num] * 0.05);
- amazons_features_c[num][FAR_INDEX] = (w_amazon_far[num] * 0.05);
- }
- int b_amazon_1ply[AMAZONS_CNT] = {0};
- int b_amazon_2ply[AMAZONS_CNT] = {0};
- int b_amazon_far[AMAZONS_CNT] = {0};
- for (int num = 0; num < AMAZONS_CNT; ++num) {
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j) {
- if (dist_map[num+AMAZONS_CNT][i][j] <
- control_map[0][i][j] &&
- dist_map[num+AMAZONS_CNT][i][j] > 0) {
- if (dist_map[num+AMAZONS_CNT][i][j] == 1)
- b_amazon_1ply[num]++;
- else if (dist_map[num+AMAZONS_CNT][i][j] == 2)
- b_amazon_2ply[num]++;
- else
- b_amazon_far[num]++;
- }
- }
- }
- amazons_features_c[num+AMAZONS_CNT][ONE_PLY_INDEX] = (w_amazon_1ply[num] * 0.05);
- amazons_features_c[num+AMAZONS_CNT][TWO_PLY_INDEX] = (w_amazon_2ply[num] * 0.05);
- amazons_features_c[num+AMAZONS_CNT][FAR_INDEX] = (w_amazon_far[num] * 0.05);
- }
- int w_control = 0, b_control = 0;
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j) {
- if (control_map[0][i][j] < control_map[1][i][j] &&
- control_map[0][i][j] > 0)
- w_control++;
- if (control_map[0][i][j] > control_map[1][i][j] &&
- control_map[1][i][j] > 0)
- b_control++;
- }
- }
- inputs.push_back(w_control * 0.01);
- inputs.push_back(b_control * 0.01);
- for (int i = 0; i < 2*AMAZONS_CNT; ++i) {
- for (int j = 0; j < FEATURES_CNT; ++j) {
- inputs.push_back(amazons_features_c[i][j]);
- }
- }
- return inputs;
- }
- class NNGameBot {
- clock_t start_time, max_ticks;
- bool terminating;
- NeuralNetwork predictor;
- public:
- NNGameBot(double time, NeuralNetwork& N) {
- start_time = clock();
- max_ticks = clock_t(time * CLOCKS_PER_SEC);
- terminating = false;
- predictor = N;
- }
- move_t make_move(GameState& state) {
- double max_score = +INF;
- move_t res;
- auto moves = state.get_moves();
- for (auto move : moves) {
- GameState new_state(state);
- new_state.make_move(move);
- double est = estimate(new_state);
- if (est < max_score) {
- res = move;
- max_score = est;
- }
- }
- return res;
- }
- bool time_check() {
- if (clock() - start_time >= max_ticks) {
- terminating = true;
- }
- return terminating;
- }
- double estimate(GameState& state) {
- if (state.get_winner() != -1) return 0.0;
- auto inputs = get_vector(state);
- return predictor(inputs)[1];
- }
- void train(vector<double>& input, vector<double>& output) {
- predictor.train(input, output);
- }
- void print_net(ostream& out) {
- predictor.print(out);
- }
- };
- int main() {
- constexpr int INPUT_SIZE = 3+8*13;
- vector<int> sizes{INPUT_SIZE, 60, 40, 1};
- vector<Matrix> w(3);
- for (int i = 0; i < sizes[1]; ++i) {
- std::uniform_real_distribution<double> unif{1e-6, 1e-5};
- std::default_random_engine re(std::random_device{}());
- if (i == 0) {
- vector<double> t;
- for (int j = 0; j < sizes[0]+1; ++j) {
- if (j == 2) t.push_back(+2.5);
- else if (j == 3) t.push_back(-2.5);
- else t.push_back(unif(re));
- }
- w[0].push_back(t);
- }
- else {
- vector<double> t(sizes[0]+1, 0);
- for (int i = 0; i < sizes[0]+1; ++i) {
- t[i] = unif(re);
- }
- w[0].push_back(t);
- }
- }
- {
- std::uniform_real_distribution<double> unif{1e-6, 1e-5};
- std::default_random_engine re(std::random_device{}());
- Matrix t1(sizes[2], vector<double>(sizes[1]+1, 0));
- for (int i = 0; i < sizes[2]; ++i) {
- for (int j = 0; j < sizes[1]+1; ++j) {
- t1[i][j] = unif(re);
- }
- }
- t1[0][0] = -0.5; t1[0][1] = +1;
- w[1] = t1;
- assert(sizes[3] == 1);
- Matrix t2(sizes[3], vector<double>(sizes[2]+1, 0));
- for (int i = 0; i < sizes[2]+1; ++i) {
- t2[0][i] = unif(re);
- }
- t2[0][0] = -0.5; t2[0][1] = +1;
- w[2] = t2;
- }
- NNGameBot bot(0.96, N);
- bot.print_net(cout);
- for (int epoch = 0; epoch < 1000001; ++epoch) {
- int board[SIZE][SIZE];
- for (int i = 0; i < SIZE; ++i) {
- for (int j = 0; j < SIZE; ++j) {
- board[i][j] = 0;
- }
- }
- board[0][3] = board[0][6] = 2;
- board[3][0] = board[3][9] = 2;
- board[6][0] = board[6][9] = 1;
- board[9][3] = board[9][6] = 1;
- int turn = 1;
- vector<pos_t> snd {
- make_pair(0, 3),
- make_pair(0, 6),
- make_pair(3, 0),
- make_pair(3, 9)
- };
- vector<pos_t> fst {
- make_pair(6, 0),
- make_pair(6, 9),
- make_pair(9, 3),
- make_pair(9, 6)
- };
- GameState state(board, turn, fst, snd);
- std::uniform_real_distribution<double> unif{0, 1};
- std::default_random_engine re(std::random_device{}());
- double eps = 0.075 - epoch * 0.000001;
- eps = max(0.0, eps);
- if (epoch % 100 == 0) cout << "Epoch " << epoch << "\n";
- while (state.get_winner() == -1) {
- srand(time(NULL));
- if (epoch % 100 == 0) {
- state.print(cout);
- cout << bot.estimate(state) << "\n";
- }
- move_t move;
- bool rnd = unif(re) <= eps;
- if (rnd) {
- auto moves = state.get_moves();
- move = moves[rand() % moves.size()];
- }
- else {
- move = bot.make_move(state);
- }
- GameState new_state(state);
- new_state.make_move(move);
- if (!rnd) {
- vector<double> new_est{1-bot.estimate(new_state)};
- vector<double> state_vector = get_vector(state);
- bot.train(state_vector, new_est);
- }
- state.make_move(move);
- }
- cout << "Final " << epoch << ":\n";
- state.print(cout);
- if (epoch % 100 == 0) {
- ofstream fout("nn.txt");
- bot.print_net(fout);
- fout.flush();
- }
- }
- return 0;
- }
Add Comment
Please, Sign In to add comment