Game of Amazon (NN learning code).

#include <iostream>
#include <fstream>
#include <vector>
#include <deque>
#include <queue>
#include <random>
#include <cassert>
#include <ctime>

constexpr int SIZE = 10;
constexpr int AMAZONS_CNT = 4;
constexpr int INF = 1000*1000*1000;
constexpr int WIN = 10000;

using namespace std;

typedef pair<int, int> pos_t;
typedef pair<pair<pos_t, pos_t>, pos_t> move_t;
typedef double score_t;

class GameState {
    int board[SIZE][SIZE];
    int turn;
    pos_t amazons[2][AMAZONS_CNT];
public:
    GameState(const int B[SIZE][SIZE], int t,
              vector<pos_t>& fst, vector<pos_t>& snd) {
        for (int i = 0; i < SIZE; ++i) {
            for (int j = 0; j < SIZE; ++j)
                board[i][j] = B[i][j];
        }
        turn = t - 1;
        assert(fst.size() == AMAZONS_CNT);
        assert(snd.size() == AMAZONS_CNT);
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            amazons[0][i] = fst[i];
            int x = amazons[0][i].first;
            int y = amazons[0][i].second;
            assert(board[x][y] == 1);
        }
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            amazons[1][i] = snd[i];
            int x = amazons[1][i].first;
            int y = amazons[1][i].second;
            assert(board[x][y] == 2);
        }
    }

    GameState(const GameState& state) {
        for (int i = 0; i < SIZE; ++i) {
            for (int j = 0; j < SIZE; ++j)
                board[i][j] = state.board[i][j];
        }
        turn = state.turn;
        for (int i = 0; i < 2; ++i) {
            for (int j = 0; j < AMAZONS_CNT; ++j) {
                amazons[i][j] = state.amazons[i][j];
            }
        }
    }

    inline const int* operator[](int index) { return board[index]; }
    inline const int get_turn() { return turn; }
    inline const pos_t* get_amazons(int player) { return amazons[player]; }

    void make_move(move_t move) {
        int amazon_x = move.first.first.first;
        int amazon_y = move.first.first.second;
        int target_x = move.first.second.first;
        int target_y = move.first.second.second;
        board[target_x][target_y] = board[amazon_x][amazon_y];
        board[amazon_x][amazon_y] = 0;
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            if (amazons[turn][i] == make_pair(amazon_x, amazon_y)) {
                amazons[turn][i] = make_pair(target_x, target_y);
                break;
            }
        }
        int arrow_x = move.second.first;
        int arrow_y = move.second.second;
        board[arrow_x][arrow_y] = -1;
        turn = 1 - turn;
    }

    deque<move_t> get_moves() {
        constexpr int DIR_CNT = 8;
        int dx[DIR_CNT] = {+1, +1, +1,  0, -1, -1, -1,  0};
        int dy[DIR_CNT] = {+1,  0, -1, -1, -1,  0, +1, +1};
        deque<move_t> res;
        for (int piece = 0; piece < AMAZONS_CNT; ++piece) {
            int x = amazons[turn][piece].first;
            int y = amazons[turn][piece].second;
            pos_t init_pos = make_pair(x, y);
            for (int dir = 0; dir < DIR_CNT; ++dir) {
                int cur_x = x;
                int cur_y = y;
                for (int len = 0; ; ++len) {
                    cur_x += dx[dir];
                    cur_y += dy[dir];
                    if (cur_x < 0 || cur_x >= SIZE) break;
                    else if (cur_y < 0 || cur_y >= SIZE) break;
                    else if (board[cur_x][cur_y] != 0) break;
                    else {
                        pos_t target = make_pair(cur_x, cur_y);
                        int tmp = board[x][y];
                        board[x][y] = 0;
                        for (int arrow_dir = 0;
                             arrow_dir < DIR_CNT; ++arrow_dir) {
                            int cur_arr_x = cur_x;
                            int cur_arr_y = cur_y;
                            for (int arrow_len = 0; ; ++len) {
                                cur_arr_x += dx[arrow_dir];
                                cur_arr_y += dy[arrow_dir];
                                if (cur_arr_x < 0 || cur_arr_x >= SIZE)
                                    break;
                                else if (cur_arr_y < 0 || cur_arr_y >= SIZE)
                                    break;
                                else if (board[cur_arr_x][cur_arr_y] != 0)
                                    break;
                                else {
                                    pos_t arrow =
                                    make_pair(cur_arr_x, cur_arr_y);
                                    res.push_back(
                                                  make_pair(
                                                            make_pair(init_pos, target),
                                                            arrow));
                                }
                            }
                        }
                        board[x][y] = tmp;
                    }
                }
            }
        }
        return res;
    }

    int get_winner() {
        int result = 1 - turn;
        constexpr int DIR_CNT = 8;
        int dx[DIR_CNT] = {+1, +1, +1,  0, -1, -1, -1,  0};
        int dy[DIR_CNT] = {+1,  0, -1, -1, -1,  0, +1, +1};
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            int x = amazons[turn][i].first;
            int y = amazons[turn][i].second;
            for (int dir = 0; dir < DIR_CNT; ++dir) {
                int new_x = x + dx[dir];
                int new_y = y + dy[dir];
                if (new_x < 0 || new_x >= SIZE) continue;
                if (new_y < 0 || new_y >= SIZE) continue;
                if (board[new_x][new_y] == 0) {
                    result = -1;
                    break;
                }
            }
            if (result == -1) break;
        }
        return result;
    }

    void print(ostream& out) {
        out << "Board:\n";
        out << "  | ";
        for (int i = 0; i < SIZE; ++i) {
            out << i << " ";
        }
        out << "\n";
        out << "----";
        for (int i = 0; i < SIZE; ++i) {
            out << "--";
        }
        out << "\n";
        for (int i = 0; i < SIZE; ++i) {
            out << i << " | ";
            for (int j = 0; j < SIZE; ++j) {
                if (board[i][j] == -1) out << "* ";
                else out << board[i][j] << " ";
            }
            out << "\n";
        }
        out << "Turn: " << turn+1 << "\n";
        out << "Amazon positions:\n";
        out << "White (1): ";
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            out << "(" << amazons[0][i].first << ", "
            << amazons[0][i].second << ") ";
        }
        out << "\n";
        out << "Black (2): ";
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            out << "(" << amazons[1][i].first << ", "
            << amazons[1][i].second << ") ";
        }
        out << "\n";
    }
};

typedef vector<vector<double>> Matrix;

class NeuralNetwork {
    vector<Matrix> weights, traces;
    double learning_rate, decay_rate;
    int cnt = 0;
public:
    NeuralNetwork() {
        learning_rate = 0.1;
        decay_rate = 0.0;
    }
    NeuralNetwork(vector<int> input_sizes, double eta, double lambda) {
        assert(input_sizes[input_sizes.size() - 1] == 1);
        learning_rate = eta;
        decay_rate = lambda;
        std::uniform_real_distribution<double> unif{-1, +1};
        std::default_random_engine re(std::random_device{}());
        for (int i = 0; i < input_sizes.size() - 1; ++i) {
            Matrix w; Matrix t;
            int rows = input_sizes[i + 1];
            int cols = input_sizes[i] + 1;
            for (int j = 0; j < rows; ++j) {
                vector<double> line;
                t.push_back(line);
                for (int k = 0; k < cols; ++k) {
                    line.push_back(unif(re));
                    t[j].push_back(0);
                }
                w.push_back(line);
            }
            weights.push_back(w);
            traces.push_back(t);
        }
    }
    NeuralNetwork(vector<Matrix> w, double eta, double lambda) {
        weights = w;
        learning_rate = eta;
        decay_rate = lambda;
        for (int i = 0; i < weights.size(); ++i) {
            Matrix t;
            int rows = weights[i].size();
            int cols = weights[i][0].size();
            for (int j = 0; j < rows; ++j) {
                vector<double> line;
                t.push_back(line);
                for (int k = 0; k < cols; ++k) {
                    t[j].push_back(0);
                }
            }
            traces.push_back(t);
        }
    }
    inline vector<double> feedforward(const vector<double>& inputs) const {
        auto signals = calc_outputs(inputs);
        return signals[signals.size() - 1];
    }
    void train(const vector<double>& inputs,
               const vector<double>& target) {
        vector<vector<double>> outputs = calc_outputs(inputs);
        vector<double> prev_deltas, deltas;
        for (int layer = outputs.size() - 1; layer >= 0; --layer) {
            for (int i = 1; i < outputs[layer].size(); ++i) {
                double o = outputs[layer][i];
                double delta = 0;
                if (layer == outputs.size() - 1) {
                    delta = o * (1 - o) * (target[i-1] - o);
                }
                else {
                    for (int k = 0;
                         k < prev_deltas.size(); k++) {
                        delta += prev_deltas[k] * weights[layer+1][k][i];
                    }
                    delta *= o * (1 - o);
                }
                deltas.push_back(delta);
                for (int j = 0; j < weights[layer][i-1].size();++j) {
                    double grad = 0;
                    if (layer > 0) {
                        grad = delta * outputs[layer-1][j];
                    }
                    else {
                        if (j > 0) {
                            grad = delta * inputs[j-1];
                        }
                        else {
                            grad = delta;
                        }
                    }
                    traces[layer][i-1][j] *= decay_rate;
                    traces[layer][i-1][j] += grad;
                    weights[layer][i-1][j] +=
                    learning_rate * traces[layer][i-1][j];
                }
            }
            prev_deltas = deltas;
            deltas.clear();
        }
        cnt += 1;
        if (cnt % 5000000 == 0) {
            learning_rate *= 0.99;
            learning_rate = max(0.05, learning_rate);
            cnt = 0;
            cerr << "*** ATTENTION ***\n";
            cerr << "New learning rate = " << learning_rate << "\n";
        }
    }
    inline vector<double> operator()(const vector<double>& inputs) const {
        return feedforward(inputs);
    }
    void print(ostream& out) {
        out << "{\n";
        for (int i = 0; i < weights.size(); ++i) {
            out << "{" << "\n";
            for (int j = 0; j < weights[i].size(); ++j) {
                auto v = weights[i][j];
                out << "{";
                for (int k = 0; k < weights[i][j].size(); ++k) {
                    auto l = weights[i][j][k];
                    out << l;
                    if (k < weights[i][j].size() - 1) out << ", ";
                }
                out << "}";
                if (j < weights[i].size() - 1) out << ", ";
                out << "\n";
            }
            out << "}";
            if (i < weights.size() - 1) out << ", ";
            out << "\n";
        }
        out << "};\n";
    }
private:
    inline double sigmoid(double x) const {
        return 1.0 / (1.0 + exp(-x));
    }
    inline double sigmoid_prime(double x) const {
        return sigmoid(x) * (1 - sigmoid(x));
    }
    vector<vector<double>> calc_outputs(const vector<double>& inputs) const {
        vector<vector<double>> res;
        vector<double> signals{1};
        for (auto i : inputs) {
            signals.push_back(i);
        }
        for (Matrix w : weights) {
            assert(w[0].size() == signals.size());
            vector<double> new_signals(w.size() + 1, 0);
            new_signals[0] = 1;
            for (int i = 1; i <= w.size(); ++i) {
                double c0, c1, c2, c3;
                size_t sz = w[i-1].size();
                for (int j = 0; j < sz; j += 4) {
                    c0 = c1 = c2 = c3 = 0.0;
                    c0 = signals[j] * w[i - 1][j];
                    if (j+1 < sz)
                        c1 = signals[j+1] * w[i - 1][j+1];
                    if (j+2 < sz)
                        c2 = signals[j+2] * w[i - 1][j+2];
                    if (j+3 < sz)
                        c3 = signals[j+3] * w[i - 1][j+3];
                    new_signals[i] += c0+c1+c2+c3;
                }
                new_signals[i] = sigmoid(new_signals[i]);
            }
            signals = new_signals;
            res.push_back(signals);
        }
        return res;
    }
};


inline vector<double> get_vector(GameState& state) {
    constexpr int DIR_CNT = 8;
    constexpr int dx[DIR_CNT] = {+1, +1, +1,  0, -1, -1, -1,  0};
    constexpr int dy[DIR_CNT] = {+1,  0, -1, -1, -1,  0, +1, +1};
    vector<double> inputs{0};
    for (int i = 0; i < SIZE; ++i) {
        for (int j = 0; j < SIZE; ++j) {
            if (state[i][j] == -1)
                inputs[0]++;
        }
    }
    inputs[0] /= 100;
    int turn = state.get_turn();
    // vector<vector<pos_t>> amazons(2, vector<pos_t>(AMAZONS_CNT));
    pos_t amazons[2][AMAZONS_CNT];
    constexpr int FEATURES_CNT = 13;
    constexpr int X_INDEX = 0, Y_INDEX = 1, DIR_MIN_INDEX = 2;
    constexpr int ONE_PLY_INDEX = DIR_MIN_INDEX + DIR_CNT;
    constexpr int TWO_PLY_INDEX = 1 + ONE_PLY_INDEX;
    constexpr int FAR_INDEX = 1 + TWO_PLY_INDEX;
    static_assert(FAR_INDEX + 1 == FEATURES_CNT, "Indices goin\' berzerk :(");
    double amazons_features_c[2*AMAZONS_CNT][FEATURES_CNT];
    for (int stage = 0; stage < 2; ++stage) {
        for (int i = 0; i < AMAZONS_CNT; ++i) {
            amazons[stage][i] = state.get_amazons(turn)[i];
            int x = amazons[stage][i].first;
            int y = amazons[stage][i].second;
            amazons_features_c[stage*AMAZONS_CNT+i][X_INDEX] = 1.0*x/10;
            amazons_features_c[stage*AMAZONS_CNT+i][Y_INDEX] = 1.0*y/10;
            for (int dir = 0; dir < DIR_CNT; ++dir) {
                int cur_x = x;
                int cur_y = y;
                int cnt = 0;
                while (true) {
                    cur_x += dx[dir];
                    cur_y += dy[dir];
                    if (cur_x >= SIZE || cur_x < 0) break;
                    if (cur_y >= SIZE || cur_y < 0) break;
                    if (state[cur_x][cur_y] != 0) break;
                    else ++cnt;
                }
                amazons_features_c[stage*AMAZONS_CNT+i][DIR_MIN_INDEX+dir] = 1.0*cnt/9;
            }
        }
        turn = 1 - turn;
    }
    int dist_map[2*AMAZONS_CNT][SIZE][SIZE];
    for (int i = 0; i < 2*AMAZONS_CNT; ++i)
        for (int j = 0; j < SIZE; ++j)
            for (int k = 0; k < SIZE; ++k)
                dist_map[i][j][k] = +INF;
    int control_map[2][SIZE][SIZE];
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < SIZE; ++j)
            for (int k = 0; k < SIZE; ++k)
                control_map[i][j][k] = +INF;
    int index = 0;
    for (int turn = 0; turn <= 1; ++turn) {
        queue<pos_t> Q;
        for (pos_t a : amazons[turn]) {
            dist_map[index][a.first][a.second] = 0;
            control_map[turn][a.first][a.second] = 0;
            Q.push(a);
            while (!Q.empty()) {
                pos_t a = Q.front(); Q.pop();
                int x = a.first, y = a.second;
                for (int dir = 0; dir < DIR_CNT; ++dir) {
                    int cur_x = x;
                    int cur_y = y;
                    while (true) {
                        cur_x += dx[dir];
                        cur_y += dy[dir];
                        if (cur_x >= SIZE || cur_x < 0) break;
                        if (cur_y >= SIZE || cur_y < 0) break;
                        if (state[cur_x][cur_y] != 0) break;
                        if (dist_map[index][cur_x][cur_y] == INF) {
                            dist_map[index][cur_x][cur_y] =
                            dist_map[index][x][y] + 1;
                            control_map[turn][cur_x][cur_y] =
                            min(control_map[turn][cur_x][cur_y],
                                dist_map[index][cur_x][cur_y]);
                            Q.push(make_pair(cur_x, cur_y));
                        }
                    }
                }
            }
            ++index;
        }
    }
    int w_amazon_1ply[AMAZONS_CNT] = {0};
    int w_amazon_2ply[AMAZONS_CNT] = {0};
    int w_amazon_far[AMAZONS_CNT] = {0};
    for (int num = 0; num < AMAZONS_CNT; ++num) {
        for (int i = 0; i < SIZE; ++i) {
            for (int j = 0; j < SIZE; ++j) {
                if (dist_map[num][i][j] < control_map[1][i][j] &&
                    dist_map[num][i][j] > 0) {
                    if (dist_map[num][i][j] == 1)
                        w_amazon_1ply[num]++;
                    else if (dist_map[num][i][j] == 2)
                        w_amazon_2ply[num]++;
                    else
                        w_amazon_far[num]++;
                }
            }
        }
        amazons_features_c[num][ONE_PLY_INDEX] = (w_amazon_1ply[num] * 0.05);
        amazons_features_c[num][TWO_PLY_INDEX] = (w_amazon_2ply[num] * 0.05);
        amazons_features_c[num][FAR_INDEX] = (w_amazon_far[num] * 0.05);
    }
    int b_amazon_1ply[AMAZONS_CNT] = {0};
    int b_amazon_2ply[AMAZONS_CNT] = {0};
    int b_amazon_far[AMAZONS_CNT] = {0};
    for (int num = 0; num < AMAZONS_CNT; ++num) {
        for (int i = 0; i < SIZE; ++i) {
            for (int j = 0; j < SIZE; ++j) {
                if (dist_map[num+AMAZONS_CNT][i][j] <
                    control_map[0][i][j] &&
                    dist_map[num+AMAZONS_CNT][i][j] > 0) {
                    if (dist_map[num+AMAZONS_CNT][i][j] == 1)
                        b_amazon_1ply[num]++;
                    else if (dist_map[num+AMAZONS_CNT][i][j] == 2)
                        b_amazon_2ply[num]++;
                    else
                        b_amazon_far[num]++;
                }
            }
        }
        amazons_features_c[num+AMAZONS_CNT][ONE_PLY_INDEX] = (w_amazon_1ply[num] * 0.05);
        amazons_features_c[num+AMAZONS_CNT][TWO_PLY_INDEX] = (w_amazon_2ply[num] * 0.05);
        amazons_features_c[num+AMAZONS_CNT][FAR_INDEX] = (w_amazon_far[num] * 0.05);
    }
    int w_control = 0, b_control = 0;
    for (int i = 0; i < SIZE; ++i) {
        for (int j = 0; j < SIZE; ++j) {
            if (control_map[0][i][j] < control_map[1][i][j] &&
                control_map[0][i][j] > 0)
                w_control++;
            if (control_map[0][i][j] > control_map[1][i][j] &&
                control_map[1][i][j] > 0)
                b_control++;
        }
    }
    inputs.push_back(w_control * 0.01);
    inputs.push_back(b_control * 0.01);
    for (int i = 0; i < 2*AMAZONS_CNT; ++i) {
        for (int j = 0; j < FEATURES_CNT; ++j) {
            inputs.push_back(amazons_features_c[i][j]);
        }
    }
    return inputs;
}


class NNGameBot {
    clock_t start_time, max_ticks;
    bool terminating;
    NeuralNetwork predictor;
public:
    NNGameBot(double time, NeuralNetwork& N) {
        start_time = clock();
        max_ticks = clock_t(time * CLOCKS_PER_SEC);
        terminating = false;
        predictor = N;
    }
    move_t make_move(GameState& state) {
        double max_score = +INF;
        move_t res;
        auto moves = state.get_moves();
        for (auto move : moves) {
            GameState new_state(state);
            new_state.make_move(move);
            double est = estimate(new_state);
            if (est < max_score) {
                res = move;
                max_score = est;
            }
        }
        return res;
    }
    bool time_check() {
        if (clock() - start_time >= max_ticks) {
            terminating = true;
        }
        return terminating;
    }
    double estimate(GameState& state) {
        if (state.get_winner() != -1) return 0.0;
        auto inputs = get_vector(state);
        return predictor(inputs)[1];
    }
    void train(vector<double>& input, vector<double>& output) {
        predictor.train(input, output);
    }
    void print_net(ostream& out) {
        predictor.print(out);
    }
};

int main() {
    constexpr int INPUT_SIZE = 3+8*13;
    vector<int> sizes{INPUT_SIZE, 60, 40, 1};
    vector<Matrix> w(3);
    for (int i = 0; i < sizes[1]; ++i) {
        std::uniform_real_distribution<double> unif{1e-6, 1e-5};
        std::default_random_engine re(std::random_device{}());
        if (i == 0) {
            vector<double> t;
            for (int j = 0; j < sizes[0]+1; ++j) {
                if (j == 2) t.push_back(+2.5);
                else if (j == 3) t.push_back(-2.5);
                else t.push_back(unif(re));
            }
            w[0].push_back(t);
        }
        else {
            vector<double> t(sizes[0]+1, 0);
            for (int i = 0; i < sizes[0]+1; ++i) {
                t[i] = unif(re);
            }
            w[0].push_back(t);
        }
    }
    {
        std::uniform_real_distribution<double> unif{1e-6, 1e-5};
        std::default_random_engine re(std::random_device{}());
        Matrix t1(sizes[2], vector<double>(sizes[1]+1, 0));
        for (int i = 0; i < sizes[2]; ++i) {
            for (int j = 0; j < sizes[1]+1; ++j) {
                t1[i][j] = unif(re);
            }
        }
        t1[0][0] = -0.5; t1[0][1] = +1;
        w[1] = t1;
        assert(sizes[3] == 1);
        Matrix t2(sizes[3], vector<double>(sizes[2]+1, 0));
        for (int i = 0; i < sizes[2]+1; ++i) {
            t2[0][i] = unif(re);
        }
        t2[0][0] = -0.5; t2[0][1] = +1;
        w[2] = t2;
    }
    NNGameBot bot(0.96, N);
    bot.print_net(cout);
    for (int epoch = 0; epoch < 1000001; ++epoch) {
        int board[SIZE][SIZE];
        for (int i = 0; i < SIZE; ++i) {
            for (int j = 0; j < SIZE; ++j) {
                board[i][j] = 0;
            }
        }
        board[0][3] = board[0][6] = 2;
        board[3][0] = board[3][9] = 2;
        board[6][0] = board[6][9] = 1;
        board[9][3] = board[9][6] = 1;
        int turn = 1;
        vector<pos_t> snd {
            make_pair(0, 3),
            make_pair(0, 6),
            make_pair(3, 0),
            make_pair(3, 9)
        };
        vector<pos_t> fst {
            make_pair(6, 0),
            make_pair(6, 9),
            make_pair(9, 3),
            make_pair(9, 6)
        };
        GameState state(board, turn, fst, snd);
        std::uniform_real_distribution<double> unif{0, 1};
        std::default_random_engine re(std::random_device{}());
        double eps = 0.075 - epoch * 0.000001;
        eps = max(0.0, eps);
        if (epoch % 100 == 0) cout << "Epoch " << epoch << "\n";
        while (state.get_winner() == -1) {
            srand(time(NULL));
            if (epoch % 100 == 0) {
                state.print(cout);
                cout << bot.estimate(state) << "\n";
            }
            move_t move;
            bool rnd = unif(re) <= eps;
            if (rnd) {
                auto moves = state.get_moves();
                move = moves[rand() % moves.size()];
            }
            else {
                move = bot.make_move(state);
            }
            GameState new_state(state);
            new_state.make_move(move);
            if (!rnd) {
                vector<double> new_est{1-bot.estimate(new_state)};
                vector<double> state_vector = get_vector(state);
                bot.train(state_vector, new_est);
            }
            state.make_move(move);
        }
        cout << "Final " << epoch << ":\n";
        state.print(cout);
        if (epoch % 100 == 0) {
            ofstream fout("nn.txt");
            bot.print_net(fout);
            fout.flush();
        }
    }
    return 0;
}