Untitled

// Compile and run with:
//
// $ g++ -Wall -Wconversion tournament.cc -g -O3 -o tournament --std=c++17 && time ./tournament

#include <numeric>
#include <limits>
#include <map>
#include <vector>
#include <iostream>
#include <algorithm>
// Uncomment the next line, and the line in main() if you have Boost libraries installed.
// #include <boost/rational.hpp>

using namespace std;

constexpr int factorial(int n) {
  return n <= 1 ? 1 : (n * factorial(n-1));
}

const int K = 5;   // Number of players.
const int N = 8;   // Number of matches.
const int KX = factorial(K);

// state[i][j] = how many times player i won against player j.
using state_t = array<array<int, K>, K>;

// cache_key_t is a compact representation of state_t.
using cache_key_t = long long;

// entry is the best strategy found for the given state_t
struct entry {
  double prob;   // Probability of success.
  int i, j;      // Which players should play against each other next.
                 // If j < 0, then nobody plays, just guess player i.
};

// Stores all strategies computed so far. Crucial for speed.
map<cache_key_t, entry> cache;

// List of all permutations of K elements.
vector<array<int, K>> perms;

// Transforms state_t into the corresponding cache_key_t.
// This results in ~5x speedup.
cache_key_t StateKey(const state_t &state) {
  static_assert(K*(K-1) + N < std::numeric_limits<cache_key_t>::digits,
                "K and N too large for cache key");
  cache_key_t key = 0;
  for (int i = 0; i < K; ++i)
    for (int j = 0; j < K; ++j)
      if (i != j) {
        key <<= 1;
        key |= 1;
        key <<= state[i][j];
      }
  return key;
}

// Looks up the given state in cache. Because of problem symmetry, it
// tries all possible K! state permutations. Trying all permutations
// results in ~10x speedup.
bool FindInCache(state_t state, entry* entry) {
  for (const auto& perm : perms) {
    state_t sstate;
    for (int i = 0; i < K; ++i)
      for (int j = 0; j < K; ++j)
        sstate[i][j] = state[perm[i]][perm[j]];

    auto it = cache.find(StateKey(sstate));
    if (it != cache.end()) {
      *entry = it->second;
      entry->i = perm[entry->i];
      entry->j = entry->j >= 0 ? perm[entry->j] : -1;
      return true;
    }
  }
  return false;
}

// Recursively finds the best strategy.
double FindBestStrategy(state_t state, int total) {
  entry cached;
  if (FindInCache(state, &cached)) return cached.prob;

  // Compute probabilies of each permutation in current state.
  array<double, KX> probs;
  {
    double lhtotal = 0.0;
    for (int k = 0; k < KX; ++k) {
      const auto& perm = perms[k];
      int badcount = 0;
      for (int i = 0; i < K; ++i)
        for (int j = 0; j < K; ++j)
          if (perm[i] < perm[j]) badcount += state[i][j];
      double lhood = 1.0 / (1 << badcount);
      lhtotal += lhood;
      probs[k] = lhood;
    }
    for (double& prob : probs) prob /= lhtotal;
  }

  entry best = {0, -1, -1};
  if (total == N) {
    // All N matches have been played. Compute the probability that
    // the most probable winner is the actual best player.
    for (int i = 0; i < K; ++i) {
      double prob = 0;
      for (int k = 0; k < KX; ++k) {
        const auto& perm = perms[k];
        if (perm[i] == K-1) prob += probs[k];
      }
      if (prob > best.prob) best = {prob, i, -1};
    }
  } else {
    // Consider all possible pairs of players (i, j) and recurse.
    for (int i = 0; i < K; ++i)
      for (int j = i+1; j < K; ++j) {
        double probi = 0.0;
        for (int k = 0; k < KX; ++k) {
          const auto& perm = perms[k];
          if (perm[i] > perm[j]) probi += probs[k];
        }
        double probj = 1.0 - probi;
        ++state[i][j];  // i won starts
        double successi = FindBestStrategy(state, total + 1);
        --state[i][j];  // i won ends
        ++state[j][i];  // j won starts
        double successj = FindBestStrategy(state, total + 1);
        --state[j][i];  // j won ends
        double prob = (probi * 2 + probj) / 3.0 * successi + (probj * 2 + probi)/3.0 * successj;
        if (prob > best.prob) best = {prob, i, j};
    }
  }
  cache.emplace(StateKey(state), best);
  return best.prob;
}

// Display the strategy computed by FindBestStrategy in a human-readable form.
void HumanStrategy(state_t state, int total, unsigned bitmask) {
  for (int i = 0; i < total; ++i) {
    cout << ((bitmask & (1<<i)) ? '|' : ' ') << "       ";
  }
  cout << "|- ";

  entry best;
  FindInCache(state, &best);
  if (best.j < 0) {
    cout << "guess " << best.i << endl;
    return;
  }
  cout << best.i << '>' << best.j << " -|"<< endl;

  ++state[best.i][best.j];
  HumanStrategy(state, total+1, bitmask | (1 << total));
  --state[best.i][best.j];

  for (int i = 0; i < total; ++i) {
    cout << ((bitmask & (1<<i)) ? '|' : ' ')<< "       ";
  }
  cout << "|- ";
  cout << best.i << '<' << best.j << " -|" << endl;

  ++state[best.j][best.i];
  HumanStrategy(state, total+1, bitmask);
  --state[best.j][best.i];
}

// Display the strategy computed by FindBestStrategy in a machine-readable form.
void MachineStrategy(state_t state) {
  entry best;
  FindInCache(state, &best);
  if (best.j < 0) {
    cout << best.i;
    return;
  }

  cout << '(' << best.i << ", " << best.j << ", ";

  ++state[best.i][best.j];
  MachineStrategy(state);
  --state[best.i][best.j];

  cout << ", ";

  ++state[best.j][best.i];
  MachineStrategy(state);
  --state[best.j][best.i];

  cout << ")";
}

// Compute the success probability of the strategy computed by
// FindBestStrategy. Even though FindBestStrategy also computes this
// number, this function serves as an independent verification, and
// also is parameterized by the result type, so it can compute using
// floating point or using exact rational value.
template<class T>
T Evaluate(state_t state, const array<int, K>& perm) {
  entry best;
  FindInCache(state, &best);
  if (best.j < 0) return perm[best.i] == K-1 ? T(1) : T(0);

  ++state[best.i][best.j];
  T pij = Evaluate<T>(state, perm);
  --state[best.i][best.j];

  ++state[best.j][best.i];
  T pji = Evaluate<T>(state, perm);
  --state[best.j][best.i];

  if (perm[best.i] > perm[best.j]) return (pij * 2 + pji) / 3;
  else return (pij + pji * 2) / 3;
}

template<class T>
void Evaluate() {
  array<array<int, K>, K> state {0};
  T sum = 0.0;
  for (const auto& perm : perms)
    sum += Evaluate<T>(state, perm);
  T success = sum / T(perms.size());
  cout << "success probability: " << success << endl;
}

int main() {
  cout.precision(17);
  // Compute the list of all permutations.
  array<int, K> perm;
  iota(perm.begin(), perm.end(), 0);
  do {
    perms.push_back(perm);
  } while (next_permutation(perm.begin(), perm.end()));

  // Do the search and verification.
  double prob = FindBestStrategy({0}, 0);
  HumanStrategy({0}, 0, 0);
  MachineStrategy({0}); cout << endl;
  cout << "analysis: " << prob << endl;
  Evaluate<double>();
  // Uncomment the next line, and the #include on top if you have Boost libraries installed.
  // Evaluate<boost::rational<long long>>();
  cout << "cached items: " << cache.size() << endl;
  return 0;
}


// Speed optimization log
//
// K, N, success probability, cache items, time
//
// K=5, N=8, 3085/6561, 3108105, 0m40.447s   # original algorithm
// K=5, N=8, 3085/6561, 1282194, 0m14.357s   # with 'used' symmetry breaking
// K=5, N=8, 3085/6561, 1282194, 0m10.740s   # with -O3 flag, with -fprofile-use is slower
// K=5, N=8, 3085/6561,   27578, 0m03.257s   # with permuted cache lookup
// K=5, N=8, 3085/6561,   27578, 0m03.364s   # removed 'used' symmetry breaking
//                                           # (superceded by permuted cache lookup)
// K=5, N=8, 3085/6561,   27578, 0m00.678s   # cache key is an int, not an array
//
// K=5, N=10, 447236/885735,  257466,  0m5.653s  # same as above for N=10
//
// TODO: speed optimization ideas:
// - cut the search early if we're already doing badly