Advertisement
Rochet2

Non-connected nodes problem

Nov 27th, 2022 (edited)
1,062
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.16 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <utility>
  4. #include <algorithm>
  5. #include <map>
  6. #include <unordered_map>
  7. #include <set>
  8. #include <unordered_set>
  9. #include <bit>
  10. #include <bitset>
  11.  
  12. using uint8 = uint8_t;
  13. using uint32 = uint32_t;
  14.  
  15. // Using switch instead of map
  16. constexpr uint32 CHAR_TO_BIN_F(uint32 c) {
  17.   switch(c) {
  18.     case 'A': return 0b1;
  19.     case 'B': return 0b10;
  20.     case 'C': return 0b100;
  21.     case 'D': return 0b1000;
  22.     case 'E': return 0b10000;
  23.     case 'F': return 0b100000;
  24.     case 'G': return 0b1000000;
  25.     case 'H': return 0b10000000;
  26.     case 'I': return 0b100000000;
  27.     case 'J': return 0b1000000000;
  28.     case 'K': return 0b10000000000;
  29.     case 'L': return 0b100000000000;
  30.     case 'M': return 0b1000000000000;
  31.     case 'N': return 0b10000000000000;
  32.     case 'O': return 0b100000000000000;
  33.     case 'P': return 0b1000000000000000;
  34.     case 'Q': return 0b10000000000000000;
  35.     case 'R': return 0b100000000000000000;
  36.     case 'S': return 0b1000000000000000000;
  37.     case 'T': return 0b10000000000000000000;
  38.     case 'U': return 0b100000000000000000000;
  39.     case 'V': return 0b1000000000000000000000;
  40.     case 'W': return 0b10000000000000000000000;
  41.     case 'X': return 0b100000000000000000000000;
  42.     case 'Y': return 0b1000000000000000000000000;
  43.     case 'Z': return 0b10000000000000000000000000;
  44.   }
  45.   return -1;
  46. };
  47.  
  48. constexpr uint32 BIN_TO_CHAR_F(uint32 b) {
  49.   switch(b) {
  50.     case 0b1: return 'A';
  51.     case 0b10: return 'B';
  52.     case 0b100: return 'C';
  53.     case 0b1000: return 'D';
  54.     case 0b10000: return 'E';
  55.     case 0b100000: return 'F';
  56.     case 0b1000000: return 'G';
  57.     case 0b10000000: return 'H';
  58.     case 0b100000000: return 'I';
  59.     case 0b1000000000: return 'J';
  60.     case 0b10000000000: return 'K';
  61.     case 0b100000000000: return 'L';
  62.     case 0b1000000000000: return 'M';
  63.     case 0b10000000000000: return 'N';
  64.     case 0b100000000000000: return 'O';
  65.     case 0b1000000000000000: return 'P';
  66.     case 0b10000000000000000: return 'Q';
  67.     case 0b100000000000000000: return 'R';
  68.     case 0b1000000000000000000: return 'S';
  69.     case 0b10000000000000000000: return 'T';
  70.     case 0b100000000000000000000: return 'U';
  71.     case 0b1000000000000000000000: return 'V';
  72.     case 0b10000000000000000000000: return 'W';
  73.     case 0b100000000000000000000000: return 'X';
  74.     case 0b1000000000000000000000000: return 'Y';
  75.     case 0b10000000000000000000000000: return 'Z';
  76.   }
  77.   return -1;
  78. };
  79.  
  80. /*
  81. std::vector<std::pair<uint32, uint32>> T = {
  82. {'A', 'B'},
  83. {'B', 'C'},
  84. {'C', 'A'},
  85. {'X', 'A'},
  86. {'B', 'X'},
  87. {'X', 'C'},
  88. {'G', 'I'},
  89. {'A', 'I'},
  90. {'I', 'B'},
  91. {'I', 'C'}
  92. };
  93. */
  94.  
  95. static std::vector<std::pair<uint32, uint32>> T = {
  96.     {'A', 'B'},
  97.     {'B', 'C'},
  98.     {'C', 'A'},
  99.     {'X', 'A'},
  100.     {'B', 'X'},
  101.     {'X', 'C'},
  102.     {'F', 'J'},
  103.     {'E', 'J'},
  104.     {'G', 'I'},
  105.     {'F', 'E'},
  106.     {'A', 'I'},
  107.     {'I', 'F'},
  108.     {'I', 'B'},
  109.     {'I', 'C'},
  110.     {'I', 'C'}
  111. };
  112.  
  113.  
  114. /*
  115. Problem:
  116. From the given undirected graph find the largest set of nodes that are not connected in any way.
  117. For example, in the above set we can see that (A,J,F) is a possible solution because A is not connected to J or F and J is not connected to F.
  118. If there is no bigger set, for example four nodes that are not connected in any way, then the set is a solution.
  119. */
  120.  
  121.  
  122. // use unordered map instead of map
  123. static std::unordered_map<uint32, uint32> SEEN;
  124. static uint32 ALL = 0;
  125.  
  126. // This function basically goes through all combinations of the solutions of a set of nodes indicated by candidate_set
  127. // We take in a character and a set of all characters it is not linked to.
  128. // We also take in a candidate character from that set.
  129. // We then try to go through the set and try out all solutions (combinations) that yield a solution
  130. // We then finally store the solution
  131. inline void CollectResults0(const uint32 main_char, std::pair<int, uint32>& results, const uint32 candidate_set, const uint32 candidate, decltype(SEEN)::const_iterator b_c) {
  132.     // Take the input set and remove any nodes that the candidate itself is linked to as they cannot be in the solution
  133.     // It becomes this candidate's own solution set
  134.     const uint32 candidate_set_local = candidate_set & (candidate | ((~SEEN.at(candidate)) & ALL));
  135.  
  136.     // Here we loop through the remaining candidates in the new solution set and try remove each one's linked
  137.     // We use iterator to continue from the previous candidate onwards only. This allows us to only test combinations, not permutations
  138.     // we do not care about permutations (so A-B is same as B-A)
  139.     bool last = true;
  140.     for (; b_c != SEEN.end(); ++b_c) {
  141.         // the way we store the data forces us to go through all characters even if they are not in the set
  142.         // this check makes sure we only test characters that are in the set
  143.         // a potential optimization is to figure out how to avoid going through all characters.
  144.         if (candidate_set_local & b_c->first) {
  145.             CollectResults0(main_char, results, candidate_set_local, b_c->first, std::next(b_c, 1));
  146.             last = false;
  147.         }
  148.     }
  149.  
  150.     // If this candidate was the last one in this set, then it is the calcuated possible solution
  151.     if (last) {
  152.         const uint32 solution = main_char | candidate_set_local;
  153.         // We need to check if it is better than existing solution
  154.         const auto bits = std::popcount(solution);
  155.         if (bits > results.first) {
  156.           // was a better solution, store it
  157.           results.first = bits;
  158.           results.second = solution;
  159.         }
  160.     }
  161. }
  162.  
  163. uint32 acual_main0() {
  164.     // We go through the data set and collect all nodes we have seen into ALL set
  165.     // We also collect all edges (connections) of a single character (node) into SEEN
  166.     for (auto& p : T) { // loop over N
  167.         const auto first_b = CHAR_TO_BIN_F(p.first);
  168.         const auto second_b = CHAR_TO_BIN_F(p.second);
  169.         SEEN[first_b] |= second_b;
  170.         SEEN[second_b] |= first_b;
  171.         ALL |= first_b | second_b;
  172.     }
  173.     /*
  174.     // The final dataset might look like this when inversed. The inverse of connected nodes is non-connected nodes.
  175.     // Below you can see a character and then a number (binary) that is the set of nodes it is NOT connected to.
  176.     // In this printout we have also excluded the node itself from its own non-connections
  177.     // and any characters that do not exist in the dataset are also removed.
  178.     // The reason for these exclusions is that this data is what the CollectResults function will see and use
  179.     A: 00000000000000000000001001110000
  180.     B: 00000000000000000000001001110000
  181.     C: 00000000000000000000001001110000
  182.     E: 00000000100000000000000101000111
  183.     F: 00000000100000000000000001000111
  184.     G: 00000000100000000000001000110111
  185.     I: 00000000100000000000001000010000
  186.     J: 00000000100000000000000101000111
  187.     X: 00000000000000000000001101110000
  188.     */
  189.  
  190.     std::pair<int, uint32> results; // pair of <set size, the set itself>
  191.     // In this loop we go through all of the rows of the dataset we collected
  192.     // When we check the A row then we have checked all solutions that contain A.
  193.     // This means that we can skip A from other rows. We store this information to already_checked_global for skipping.
  194.     uint32 already_checked_global = 0;
  195.     for (auto p = SEEN.begin(); p != SEEN.end(); ++p) {
  196.         // inverse "connected" into "not connected" set.
  197.         // Then make sure that we only consider nodes we have seen by using ALL.
  198.         // Then remove "self" from the set.
  199.         const uint32 candidate_set = (~p->second) & ALL & ~p->first;
  200.         // When we check a specific bit on a row then we have checked all solutions that contain that bit.
  201.         // This means that we can skip the bit from other rows. We store this information to already_checked for skipping.
  202.         uint32 already_checked = already_checked_global;
  203.         auto bits = std::popcount((candidate_set & ~already_checked) | p->first);
  204.         if (results.first < bits) // skip solutions if  it cannot be better than already found
  205.         {
  206.             // In this loop we go through all of the set bits in a single row
  207.             // In CollectResults we then try to exclude that bit's connections from the row and move to the next set bit recursively.
  208.             // We try all combinations and when all bits of a row have been looped through we have a solution that contains
  209.             // non-connected nodes.
  210.             // Note that we start from the outer loop's current position to skip characters we have already went through in the outer loop. (see already_checked_global)
  211.             for (auto b_c = p; b_c != SEEN.end(); ++b_c) {
  212.                 // the way we store the data forces us to go through all characters even if they are not in the set
  213.                 // this check makes sure we only test characters that are in the set
  214.                 // a potential optimization is to figure out how to avoid going through all characters.
  215.                 if (candidate_set & b_c->first) {
  216.                     CollectResults0(p->first, results, (candidate_set & ~already_checked) | b_c->first, b_c->first, std::next(b_c, 1));
  217.                     already_checked |= b_c->first;
  218.                     if (results.first >= --bits)
  219.                         break; // rest of the branches on this row cannot contain better results
  220.                 }
  221.             }
  222.         }
  223.         already_checked_global |= p->first;
  224.     }
  225.  
  226.     // It is possible to modify the algorithm to return all results. However, benchmarks showed that it may be best to collect all possible solutions first
  227.     // and only once the algorithm has finished, find the best solutions just before returning right here.
  228.     // Always benchmark when you make a change, collecting results into a map instead of a pair or a vector within CollectResults function doubled the execution time.
  229.     return results.second;
  230. }
  231.  
  232. static void TEST0(benchmark::State& state) {
  233.   for (auto _ : state) {
  234.     ALL = 0;
  235.     SEEN.clear();
  236.  
  237.     benchmark::DoNotOptimize(acual_main0());
  238.   }
  239. }
  240. BENCHMARK(TEST0);
  241. // https://quick-bench.com/q/rkq-fXDE6Tq0V_oyNKzkEhTH908
  242. // https://godbolt.org/z/WrTf9cjxT // Contains debug prints
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement