SHARE
TWEET

Untitled

a guest Oct 16th, 2019 83 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <algorithm>
  2. #include <cstdint>
  3. #include <cstdio>
  4.  
  5. #include <xmmintrin.h>
  6.  
  7.  
  8. typedef int32_t v4 __attribute__ ((vector_size(sizeof(int32_t) * 4)));
  9.  
  10. inline bool v4equal(v4 a, v4 b)
  11. {
  12.     __m128i v1 = _mm_load_si128((__m128i*)&a);
  13.     __m128i v2 = _mm_load_si128((__m128i*)&b);
  14.     __m128i vcmp = _mm_cmpeq_epi32(v1, v2);
  15.     uint16_t mask = _mm_movemask_epi8(vcmp);
  16.     return mask == 0xffff;
  17. }
  18.  
  19. inline bool v4anyGreater(v4 a, v4 b)
  20. {
  21.     __m128i v1 = _mm_load_si128((__m128i*)&a);
  22.     __m128i v2 = _mm_load_si128((__m128i*)&b);
  23.     __m128i vcmp = _mm_cmpgt_epi32(v1, v2);
  24.     uint16_t mask = _mm_movemask_epi8(vcmp);
  25.     return mask != 0;
  26. }
  27.  
  28. class Sujiko
  29. {
  30. private:
  31.     uint16_t _usedSet;
  32.     uint16_t _usedCells;
  33.     uint8_t _cells[9];
  34.     v4 _sums;
  35.     uint8_t _diag1, _diag2;
  36.     v4 _clues;
  37.  
  38.     inline bool filled();
  39.     inline bool used(uint8_t v);
  40.     inline void unset(size_t cell, uint8_t value);
  41.     inline bool cantBeValid();
  42.     inline bool valid();
  43. public:
  44.     Sujiko(uint8_t clue1, uint8_t clue2, uint8_t clue3, uint8_t clue4)
  45.         : _usedSet(0),
  46.         _usedCells(0),
  47.         _cells {0},
  48.         _sums {0,0,0,0},
  49.         _diag1(clue1 + clue3),
  50.         _diag2(clue2 + clue4),
  51.         _clues { clue4, clue3, clue2, clue1 }
  52.     {}
  53.  
  54.     inline void set(size_t cell, uint8_t value);
  55.     bool solve();
  56.     void print();
  57. };
  58.  
  59. static const v4 multipliers[] = {
  60.         { 0, 0, 0, 1 },
  61.         { 0, 0, 1, 1 },
  62.         { 0, 0, 1, 0 },
  63.         { 0, 1, 0, 1 },
  64.         { 1, 1, 1, 1 },
  65.         { 1, 0, 1, 0 },
  66.         { 0, 1, 0, 0 },
  67.         { 1, 1, 0, 0 },
  68.         { 1, 0, 0, 0 },
  69. };
  70.  
  71. void Sujiko::set(size_t cell, uint8_t value)
  72. {
  73.     _sums += multipliers[cell] * value;
  74.     _cells[cell] = value;
  75.     _usedSet |= 1 << value;
  76.     _usedCells |= 1 << cell;
  77. }
  78.  
  79. void Sujiko::unset(size_t cell, uint8_t value)
  80. {
  81.     _usedSet ^= 1 << value;
  82.     _usedCells ^= 1 << cell;
  83.     _cells[cell] = 0;
  84.     _sums -= multipliers[cell] * value;
  85. }
  86.  
  87. bool Sujiko::filled()
  88. {
  89.     return _usedSet == 0b1111111110;
  90. }
  91.  
  92. bool Sujiko::valid()
  93. {
  94.     return filled() && v4equal(_sums, _clues);
  95. }
  96.  
  97. bool Sujiko::cantBeValid()
  98. {
  99.     return
  100.         // These identities are from Wikipedia and allow us to bail earlier
  101.         (_cells[0] + _cells[8]) + _diag2 > 45 + _cells[4] ||
  102.         (_cells[2] + _cells[6]) + _diag1 > 45 + _cells[4] ||
  103.         // General properties
  104.         v4anyGreater(_sums, _clues);
  105. }
  106.  
  107. bool Sujiko::used(uint8_t v)
  108. {
  109.     return (_usedSet & (1 << v)) != 0;
  110. }
  111.  
  112. // Precondition: not filled
  113. bool Sujiko::solve()
  114. {
  115.     int firstUnusedCell = 4;
  116.     // This loop gets unrolled, experiments with running from a higher known
  117.     // index didn't make it faster. This static order works from most
  118.     // constrained to least constrained.
  119.     static const size_t constrainedOrder[] = { 4, 1, 5, 7, 3, 0, 2, 6, 8 };
  120.     for (int i = 0; i < 9; i++) {
  121.         // The compiler inlines the test values from above
  122.         if ((_usedCells & (1 << constrainedOrder[i])) == 0) {
  123.             firstUnusedCell = constrainedOrder[i];
  124.             break;
  125.         }
  126.     }
  127.     v4 diff = _clues - _sums;
  128.     if (firstUnusedCell == 0 || firstUnusedCell == 2 || firstUnusedCell == 6 || firstUnusedCell == 8) {
  129.         uint16_t toUse = (((1 << diff[0]) | (1 << diff[1])) | ((1 << diff[2]) | (1 << diff[3]))) & 0b1111111110;
  130.         if ((_usedSet ^ toUse) != 0b1111111110) {
  131.             // Means we would need to duplicate a value
  132.             return false;
  133.         } else {
  134.             if ((_usedCells & (1 << 0)) == 0)
  135.                 set(0, diff[3]);
  136.             if ((_usedCells & (1 << 2)) == 0)
  137.                 set(2, diff[2]);
  138.             if ((_usedCells & (1 << 6)) == 0)
  139.                 set(6, diff[1]);
  140.             if ((_usedCells & (1 << 8)) == 0)
  141.                 set(8, diff[0]);
  142.             return true;
  143.         }
  144.     }
  145.     uint8_t highestUnused = 63 - __builtin_clzl(~_usedSet & 0b1111111111);
  146.     const uint16_t originalSet = _usedSet;
  147.     for (uint8_t testValue = highestUnused; testValue != 0; --testValue) {
  148.         // Testing against the original set is a bit faster because there is no
  149.         // longer a data dependency
  150.         if ((originalSet & (1 << testValue)) == 0) {
  151.             set(firstUnusedCell, testValue);
  152.             if (!cantBeValid()) {
  153.                 // This is the same as not filled
  154.                 if (firstUnusedCell != 8) {
  155.                     if (solve()) {
  156.                         return true;
  157.                     }
  158.                 }
  159.                 if (valid()) {
  160.                     return true;
  161.                 }
  162.             }
  163.             unset(firstUnusedCell, testValue);
  164.         }
  165.     }
  166.     return false;
  167. }
  168.  
  169. void Sujiko::print()
  170. {
  171.     printf("%d,%d,%d,%d\n%d%d%d\n%d%d%d\n%d%d%d\n",
  172.             _clues[3], _clues[2], _clues[1], _clues[0],
  173.             _cells[0], _cells[1], _cells[2],
  174.             _cells[3], _cells[4], _cells[5],
  175.             _cells[6], _cells[7], _cells[8]);
  176. }
  177.  
  178. static __inline__ unsigned long long rdtsc(void)
  179. {
  180.     unsigned hi, lo;
  181.     __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
  182.     return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
  183. }
  184.  
  185. static const int ATTEMPTS = 300000;
  186.  
  187. int main(int argc, char* argv[])
  188. {
  189.     Sujiko board(22, 21, 24, 24);
  190.     board.set(1, 4);
  191.     board.set(2, 3);
  192.     board.solve();
  193.  
  194.     unsigned long long start = rdtsc();
  195.     for (int i = 0; i < ATTEMPTS; i++) {
  196.         Sujiko board(22, 21, 24, 24);
  197.         board.set(1, 4);
  198.         board.set(2, 3);
  199.         board.solve();
  200.     }
  201.     unsigned long long end = rdtsc();
  202.     unsigned long long cycles = end - start;
  203.  
  204.     board.print();
  205.  
  206.     printf("Average time is %f cycles, %f us\n", (double)cycles / ATTEMPTS, (double)cycles / 2.8e9 * 1e6 / (double)ATTEMPTS);
  207.     return 0;
  208. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top