Advertisement
Guest User

Untitled

a guest
Oct 16th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.58 KB | None | 0 0
  1. #include <algorithm>
  2. #include <cstdint>
  3. #include <cstdio>
  4.  
  5. #include <xmmintrin.h>
  6.  
  7.  
  8. typedef int32_t v4 __attribute__ ((vector_size(sizeof(int32_t) * 4)));
  9.  
  10. inline bool v4equal(v4 a, v4 b)
  11. {
  12. __m128i v1 = _mm_load_si128((__m128i*)&a);
  13. __m128i v2 = _mm_load_si128((__m128i*)&b);
  14. __m128i vcmp = _mm_cmpeq_epi32(v1, v2);
  15. uint16_t mask = _mm_movemask_epi8(vcmp);
  16. return mask == 0xffff;
  17. }
  18.  
  19. inline bool v4anyGreater(v4 a, v4 b)
  20. {
  21. __m128i v1 = _mm_load_si128((__m128i*)&a);
  22. __m128i v2 = _mm_load_si128((__m128i*)&b);
  23. __m128i vcmp = _mm_cmpgt_epi32(v1, v2);
  24. uint16_t mask = _mm_movemask_epi8(vcmp);
  25. return mask != 0;
  26. }
  27.  
  28. class Sujiko
  29. {
  30. private:
  31. uint16_t _usedSet;
  32. uint16_t _usedCells;
  33. uint8_t _cells[9];
  34. v4 _sums;
  35. uint8_t _diag1, _diag2;
  36. v4 _clues;
  37.  
  38. inline bool filled();
  39. inline bool used(uint8_t v);
  40. inline void unset(size_t cell, uint8_t value);
  41. inline bool cantBeValid();
  42. inline bool valid();
  43. public:
  44. Sujiko(uint8_t clue1, uint8_t clue2, uint8_t clue3, uint8_t clue4)
  45. : _usedSet(0),
  46. _usedCells(0),
  47. _cells {0},
  48. _sums {0,0,0,0},
  49. _diag1(clue1 + clue3),
  50. _diag2(clue2 + clue4),
  51. _clues { clue4, clue3, clue2, clue1 }
  52. {}
  53.  
  54. inline void set(size_t cell, uint8_t value);
  55. bool solve();
  56. void print();
  57. };
  58.  
  59. static const v4 multipliers[] = {
  60. { 0, 0, 0, 1 },
  61. { 0, 0, 1, 1 },
  62. { 0, 0, 1, 0 },
  63. { 0, 1, 0, 1 },
  64. { 1, 1, 1, 1 },
  65. { 1, 0, 1, 0 },
  66. { 0, 1, 0, 0 },
  67. { 1, 1, 0, 0 },
  68. { 1, 0, 0, 0 },
  69. };
  70.  
  71. void Sujiko::set(size_t cell, uint8_t value)
  72. {
  73. _sums += multipliers[cell] * value;
  74. _cells[cell] = value;
  75. _usedSet |= 1 << value;
  76. _usedCells |= 1 << cell;
  77. }
  78.  
  79. void Sujiko::unset(size_t cell, uint8_t value)
  80. {
  81. _usedSet ^= 1 << value;
  82. _usedCells ^= 1 << cell;
  83. _cells[cell] = 0;
  84. _sums -= multipliers[cell] * value;
  85. }
  86.  
  87. bool Sujiko::filled()
  88. {
  89. return _usedSet == 0b1111111110;
  90. }
  91.  
  92. bool Sujiko::valid()
  93. {
  94. return filled() && v4equal(_sums, _clues);
  95. }
  96.  
  97. bool Sujiko::cantBeValid()
  98. {
  99. return
  100. // These identities are from Wikipedia and allow us to bail earlier
  101. (_cells[0] + _cells[8]) + _diag2 > 45 + _cells[4] ||
  102. (_cells[2] + _cells[6]) + _diag1 > 45 + _cells[4] ||
  103. // General properties
  104. v4anyGreater(_sums, _clues);
  105. }
  106.  
  107. bool Sujiko::used(uint8_t v)
  108. {
  109. return (_usedSet & (1 << v)) != 0;
  110. }
  111.  
  112. // Precondition: not filled
  113. bool Sujiko::solve()
  114. {
  115. int firstUnusedCell = 4;
  116. // This loop gets unrolled, experiments with running from a higher known
  117. // index didn't make it faster. This static order works from most
  118. // constrained to least constrained.
  119. static const size_t constrainedOrder[] = { 4, 1, 5, 7, 3, 0, 2, 6, 8 };
  120. for (int i = 0; i < 9; i++) {
  121. // The compiler inlines the test values from above
  122. if ((_usedCells & (1 << constrainedOrder[i])) == 0) {
  123. firstUnusedCell = constrainedOrder[i];
  124. break;
  125. }
  126. }
  127. v4 diff = _clues - _sums;
  128. if (firstUnusedCell == 0 || firstUnusedCell == 2 || firstUnusedCell == 6 || firstUnusedCell == 8) {
  129. uint16_t toUse = (((1 << diff[0]) | (1 << diff[1])) | ((1 << diff[2]) | (1 << diff[3]))) & 0b1111111110;
  130. if ((_usedSet ^ toUse) != 0b1111111110) {
  131. // Means we would need to duplicate a value
  132. return false;
  133. } else {
  134. if ((_usedCells & (1 << 0)) == 0)
  135. set(0, diff[3]);
  136. if ((_usedCells & (1 << 2)) == 0)
  137. set(2, diff[2]);
  138. if ((_usedCells & (1 << 6)) == 0)
  139. set(6, diff[1]);
  140. if ((_usedCells & (1 << 8)) == 0)
  141. set(8, diff[0]);
  142. return true;
  143. }
  144. }
  145. uint8_t highestUnused = 63 - __builtin_clzl(~_usedSet & 0b1111111111);
  146. const uint16_t originalSet = _usedSet;
  147. for (uint8_t testValue = highestUnused; testValue != 0; --testValue) {
  148. // Testing against the original set is a bit faster because there is no
  149. // longer a data dependency
  150. if ((originalSet & (1 << testValue)) == 0) {
  151. set(firstUnusedCell, testValue);
  152. if (!cantBeValid()) {
  153. // This is the same as not filled
  154. if (firstUnusedCell != 8) {
  155. if (solve()) {
  156. return true;
  157. }
  158. }
  159. if (valid()) {
  160. return true;
  161. }
  162. }
  163. unset(firstUnusedCell, testValue);
  164. }
  165. }
  166. return false;
  167. }
  168.  
  169. void Sujiko::print()
  170. {
  171. printf("%d,%d,%d,%d\n%d%d%d\n%d%d%d\n%d%d%d\n",
  172. _clues[3], _clues[2], _clues[1], _clues[0],
  173. _cells[0], _cells[1], _cells[2],
  174. _cells[3], _cells[4], _cells[5],
  175. _cells[6], _cells[7], _cells[8]);
  176. }
  177.  
  178. static __inline__ unsigned long long rdtsc(void)
  179. {
  180. unsigned hi, lo;
  181. __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
  182. return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
  183. }
  184.  
  185. static const int ATTEMPTS = 300000;
  186.  
  187. int main(int argc, char* argv[])
  188. {
  189. Sujiko board(22, 21, 24, 24);
  190. board.set(1, 4);
  191. board.set(2, 3);
  192. board.solve();
  193.  
  194. unsigned long long start = rdtsc();
  195. for (int i = 0; i < ATTEMPTS; i++) {
  196. Sujiko board(22, 21, 24, 24);
  197. board.set(1, 4);
  198. board.set(2, 3);
  199. board.solve();
  200. }
  201. unsigned long long end = rdtsc();
  202. unsigned long long cycles = end - start;
  203.  
  204. board.print();
  205.  
  206. printf("Average time is %f cycles, %f us\n", (double)cycles / ATTEMPTS, (double)cycles / 2.8e9 * 1e6 / (double)ATTEMPTS);
  207. return 0;
  208. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement