Advertisement
Guest User

Untitled

a guest
Nov 11th, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 41.81 KB | None | 0 0
  1. #ifndef K9_AVX_H_
  2. #define K9_AVX_H_
  3.  
  4. //#include "common.h"
  5. #include "sort.h"
  6. #include "stdint.h"
  7. //#include <algorithm>
  8. #include <cstring>
  9. //#include <cassert>
  10. //#include "timer.h"
  11.  
  12. const int tab32[32] = {
  13. 0, 9, 1, 10, 13, 21, 2, 29,
  14. 11, 14, 16, 18, 22, 25, 3, 30,
  15. 8, 12, 20, 28, 15, 17, 24, 7,
  16. 19, 27, 23, 6, 26, 5, 4, 31 };
  17.  
  18. inline int log2_32(uint32_t value){
  19. value |= value >> 1;
  20. value |= value >> 2;
  21. value |= value >> 4;
  22. value |= value >> 8;
  23. value |= value >> 16;
  24. return tab32[(uint32_t)(value * 0x07C4ACDD) >> 27];
  25. }
  26.  
  27. namespace made {
  28. typedef uint8_t Digit;
  29. //typedef uint_fast8_t Digit;
  30. //typedef uint32_t Digit;
  31. //typedef uint16_t Digit;
  32. //typedef uint32_t Element; // 0..10^9 value range
  33. // typedef uint32_t ElementCounter; // 0..10^7 array size
  34. typedef unsigned int Element; // 0..10^9 value range
  35. typedef unsigned int ElementCounter; // 0..10^7 array size
  36. typedef uint16_t Digit2Byte;
  37. typedef uint_fast32_t FE;
  38. const uint_fast8_t ELEMENT_SIZE = sizeof(Element);
  39. const uint_fast8_t DIGIT_SIZE = sizeof(Digit);
  40. const uint_fast32_t MAX_DIGIT_COUNT = 1 << (DIGIT_SIZE * 8);
  41. const uint_fast32_t LOWER_DIGIT_MASK = MAX_DIGIT_COUNT - 1; // digit = 1 byte: 0xFF, digit = 2 byte: 0xFFFF, etc.;
  42. const uint_fast8_t DIGIT_2BYTE_SIZE = sizeof(Digit2Byte);
  43. const uint_fast32_t MAX_DIGIT_2BYTE_COUNT = 1 << (DIGIT_2BYTE_SIZE * 8);
  44. const uint_fast8_t DIGIT_2BYTE_STEP = ELEMENT_SIZE / DIGIT_2BYTE_SIZE;
  45. //typedef uint32_t FE;
  46. //const ElementCounter L1_CACHE_SIZE = 32 * 1024; // 32 KB // 2^13 = 8192 integers
  47. //const ElementCounter L1_CACHE_SIZE = 64 * 1024; // 64 KB // 2^14 = 16384 integers
  48. const ElementCounter L1_CACHE_SIZE = 32 * 8 * 1024; // 256 KB // 2^16 = 65536 integers
  49. const ElementCounter L2_CACHE_SIZE = 2 * 1024 * 1024; // 2 MB 2^19 = 524288 integers
  50. const ElementCounter L3_CACHE_SIZE = 20 * 1024 * 1024; // 20 MB = 20 * 2^20 = 5242880 integers
  51. }
  52.  
  53. #define INLINE_SWAP(a, b) {const Element __SWAP_TEMP__ = a; a = b; b = __SWAP_TEMP__;}
  54.  
  55. #define UNROL_LOOP(begin, size, digit_number, TIterator, iterator, additional_initializers, body)\
  56. {uint_fast32_t size_trunc_8 = (size) & (~0 << 3);\
  57. TIterator iterator = (TIterator)(begin) + (digit_number);\
  58. TIterator dend = (TIterator)((begin) + size_trunc_8);\
  59. additional_initializers;\
  60. for (; iterator < dend;) { body; body; body; body; body; body; body; body; }\
  61. for (dend = (TIterator)((begin) + (size)); iterator < dend;) { body; }}
  62.  
  63. #define SORT_DIGIT(begin, size, buffer, digit_size, shift)\
  64. const FE DIGIT_AMOUNT = 1 << (digit_size);\
  65. const FE DIGIT_MASK = DIGIT_AMOUNT - 1;\
  66. FE count[DIGIT_AMOUNT] = { 0 };\
  67. Element *pends[DIGIT_AMOUNT];\
  68. /*Counting amounts*/UNROL_LOOP((begin), (size), 0, Element*, dp, , ++count[(*dp >> (shift)) & DIGIT_MASK]; dp++);\
  69. /*Calculating places*/UNROL_LOOP(0, DIGIT_AMOUNT, 0, FE, i, Element *q = (buffer); , pends[i] = q; q += count[i++]);\
  70. /*Sorting - moving to ordered places*/UNROL_LOOP((begin), (size), 0, Element*, dp, , *pends[(*dp >> shift) & DIGIT_MASK]++ = *dp; dp++;);\
  71.  
  72. // bool skiped = false;\
  73. //for (FE i = 0; i < DIGIT_AMOUNT; ++i) {skiped = skiped || (count[i] == size);}\
  74. // if (!skiped) {\
  75. ///*Calculating places*/UNROL_LOOP(0, DIGIT_AMOUNT, 0, FE, i, Element *q = (buffer); , pends[i] = q; q += count[i++]);\
  76. ///*Sorting - moving to ordered places*/UNROL_LOOP((begin), (size), 0, Element*, dp, , *pends[(*dp >> shift) & DIGIT_MASK]++ = *dp; dp++;);\
  77. // }
  78.  
  79.  
  80. namespace made {
  81. namespace bruteforce {
  82. struct dnums {
  83. FE D1, D2, D3, D4;
  84. FE D1shift, D2shift, D3shift, D4shift;
  85. };
  86. const dnums nums = {9, 9, 5, 7, 0, 9, 18, 23};
  87. void LSB_1_2(Element *begin, ElementCounter size, Element *buffer) {
  88. {SORT_DIGIT(begin, size, buffer, nums.D1, nums.D1shift);}
  89. {SORT_DIGIT(buffer, size, begin, nums.D2, nums.D2shift);}
  90. }
  91. void LSB_3times(Element *begin, ElementCounter size, Element *buffer) {
  92. {SORT_DIGIT(begin, size, buffer, nums.D1, nums.D1shift);}
  93. {SORT_DIGIT(buffer, size, begin, nums.D2, nums.D2shift);}
  94. {SORT_DIGIT(begin, size, buffer, nums.D3, nums.D3shift);}
  95. }
  96. void MSB3(Element *begin, ElementCounter size, Element *buffer) {
  97. SORT_DIGIT(begin, size, buffer, nums.D3, nums.D3shift)
  98. for (FE i = 0; i < DIGIT_AMOUNT; ++i) {/*for each digit call subsort*/
  99. ElementCounter sub_size = count[i];
  100. if (sub_size != 0) {// skipping empty buckets
  101. Element *pend = pends[i];
  102. Element *pstart = pend - sub_size;
  103. LSB_1_2(pstart, sub_size, begin);
  104. }
  105. }
  106. }
  107.  
  108. void MSB4(Element *begin, ElementCounter size, Element *buffer) {
  109. SORT_DIGIT(begin, size, buffer, nums.D4, nums.D4shift)
  110. for (FE i = 0; i < DIGIT_AMOUNT; ++i) {/*for each digit call subsort*/
  111. ElementCounter sub_size = count[i];
  112. if (sub_size != 0) {// skipping empty buckets
  113. Element *pend = pends[i];
  114. Element *pstart = pend - sub_size;
  115. MSB3(pstart, sub_size, begin + (pstart - buffer));
  116. //LSB_3times(pstart, sub_size, begin + (pstart - buffer));
  117. }
  118. }
  119. }
  120. void MSBCaller(Element *begin, ElementCounter size) {
  121. Element *buffer = (Element *)malloc(sizeof(Element) * size);
  122. MSB4(begin, size, buffer);
  123. free(buffer);
  124. }
  125. }
  126.  
  127. namespace avx {
  128. const FE max_digits = 2;
  129. FE counts[max_digits][MAX_DIGIT_COUNT] = { 0 }; // when needed, expand to 3
  130. Element *pends_arr[max_digits][MAX_DIGIT_COUNT];
  131.  
  132. void LSB(Element *begin, ElementCounter size, Element *buffer, Element maxshift) {
  133. Element *end = begin + size;
  134. FE count[MAX_DIGIT_COUNT];
  135. Element *pends[MAX_DIGIT_COUNT];
  136. Digit* dp;
  137. Digit* dend;
  138. for (Element shift = 0; shift <= maxshift; shift += 8) {
  139. //Element shift = 0;
  140. std::memset(count, 0, sizeof(count));
  141. dp = (Digit*)(begin)+(shift >> 3); // digit pointer
  142. for (dend = (Digit*)(begin + (size & (~0 << 3))); dp < dend;) {
  143. ++count[*dp]; dp += ELEMENT_SIZE;
  144. ++count[*dp]; dp += ELEMENT_SIZE;
  145. ++count[*dp]; dp += ELEMENT_SIZE;
  146. ++count[*dp]; dp += ELEMENT_SIZE;
  147. ++count[*dp]; dp += ELEMENT_SIZE;
  148. ++count[*dp]; dp += ELEMENT_SIZE;
  149. ++count[*dp]; dp += ELEMENT_SIZE;
  150. ++count[*dp]; dp += ELEMENT_SIZE;
  151. }
  152. for (dp = (Digit*)(begin + (size & (~0 << 3))) + (shift >> 3), dend = (Digit*)(begin + size); dp < dend;) {
  153. ++count[*dp]; dp += ELEMENT_SIZE;
  154. }
  155. //for (Element shift = 0; shift <= maxshift; shift += 8) {
  156. Element *q = buffer;
  157. for (ElementCounter i = 0; i < MAX_DIGIT_COUNT; q += count[i++]) {
  158. pends[i] = q;
  159. }
  160. // std::cout << "Starting sorting" << std::endl;
  161. Element* arr_ptr = begin;
  162. Element *arr_end = begin + (size & (~0 << 3));
  163. dp = (Digit*)(begin)+(shift >> 3);
  164. for (; arr_ptr < arr_end;) {
  165. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  166. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  167. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  168. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  169. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  170. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  171. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  172. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  173. }
  174. for (arr_ptr = begin + (size & (~0 << 3)), arr_end = begin + size; arr_ptr < arr_end;) {
  175. *pends[*dp]++ = *arr_ptr; ++arr_ptr; dp += ELEMENT_SIZE;
  176. }
  177. std::swap(begin, buffer);
  178. }
  179. }
  180.  
  181. void LSB_1(Element *begin, ElementCounter size, Element *buffer) {
  182. const FE DIGIT_AMOUNT = MAX_DIGIT_COUNT;
  183. FE(&count)[DIGIT_AMOUNT] = counts[0];
  184. std::memset(count, 0, sizeof(count));
  185. Element*(&pends)[DIGIT_AMOUNT] = pends_arr[0];
  186. // FE count[DIGIT_AMOUNT] = { 0 };
  187. // Element *pends[DIGIT_AMOUNT];
  188. const FE num_digit = 0;
  189. /*Counting amounts*/
  190. UNROL_LOOP(begin, size, num_digit, Digit*, dp, , ++count[*dp]; dp += ELEMENT_SIZE;);
  191. /*Calculating places*/
  192. UNROL_LOOP(0, DIGIT_AMOUNT, 0, FE, i, Element *q = buffer; , pends[i] = q; q += count[i++]);
  193. /*Sorting - moving to ordered places*/
  194. UNROL_LOOP(begin, size, 0, Element*, arr_ptr, Digit *dp = (Digit*)(begin)+num_digit;, *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;);
  195. }
  196.  
  197. void LSB1(Element *begin, ElementCounter size, Element *buffer) {
  198. FE size_trunc_8 = size & (~0 << 3);
  199. FE count[MAX_DIGIT_COUNT] = { 0 };
  200. Element *pends[MAX_DIGIT_COUNT];
  201. /*Counting amounts*/ {
  202. Digit *dp0 = (Digit*)(begin);
  203. Digit *dend = (Digit*)(begin + size_trunc_8);
  204. for (; dp0 < dend;) {
  205. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  206. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  207. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  208. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  209. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  210. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  211. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  212. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  213. }
  214. for (dend = (Digit*)(begin + size); dp0 < dend;) {
  215. ++count[*dp0]; dp0 += ELEMENT_SIZE;
  216. }}
  217. /*Calculating places*/ {
  218. Element *q = buffer;
  219. for (FE i = 0; i < MAX_DIGIT_COUNT; ) {
  220. pends[i] = q; q += count[i++];
  221. pends[i] = q; q += count[i++];
  222. pends[i] = q; q += count[i++];
  223. pends[i] = q; q += count[i++];
  224. pends[i] = q; q += count[i++];
  225. pends[i] = q; q += count[i++];
  226. pends[i] = q; q += count[i++];
  227. pends[i] = q; q += count[i++];
  228. }}
  229. /*Sorting - moving to ordered places*/ {
  230. Element* arr_ptr = begin;
  231. Element *arr_end = begin + size_trunc_8;
  232. Digit *dp = (Digit*)(begin);
  233. for (; arr_ptr < arr_end;) {
  234. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  235. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  236. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  237. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  238. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  239. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  240. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  241. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  242. }
  243. for (arr_end = begin + size; arr_ptr < arr_end;) {
  244. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  245. }
  246. }
  247. }
  248.  
  249. void LSB2(Element *begin, ElementCounter size, Element *buffer) {
  250. FE size_trunc_8 = size & (~0 << 3);
  251. const Digit num_digits = 2;
  252. // FE counts[num_digits][MAX_DIGIT_COUNT] = { 0 };
  253. FE(&count0)[MAX_DIGIT_COUNT] = counts[0];
  254. FE(&count1)[MAX_DIGIT_COUNT] = counts[1];
  255. std::memset(count0, 0, sizeof(count0));
  256. std::memset(count1, 0, sizeof(count1));
  257. // Element *pends_arr[num_digits][MAX_DIGIT_COUNT];
  258. /*Counting amounts*/ {
  259. Digit *dp0 = (Digit*)(begin), *dp1 = (Digit*)(begin)+1;
  260. Digit *dend = (Digit*)(begin + size_trunc_8);
  261. for (; dp0 < dend;) {
  262. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  263. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  264. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  265. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  266. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  267. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  268. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  269. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  270. }
  271. for (dend = (Digit*)(begin + size); dp0 < dend;) {
  272. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE;
  273. }
  274. }
  275. /*Calculating places*/ {
  276. Element*(&pend0)[MAX_DIGIT_COUNT] = pends_arr[0];
  277. Element*(&pend1)[MAX_DIGIT_COUNT] = pends_arr[1];
  278. Element *q0 = buffer; Element *q1 = begin;
  279. for (FE i = 0; i < MAX_DIGIT_COUNT; ) {
  280. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  281. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  282. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  283. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  284. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  285. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  286. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  287. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i++];
  288. }}
  289. //Sorting - moving to ordered places
  290. Element *arr_ptr = begin;
  291. Element *arr_end = begin + size_trunc_8;
  292. Digit *dp = (Digit*)(begin)+0;
  293. {
  294. Element*(&pend)[MAX_DIGIT_COUNT] = pends_arr[0];
  295. for (; arr_ptr < arr_end;) {
  296. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  297. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  298. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  299. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  300. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  301. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  302. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  303. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  304. }
  305. for (arr_ptr = begin + size_trunc_8, arr_end = begin + size; arr_ptr < arr_end;) {
  306. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  307. }
  308. }
  309. Element *temp = begin; begin = buffer; buffer = temp;
  310. arr_ptr = begin;
  311. arr_end = begin + size_trunc_8;
  312. dp = (Digit*)(begin)+1;
  313. {
  314. Element*(&pend)[MAX_DIGIT_COUNT] = pends_arr[1];
  315. for (; arr_ptr < arr_end;) {
  316. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  317. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  318. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  319. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  320. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  321. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  322. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  323. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  324. }
  325. for (arr_ptr = begin + size_trunc_8, arr_end = begin + size; arr_ptr < arr_end;) {
  326. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  327. }
  328. }
  329. }
  330.  
  331. void LSB3(Element *begin, ElementCounter size, Element *buffer) {
  332. FE size_trunc_8 = size & (~0 << 3);
  333. const Digit num_digits = 3;
  334. FE counts[num_digits][MAX_DIGIT_COUNT] = { 0 };
  335. FE(&count0)[MAX_DIGIT_COUNT] = counts[0];
  336. FE(&count1)[MAX_DIGIT_COUNT] = counts[1];
  337. FE(&count2)[MAX_DIGIT_COUNT] = counts[2];
  338. Element *pends_arr[num_digits][MAX_DIGIT_COUNT];
  339. /*Counting amounts*/ {
  340. Digit *dp0 = (Digit*)(begin), *dp1 = (Digit*)(begin)+1, *dp2 = (Digit*)(begin)+2;
  341. Digit *dend = (Digit*)(begin + size_trunc_8);
  342. for (; dp0 < dend;) {
  343. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  344. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  345. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  346. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  347. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  348. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  349. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  350. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  351. }
  352. for (dp0 = (Digit*)(begin + size_trunc_8), dend = (Digit*)(begin + size); dp0 < dend;) {
  353. ++count0[*dp0]; dp0 += ELEMENT_SIZE; ++count1[*dp1]; dp1 += ELEMENT_SIZE; ++count2[*dp2]; dp2 += ELEMENT_SIZE;
  354. }}
  355. /*Calculating places*/ {
  356. Element*(&pend0)[MAX_DIGIT_COUNT] = pends_arr[0];
  357. Element*(&pend1)[MAX_DIGIT_COUNT] = pends_arr[1];
  358. Element*(&pend2)[MAX_DIGIT_COUNT] = pends_arr[2];
  359. Element *q0 = buffer; Element *q1 = begin; Element *q2 = buffer;
  360. for (FE i = 0; i < MAX_DIGIT_COUNT; ) {
  361. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  362. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  363. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  364. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  365. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  366. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  367. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  368. pend0[i] = q0; q0 += count0[i]; pend1[i] = q1; q1 += count1[i]; pend2[i] = q2; q2 += count2[i++];
  369. }}
  370. /*Sorting - moving to ordered places*/ {
  371. for (FE i = 0; i < num_digits; ++i) {
  372. Element *arr_ptr = begin;
  373. Element *arr_end = begin + size_trunc_8;
  374. Element*(&pend)[MAX_DIGIT_COUNT] = pends_arr[i];
  375. Digit *dp = (Digit*)(begin)+i;
  376. for (; arr_ptr < arr_end;) {
  377. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  378. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  379. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  380. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  381. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  382. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  383. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  384. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  385. }
  386. for (arr_ptr = begin + size_trunc_8, arr_end = begin + size; arr_ptr < arr_end;) {
  387. *pend[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  388. }
  389. std::swap(begin, buffer);
  390. }}
  391. }
  392.  
  393. void MSB_extra_mem_2(Element *begin, ElementCounter size, Element *buffer) {
  394. FE(&count)[MAX_DIGIT_COUNT] = counts[0];
  395. std::memset(count, 0, sizeof(count));
  396. Element*(&pends)[MAX_DIGIT_COUNT] = pends_arr[0];
  397. // FE count[MAX_DIGIT_COUNT] = { 0 };
  398. // Element *pends[MAX_DIGIT_COUNT];
  399. const FE size_trunc_8 = size & (~0 << 3);
  400. const FE shift = 8;
  401. const FE num_digit = shift >> 3;
  402. /*Counting amounts*/ {
  403. Digit *dp = (Digit*)(begin)+num_digit;
  404. Digit *dend = (Digit*)(begin + size_trunc_8);
  405. for (; dp < dend;) {
  406. ++count[*dp]; dp += ELEMENT_SIZE;
  407. ++count[*dp]; dp += ELEMENT_SIZE;
  408. ++count[*dp]; dp += ELEMENT_SIZE;
  409. ++count[*dp]; dp += ELEMENT_SIZE;
  410. ++count[*dp]; dp += ELEMENT_SIZE;
  411. ++count[*dp]; dp += ELEMENT_SIZE;
  412. ++count[*dp]; dp += ELEMENT_SIZE;
  413. ++count[*dp]; dp += ELEMENT_SIZE;
  414. }
  415. for (dp = (Digit*)(begin + size_trunc_8) + num_digit, dend = (Digit*)(begin + size); dp < dend;) {
  416. ++count[*dp]; dp += ELEMENT_SIZE;
  417. }}
  418. /*Calculating places*/ {
  419. Element *q = buffer;
  420. for (FE i = 0; i < MAX_DIGIT_COUNT;) {
  421. pends[i] = q; q += count[i++];
  422. pends[i] = q; q += count[i++];
  423. pends[i] = q; q += count[i++];
  424. pends[i] = q; q += count[i++];
  425. pends[i] = q; q += count[i++];
  426. pends[i] = q; q += count[i++];
  427. pends[i] = q; q += count[i++];
  428. pends[i] = q; q += count[i++];
  429. }}
  430. /*Sorting - moving to ordered places*/ {
  431. Element *arr_ptr = begin;
  432. Element *arr_end = begin + size_trunc_8;
  433. Digit *dp = (Digit*)(begin)+num_digit;
  434. for (; arr_ptr < arr_end;) {
  435. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  436. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  437. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  438. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  439. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  440. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  441. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  442. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  443. }
  444. for (arr_ptr = begin + size_trunc_8, arr_end = begin + size; arr_ptr < arr_end;) {
  445. *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;
  446. }}
  447. /*for each digit call subsort*/ {
  448. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  449. ElementCounter sub_size = count[i];
  450. if (sub_size != 0) {// skipping empty buckets
  451. Element *pend = pends[i];
  452. Element *pstart = pend - sub_size;
  453. LSB1(pstart, sub_size, begin + (pstart - buffer));
  454. //InsertionSort(pstart, sub_size);
  455. }
  456. }}
  457. }
  458.  
  459. void InsertionSort(Element* begin, Element* end) {
  460. Element* j;
  461. for (Element* i = begin + 1; i < end; ++i) {
  462. j = i - 1;
  463. while ((j >= begin) && (*j > *(j + 1))) {
  464. INLINE_SWAP(*j, *(j + 1));
  465. --j;
  466. }
  467. }
  468. }
  469.  
  470. void MSB_inplace_1_full(Element *begin, ElementCounter size) {
  471. FE count[MAX_DIGIT_COUNT] = { 0 };
  472. Element *pends[MAX_DIGIT_COUNT];
  473. Element *pstarts[MAX_DIGIT_COUNT];
  474. const FE shift = 0;
  475. const FE num_digit = shift >> 3;
  476. //Counting amounts
  477. UNROL_LOOP(begin, size, num_digit, Digit*, dp, Element *arr_ptr = begin;, ++count[*dp]; dp += ELEMENT_SIZE;);
  478. //Calculating places*/
  479. UNROL_LOOP(0, MAX_DIGIT_COUNT, 0, FE, i, Element *q = begin;,
  480. pstarts[i] = q; q += count[i]; pends[i++] = q;);
  481. /*Sorting inplace - moving to ordered places*/ {
  482. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  483. while (pstarts[i] != pends[i]) {
  484. FE value = *pstarts[i];
  485. FE y = (value >> shift) & LOWER_DIGIT_MASK;
  486. while (i != y) {
  487. FE temp = *pstarts[y];
  488. *(pstarts[y]++) = value;
  489. value = temp;
  490. y = (value >> shift) & LOWER_DIGIT_MASK;
  491. }
  492. *(pstarts[i]++) = value;
  493. }
  494. }}
  495.  
  496. }
  497.  
  498. void MSB_inplace_2_full(Element *begin, ElementCounter size) {
  499. FE count[MAX_DIGIT_COUNT] = { 0 };
  500. Element *pends[MAX_DIGIT_COUNT];
  501. Element *pstarts[MAX_DIGIT_COUNT];
  502. const FE shift = 8;
  503. const FE num_digit = shift >> 3;
  504. //Counting amounts
  505. UNROL_LOOP(begin, size, num_digit, Digit*, dp, Element *arr_ptr = begin;, ++count[*dp]; dp += ELEMENT_SIZE;);
  506. //Calculating places*/
  507. UNROL_LOOP(0, MAX_DIGIT_COUNT, 0, FE, i, Element *q = begin;,
  508. pstarts[i] = q; q += count[i]; pends[i++] = q;);
  509. /*Sorting inplace - moving to ordered places*/ {
  510. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  511. while (pstarts[i] != pends[i]) {
  512. FE value = *pstarts[i];
  513. FE y = (value >> shift) & LOWER_DIGIT_MASK;
  514. while (i != y) {
  515. FE temp = *pstarts[y];
  516. *(pstarts[y]++) = value;
  517. value = temp;
  518. y = (value >> shift) & LOWER_DIGIT_MASK;
  519. }
  520. *(pstarts[i]++) = value;
  521. }
  522. }
  523. }
  524. /*for each digit call subsort in LSB order*/ {
  525. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  526. ElementCounter sub_size = count[i];
  527. if (sub_size != 0) {// skipping empty buckets
  528. Element *pend = pends[i];
  529. Element *pstart = pend - sub_size;
  530. if (sub_size > 16)
  531. MSB_inplace_1_full(pstart, sub_size);
  532. else
  533. InsertionSort(pstart, pend);
  534. }
  535. }}
  536. }
  537.  
  538. void MSB_inplace_3_full(Element *begin, ElementCounter size) {
  539. FE count[MAX_DIGIT_COUNT] = { 0 };
  540. Element *pends[MAX_DIGIT_COUNT];
  541. Element *pstarts[MAX_DIGIT_COUNT];
  542. const FE shift = 16;
  543. const FE num_digit = shift >> 3;
  544. //Counting amounts
  545. UNROL_LOOP(begin, size, num_digit, Digit*, dp, Element *arr_ptr = begin;, ++count[*dp]; dp += ELEMENT_SIZE;);
  546. //Calculating places*/
  547. UNROL_LOOP(0, MAX_DIGIT_COUNT, 0, FE, i, Element *q = begin;,
  548. pstarts[i] = q; q += count[i]; pends[i++] = q;);
  549. /*Sorting inplace - moving to ordered places*/ {
  550. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  551. while (pstarts[i] != pends[i]) {
  552. FE value = *pstarts[i];
  553. FE y = (value >> shift) & LOWER_DIGIT_MASK;
  554. while (i != y) {
  555. FE temp = *pstarts[y];
  556. *(pstarts[y]++) = value;
  557. value = temp;
  558. y = (value >> shift) & LOWER_DIGIT_MASK;
  559. }
  560. *(pstarts[i]++) = value;
  561. }
  562. }
  563. }
  564. /*for each digit call subsort in LSB order*/ {
  565. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  566. ElementCounter sub_size = count[i];
  567. if (sub_size != 0) {// skipping empty buckets
  568. Element *pend = pends[i];
  569. Element *pstart = pend - sub_size;
  570. //LSB2(pstart, sub_size, buffer + (pstart - begin));
  571. if (sub_size > 16)
  572. MSB_inplace_2_full(pstart, sub_size);
  573. else
  574. InsertionSort(pstart, pend);
  575. }
  576. }}
  577. }
  578.  
  579.  
  580. void MSB_inplace_3_extra(Element *begin, ElementCounter size) {
  581. FE count[MAX_DIGIT_COUNT] = { 0 };
  582. Element *pends[MAX_DIGIT_COUNT];
  583. Element *pstarts[MAX_DIGIT_COUNT];
  584. const FE size_trunc_8 = size & (~0 << 3);
  585. const FE shift = 16;
  586. const FE num_digit = shift >> 3;
  587. /*Counting amounts*/ {
  588. Digit* dp = (Digit*)(begin)+num_digit;
  589. Digit* dend = (Digit*)(begin + size_trunc_8);
  590. for (; dp < dend;) {
  591. ++count[*dp]; dp += ELEMENT_SIZE;
  592. ++count[*dp]; dp += ELEMENT_SIZE;
  593. ++count[*dp]; dp += ELEMENT_SIZE;
  594. ++count[*dp]; dp += ELEMENT_SIZE;
  595. ++count[*dp]; dp += ELEMENT_SIZE;
  596. ++count[*dp]; dp += ELEMENT_SIZE;
  597. ++count[*dp]; dp += ELEMENT_SIZE;
  598. ++count[*dp]; dp += ELEMENT_SIZE;
  599. }
  600. for (dp = (Digit*)(begin + size_trunc_8) + num_digit, dend = (Digit*)(begin + size); dp < dend;) {
  601. ++count[*dp]; dp += ELEMENT_SIZE;
  602. }}
  603. FE max_subsize = count[0];
  604. ///*Calculating places*/ {
  605. // Element *q = begin;
  606. // FE c;
  607. // for (FE i = 0; i < MAX_DIGIT_COUNT;) {
  608. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  609. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  610. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  611. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  612. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  613. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  614. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  615. // pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;
  616. // }}
  617. UNROL_LOOP(0, MAX_DIGIT_COUNT, 0, FE, i, Element *q = begin; FE c;,
  618. pstarts[i] = q; c = count[i]; q += c; pends[i++] = q; max_subsize = c > max_subsize ? c : max_subsize;);
  619. Element *buffer = (Element *)malloc(sizeof(Element) * max_subsize);
  620. /*Sorting inplace - moving to ordered places*/ {
  621. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  622. while (pstarts[i] != pends[i]) {
  623. FE value = *pstarts[i];
  624. FE y = (value >> shift) & LOWER_DIGIT_MASK;
  625. while (i != y) {
  626. FE temp = *pstarts[y];
  627. *(pstarts[y]++) = value;
  628. value = temp;
  629. y = (value >> shift) & LOWER_DIGIT_MASK;
  630. }
  631. *(pstarts[i]++) = value;
  632. }
  633. }
  634. }
  635. /*for each digit call subsort in LSB order*/ {
  636. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  637. ElementCounter sub_size = count[i];
  638. if (sub_size != 0) {// skipping empty buckets
  639. Element *pend = pends[i];
  640. Element *pstart = pend - sub_size;
  641. //LSB2(pstart, sub_size, buffer + (pstart - begin));
  642. LSB2(pstart, sub_size, buffer);
  643. }
  644. }}
  645. free(buffer);
  646. }
  647.  
  648. void MSB_extra_mem_3(Element *begin, ElementCounter size, Element *buffer) {
  649. const FE DIGIT_AMOUNT = MAX_DIGIT_COUNT;
  650. // FE(&count)[DIGIT_AMOUNT] = counts[2];
  651. // std::memset(count, 0, sizeof(count));
  652. // Element*(&pends)[DIGIT_AMOUNT] = pends_arr[2];
  653. FE count[DIGIT_AMOUNT] = { 0 };
  654. Element *pends[DIGIT_AMOUNT];
  655. const FE size_trunc_8 = size & (~0 << 3);
  656. const FE shift = 16;
  657. const FE num_digit = shift >> 3;
  658. /*Counting amounts*/
  659. UNROL_LOOP(begin, size, num_digit, Digit*, dp, , ++count[*dp]; dp += ELEMENT_SIZE;);
  660. /*Calculating places*/
  661. UNROL_LOOP(0, DIGIT_AMOUNT, 0, FE, i, Element *q = buffer; , pends[i] = q; q += count[i++]);
  662. /*Sorting - moving to ordered places*/
  663. UNROL_LOOP(begin, size, 0, Element*, arr_ptr, Digit *dp = (Digit*)(begin)+num_digit;, *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;);
  664. /*for each digit call subsort*/ {
  665. for (FE i = 0; i < MAX_DIGIT_COUNT; ++i) {
  666. ElementCounter sub_size = count[i];
  667. if (sub_size != 0) {// skipping empty buckets
  668. Element *pend = pends[i];
  669. Element *pstart = pend - sub_size;
  670. //LSB2(pstart, sub_size, begin + (pstart - buffer));
  671. LSB2(pstart, sub_size, begin);
  672. }
  673. }}
  674. }
  675.  
  676. void decide_steps(ElementCounter size, Element max) {
  677. const Element highest_bit = 30;//log2_32(max)+1;
  678. // divide by 2 to fit array copy
  679. const ElementCounter L1_SIZE = L1_CACHE_SIZE / sizeof(Element) / 2; // 8192 integers
  680. const ElementCounter L2_SIZE = L2_CACHE_SIZE / sizeof(Element) / 2; // 262144 integers
  681. if (size < L1_SIZE) {
  682. const FE maxshift = (highest_bit >> 3) << 2;
  683. //LSB from 0 to maxshift
  684. } else {
  685. // MSB step. After step check
  686. }
  687. /*
  688. size >
  689. */
  690. }
  691.  
  692. void MSB_extra_mem_4(Element *begin, ElementCounter size, Element *buffer) {
  693. const FE DIGIT_AMOUNT = MAX_DIGIT_COUNT >> 2;
  694. FE count[DIGIT_AMOUNT] = { 0 };
  695. Element *pends[DIGIT_AMOUNT];
  696. // FE(&count)[MAX_DIGIT_COUNT] = counts[3];
  697. // std::memset(count, 0, sizeof(count));
  698. // Element*(&pends)[MAX_DIGIT_COUNT] = pends_arr[3];
  699. //const FE size_trunc_8 = size & (~0 << 3);
  700. const FE shift = 24;
  701. const FE num_digit = shift >> 3; // 3
  702. // FE max = *begin;
  703. /*Counting amounts and getting maximum*/
  704. // UNROL_LOOP(begin, size, num_digit, Digit*, dp, Element *arr_ptr = begin;,
  705. // ++count[*dp]; dp += ELEMENT_SIZE; max = *arr_ptr > max ? *arr_ptr : max; ++arr_ptr;);
  706. UNROL_LOOP(begin, size, num_digit, Digit*, dp, Element *arr_ptr = begin;, ++count[*dp]; dp += ELEMENT_SIZE;);
  707. // /*Checking if we need to skip steps*/ {
  708. // // decide_steps(size, max);
  709. // // if ((size > 8000000) && (size < 12000000) && (max < 1 << 24))
  710. // // throw new std::exception();
  711. // // if ((size > 8000000) && (size < 12000000) && (max < 1 << 24))
  712. // // throw new std::exception();
  713. // // if ((max < 1 << 8) && (size > 1000))
  714. // // throw new std::exception();
  715. // if (max < 1 << 16) {
  716. // if ((size > 1000) && (size < 150000000))
  717. // throw new std::exception();
  718. // LSB2(begin, size, buffer);
  719. // return;
  720. // }
  721. // else if (max < 1 << 24) {
  722. // // if ((size > 50000) && (size < 5000000))
  723. // // throw new std::exception();
  724. // MSB_extra_mem_3(begin, size, buffer);
  725. // memcpy(begin, buffer, size * 4);
  726. // return;
  727. // }
  728. // }
  729. //Calculating places
  730. UNROL_LOOP(0, DIGIT_AMOUNT, 0, FE, i, Element *q = buffer; , pends[i] = q; q += count[i++]);
  731. //Sorting - moving to ordered places
  732. UNROL_LOOP(begin, size, 0, Element*, arr_ptr, Digit *dp = (Digit*)(begin)+num_digit;, *pends[*dp]++ = *arr_ptr++; dp += ELEMENT_SIZE;);
  733. /*for each digit call subsort*/ {
  734. for (FE i = 0; i < DIGIT_AMOUNT; ++i) {
  735. ElementCounter sub_size = count[i];
  736. if (sub_size != 0) {// skipping empty buckets
  737. Element *pend = pends[i];
  738. Element *pstart = pend - sub_size;
  739. MSB_extra_mem_3(pstart, sub_size, begin + (pstart - buffer));
  740. }
  741. }}
  742. }
  743.  
  744. void k9_sort(Element *begin, ElementCounter size) {
  745. //FE highest_bit = log2_32(size * 4) + 1;
  746. //if (highest_bit < 9) {
  747. // Element *buffer = (Element *)malloc(sizeof(Element) * size);
  748. // LSB1(begin, size, buffer);
  749. // memcpy(begin, buffer, size * sizeof(Element));
  750. // free(buffer);
  751. //} else if (highest_bit < 25) {
  752. // FE last_bits = (highest_bit & (~0 << 3)) - 1; // -1 just in case to not have too small buffer
  753. // FE size_decrease = 1 << last_bits;
  754. // FE small_size = size / size_decrease;
  755. // //Element *buffer = (Element *)malloc(sizeof(Element) * size);
  756. // //Element *buffer = (Element *)malloc(sizeof(Element) * small_size);
  757. // MSB_inplace_3(begin, size);
  758. // //MSB_extra_mem_3(begin, size, buffer);
  759. // //memcpy(begin, buffer, size * sizeof(Element));
  760. // //free(buffer);
  761. //} else {
  762. // Element *buffer = (Element *)malloc(sizeof(Element) * size);
  763. // // bruteforce::MSB4(begin, size, buffer);// currently da best
  764. // MSB_extra_mem_4(begin, size, buffer);// currently da best
  765. // free(buffer);
  766. //}
  767. Element *buffer = (Element *)malloc(sizeof(Element) * size);
  768. if (size < 1000) {
  769. LSB1(begin, size, buffer);
  770. memcpy(begin, buffer, size * sizeof(Element));
  771. //} else if (highest_bit < 25) { MSB_inplace_3_extra(begin, size);
  772. } else if (size < 12000000) {
  773. MSB_extra_mem_3(begin, size, buffer);
  774. memcpy(begin, buffer, size * sizeof(Element));
  775. } else {
  776. // bruteforce::MSB4(begin, size, buffer);// currently da best
  777. MSB_extra_mem_4(begin, size, buffer);// currently da best
  778. }
  779. free(buffer);
  780. //if (size < 1000) {
  781. // LSB_1(begin, size, buffer);
  782. // memcpy(begin, buffer, size * sizeof(Element));
  783. //} else if (size < 12000000) {
  784. // MSB_extra_mem_3(begin, size, buffer);
  785. // memcpy(begin, buffer, size * sizeof(Element));
  786. //} else {
  787. // // bruteforce::MSB4(begin, size, buffer);// currently da best
  788. // MSB_extra_mem_4(begin, size, buffer);// currently da best
  789. //}
  790. //memcpy(buffer, begin, size * sizeof(Element));
  791. }
  792. }
  793.  
  794. }
  795.  
  796. #ifndef MADE_CUSTOM_SORT_H_
  797. #define MADE_CUSTOM_SORT_H_
  798. void Sort(made::Element *arr, made::ElementCounter size) {
  799. made::avx::k9_sort(arr, size);
  800. }
  801. void Sort(int *arr, int size) {
  802. Sort((made::Element*)arr, (made::ElementCounter)size);
  803. }
  804. #endif // !MADE_CUSTOM_SORT_H_
  805.  
  806. #endif // !K9_AVX_H_
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement