Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * blogbitdetect.cpp
- * Daniel Lemire, April 2012
- * http://lemire.me/en/
- * This software is in the public domain.
- $ g++-4 --version
- g++-4 (GCC) 4.6.2
- $ g++-4 -Ofast -o blogbitdetect blogbitdetect.cpp
- $ ./blogbitdetect
- 1 189.6 1 325.8 1 754 1 197.4 1 105.2 1 1049 1 1917
- 2 174.3 2 318.1 2 754 2 296.9 2 105.2 2 1032 2 1864
- 3 161.7 3 324.2 3 754 3 397.1 3 106.5 3 1032 3 1864
- 4 151.1 4 318.1 4 762.6 4 482.8 4 105.2 4 1032 4 2097
- 5 137 5 316.6 5 754 5 536.9 5 107 5 1049 5 1864
- 6 127.3 6 318.1 6 762.6 6 578.5 6 104.2 6 1017 6 1721
- 7 117.3 7 319.6 7 745.7 7 588.7 7 104.7 7 1049 7 1561
- 8 106.7 8 327.4 8 762.6 8 599.2 8 105.2 8 1049 8 2165
- 9 97.83 9 325.8 9 762.6 9 615.7 9 104.9 9 1082 9 2165
- 10 94.79 10 324.2 10 737.5 10 604.6 10 105.5 10 1065 10 1491
- 11 86.37 11 318.1 11 762.6 11 615.7 11 106.2 11 1049 11 1459
- 12 81.54 12 324.2 12 745.7 12 615.7 12 106 12 1065 12 1428
- 13 77.58 13 322.6 13 754 13 621.4 13 104.5 13 1032 13 1291
- 14 72.01 14 324.2 14 745.7 14 621.4 14 102.6 14 1032 14 1316
- 15 67.31 15 321.1 15 745.7 15 615.7 15 103.9 15 1049 15 1428
- 16 65.41 16 324.2 16 745.7 16 615.7 16 106.7 16 1065 16 1917
- 17 63.61 17 322.6 17 745.7 17 610.1 17 104.4 17 1032 17 1220
- 18 60.51 18 325.8 18 771.4 18 610.1 18 104.5 18 1032 18 1157
- 19 57.41 19 324.2 19 745.7 19 610.1 19 105.2 19 1032 19 1137
- 20 54.65 20 319.6 20 762.6 20 604.6 20 105.5 20 1017 20 1137
- 21 52.68 21 316.6 21 762.6 21 615.7 21 104.5 21 1049 21 1100
- 22 50.12 22 321.1 22 745.7 22 615.7 22 105 22 1049 22 1177
- 23 49.56 23 324.2 23 745.7 23 610.1 23 102.9 23 1032 23 1049
- 24 47.83 24 325.8 24 754 24 615.7 24 106.7 24 1032 24 1157
- 25 46.38 25 324.2 25 754 25 627.2 25 103.9 25 1049 25 1049
- 26 44.8 26 322.6 26 754 26 615.7 26 105.5 26 1032 26 1002
- 27 43.07 27 313.6 27 633.1 27 532.6 27 105.2 27 958.7 27 1002
- 28 41.27 28 318.1 28 737.5 28 610.1 28 103.7 28 1049 28 1002
- 29 39.48 29 315.1 29 780.3 29 621.4 29 105.4 29 1032 29 945.2
- 30 38.37 30 318.1 30 771.4 30 615.7 30 104.9 30 1032 30 945.2
- 31 38.22 31 315.1 31 745.7 31 604.6 31 103.2 31 1032 31 932.1
- 32 849.5 0 318.1 0 745.7 1 1157 0 104 0 1032 0 8389
- */
- #include <iostream>
- #include <iomanip>
- #include <sys/stat.h>
- #include <sys/time.h>
- #include <sys/types.h>
- #include <vector>
- #include <cstdlib>
- typedef unsigned int uint32;
- using namespace std;
/**
 * Builds a vector of N pseudo-random 32-bit values.
 *
 * Each element is the next rand() result AND-ed with mask, so only bits set
 * in mask can appear in the output. rand() is not seeded here.
 *
 * @param N    number of elements to generate
 * @param mask bit mask applied to every generated value (default: all ones)
 * @return vector of N masked random values
 */
std::vector<uint32> generateArray32(uint N, const uint mask = 0xFFFFFFFF) {
    std::vector<uint32> values;
    values.reserve(N);
    for (uint remaining = N; remaining != 0; --remaining) {
        values.push_back(std::rand() & mask);
    }
    return values;
}
/**
 * Minimal wall-clock stopwatch built on gettimeofday().
 *
 * t1 holds the start instant and t2 the most recently recorded split
 * instant; both are public. Durations are reported in whole milliseconds.
 */
class ZTimer {
public:
    struct timeval t1, t2;

    /** Starts the stopwatch at the current time. */
    ZTimer() :
        t1(), t2() {
        reset();
    }

    /** Restarts the stopwatch: t1 and t2 are both set to the current time. */
    void reset() {
        gettimeofday(&t1, 0);
        t2 = t1;
    }

    /**
     * Milliseconds between t1 and t2 as last recorded; does not read the
     * clock. The microsecond difference may be negative and is truncated
     * toward zero by the integer division before being added.
     */
    int elapsed() {
        return (t2.tv_usec - t1.tv_usec) / 1000
                + (t2.tv_sec - t1.tv_sec) * 1000;
    }

    /** Records the current time in t2 and returns elapsed(). */
    int split() {
        gettimeofday(&t2, 0);
        return elapsed();
    }
};
- /*
- uint asmbits(const uint v) {
- if(v==0) return 0;
- uint32 answer;
- __asm__("bsr %1, %0;" :"=r"(answer) :"r"(v));
- return answer+1;
- }*/
/**
 * Number of bits needed to represent v: index of the highest set bit plus
 * one, or 0 when v is 0. Uses GCC's count-leading-zeros builtin, which is
 * undefined for a zero argument, hence the explicit zero case.
 */
uint gccbits(const uint v) {
    return v != 0 ? 32 - __builtin_clz(v) : 0;
}
/**
 * Number of bits needed to represent v (0 for v == 0), found by shifting v
 * right one position at a time until nothing is left.
 */
uint slowbits(uint v) {
    uint width = 0;
    for (; v != 0; v >>= 1) {
        ++width;
    }
    return width;
}
/**
 * Number of bits needed to represent v (0 for v == 0), computed with a
 * fixed sequence of five compare-and-shift stages.
 *
 * Stage k (k = 16, 8, 4, 2, 1) fires when v still has at least k
 * significant bits; it then drops the low k bits and adds k to the count.
 *
 * @param v value to measure
 * @return position of the highest set bit plus one, in the range 0..32
 */
uint bits(uint v) {
    uint r(0);
    if (v >= (1U << 15)) {
        v >>= 16;
        r += 16;
    }
    if (v >= (1U << 7)) {
        v >>= 8;
        r += 8;
    }
    if (v >= (1U << 3)) {
        v >>= 4;
        r += 4;
    }
    if (v >= (1U << 1)) {
        v >>= 2;
        r += 2;
    }
    if (v >= (1U << 0)) {
        v >>= 1;
        r += 1;
    }
    // The stages can account for at most 16+8+4+2+1 = 31 bits. When bit 31 of
    // the input is set, every stage fires and exactly one bit is left in v;
    // for all narrower inputs v is 0 here. Adding v makes the result 32 for
    // those inputs instead of 31.
    return r + v;
}
/**
 * Packs 32 values of 1 bit each from in[0..31] into out[0].
 *
 * Value k lands on bit k of the output word. Inputs are not masked, so each
 * must already fit in 1 bit; results are OR-ed into out, so bits already set
 * in out are kept.
 */
void __fastpackwithoutmask1(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 1;
    out[0] |= in[2] << 2;
    out[0] |= in[3] << 3;
    out[0] |= in[4] << 4;
    out[0] |= in[5] << 5;
    out[0] |= in[6] << 6;
    out[0] |= in[7] << 7;
    out[0] |= in[8] << 8;
    out[0] |= in[9] << 9;
    out[0] |= in[10] << 10;
    out[0] |= in[11] << 11;
    out[0] |= in[12] << 12;
    out[0] |= in[13] << 13;
    out[0] |= in[14] << 14;
    out[0] |= in[15] << 15;
    out[0] |= in[16] << 16;
    out[0] |= in[17] << 17;
    out[0] |= in[18] << 18;
    out[0] |= in[19] << 19;
    out[0] |= in[20] << 20;
    out[0] |= in[21] << 21;
    out[0] |= in[22] << 22;
    out[0] |= in[23] << 23;
    out[0] |= in[24] << 24;
    out[0] |= in[25] << 25;
    out[0] |= in[26] << 26;
    out[0] |= in[27] << 27;
    out[0] |= in[28] << 28;
    out[0] |= in[29] << 29;
    out[0] |= in[30] << 30;
    out[0] |= in[31] << 31;
}
/**
 * Packs 32 values of 2 bits each from in[0..31] into out[0..1].
 *
 * Value k occupies bits [2k, 2k+2) of the output bit stream; no value
 * straddles a word boundary. Inputs are not masked, so each must already fit
 * in 2 bits; results are OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask2(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 2;
    out[0] |= in[2] << 4;
    out[0] |= in[3] << 6;
    out[0] |= in[4] << 8;
    out[0] |= in[5] << 10;
    out[0] |= in[6] << 12;
    out[0] |= in[7] << 14;
    out[0] |= in[8] << 16;
    out[0] |= in[9] << 18;
    out[0] |= in[10] << 20;
    out[0] |= in[11] << 22;
    out[0] |= in[12] << 24;
    out[0] |= in[13] << 26;
    out[0] |= in[14] << 28;
    out[0] |= in[15] << 30;
    out[1] |= in[16];
    out[1] |= in[17] << 2;
    out[1] |= in[18] << 4;
    out[1] |= in[19] << 6;
    out[1] |= in[20] << 8;
    out[1] |= in[21] << 10;
    out[1] |= in[22] << 12;
    out[1] |= in[23] << 14;
    out[1] |= in[24] << 16;
    out[1] |= in[25] << 18;
    out[1] |= in[26] << 20;
    out[1] |= in[27] << 22;
    out[1] |= in[28] << 24;
    out[1] |= in[29] << 26;
    out[1] |= in[30] << 28;
    out[1] |= in[31] << 30;
}
/**
 * Packs 32 values of 3 bits each from in[0..31] into out[0..2].
 *
 * Value k occupies bits [3k, 3k+3) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 3 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask3(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 3;
    out[0] |= in[2] << 6;
    out[0] |= in[3] << 9;
    out[0] |= in[4] << 12;
    out[0] |= in[5] << 15;
    out[0] |= in[6] << 18;
    out[0] |= in[7] << 21;
    out[0] |= in[8] << 24;
    out[0] |= in[9] << 27;
    out[0] |= in[10] << 30;
    out[1] |= in[10] >> 2;
    out[1] |= in[11] << 1;
    out[1] |= in[12] << 4;
    out[1] |= in[13] << 7;
    out[1] |= in[14] << 10;
    out[1] |= in[15] << 13;
    out[1] |= in[16] << 16;
    out[1] |= in[17] << 19;
    out[1] |= in[18] << 22;
    out[1] |= in[19] << 25;
    out[1] |= in[20] << 28;
    out[1] |= in[21] << 31;
    out[2] |= in[21] >> 1;
    out[2] |= in[22] << 2;
    out[2] |= in[23] << 5;
    out[2] |= in[24] << 8;
    out[2] |= in[25] << 11;
    out[2] |= in[26] << 14;
    out[2] |= in[27] << 17;
    out[2] |= in[28] << 20;
    out[2] |= in[29] << 23;
    out[2] |= in[30] << 26;
    out[2] |= in[31] << 29;
}
/**
 * Packs 32 values of 5 bits each from in[0..31] into out[0..4].
 *
 * Value k occupies bits [5k, 5k+5) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 5 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask5(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 5;
    out[0] |= in[2] << 10;
    out[0] |= in[3] << 15;
    out[0] |= in[4] << 20;
    out[0] |= in[5] << 25;
    out[0] |= in[6] << 30;
    out[1] |= in[6] >> 2;
    out[1] |= in[7] << 3;
    out[1] |= in[8] << 8;
    out[1] |= in[9] << 13;
    out[1] |= in[10] << 18;
    out[1] |= in[11] << 23;
    out[1] |= in[12] << 28;
    out[2] |= in[12] >> 4;
    out[2] |= in[13] << 1;
    out[2] |= in[14] << 6;
    out[2] |= in[15] << 11;
    out[2] |= in[16] << 16;
    out[2] |= in[17] << 21;
    out[2] |= in[18] << 26;
    out[2] |= in[19] << 31;
    out[3] |= in[19] >> 1;
    out[3] |= in[20] << 4;
    out[3] |= in[21] << 9;
    out[3] |= in[22] << 14;
    out[3] |= in[23] << 19;
    out[3] |= in[24] << 24;
    out[3] |= in[25] << 29;
    out[4] |= in[25] >> 3;
    out[4] |= in[26] << 2;
    out[4] |= in[27] << 7;
    out[4] |= in[28] << 12;
    out[4] |= in[29] << 17;
    out[4] |= in[30] << 22;
    out[4] |= in[31] << 27;
}
/**
 * Packs 32 values of 6 bits each from in[0..31] into out[0..5].
 *
 * Value k occupies bits [6k, 6k+6) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 6 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask6(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 6;
    out[0] |= in[2] << 12;
    out[0] |= in[3] << 18;
    out[0] |= in[4] << 24;
    out[0] |= in[5] << 30;
    out[1] |= in[5] >> 2;
    out[1] |= in[6] << 4;
    out[1] |= in[7] << 10;
    out[1] |= in[8] << 16;
    out[1] |= in[9] << 22;
    out[1] |= in[10] << 28;
    out[2] |= in[10] >> 4;
    out[2] |= in[11] << 2;
    out[2] |= in[12] << 8;
    out[2] |= in[13] << 14;
    out[2] |= in[14] << 20;
    out[2] |= in[15] << 26;
    out[3] |= in[16];
    out[3] |= in[17] << 6;
    out[3] |= in[18] << 12;
    out[3] |= in[19] << 18;
    out[3] |= in[20] << 24;
    out[3] |= in[21] << 30;
    out[4] |= in[21] >> 2;
    out[4] |= in[22] << 4;
    out[4] |= in[23] << 10;
    out[4] |= in[24] << 16;
    out[4] |= in[25] << 22;
    out[4] |= in[26] << 28;
    out[5] |= in[26] >> 4;
    out[5] |= in[27] << 2;
    out[5] |= in[28] << 8;
    out[5] |= in[29] << 14;
    out[5] |= in[30] << 20;
    out[5] |= in[31] << 26;
}
/**
 * Packs 32 values of 7 bits each from in[0..31] into out[0..6].
 *
 * Value k occupies bits [7k, 7k+7) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 7 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask7(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 7;
    out[0] |= in[2] << 14;
    out[0] |= in[3] << 21;
    out[0] |= in[4] << 28;
    out[1] |= in[4] >> 4;
    out[1] |= in[5] << 3;
    out[1] |= in[6] << 10;
    out[1] |= in[7] << 17;
    out[1] |= in[8] << 24;
    out[1] |= in[9] << 31;
    out[2] |= in[9] >> 1;
    out[2] |= in[10] << 6;
    out[2] |= in[11] << 13;
    out[2] |= in[12] << 20;
    out[2] |= in[13] << 27;
    out[3] |= in[13] >> 5;
    out[3] |= in[14] << 2;
    out[3] |= in[15] << 9;
    out[3] |= in[16] << 16;
    out[3] |= in[17] << 23;
    out[3] |= in[18] << 30;
    out[4] |= in[18] >> 2;
    out[4] |= in[19] << 5;
    out[4] |= in[20] << 12;
    out[4] |= in[21] << 19;
    out[4] |= in[22] << 26;
    out[5] |= in[22] >> 6;
    out[5] |= in[23] << 1;
    out[5] |= in[24] << 8;
    out[5] |= in[25] << 15;
    out[5] |= in[26] << 22;
    out[5] |= in[27] << 29;
    out[6] |= in[27] >> 3;
    out[6] |= in[28] << 4;
    out[6] |= in[29] << 11;
    out[6] |= in[30] << 18;
    out[6] |= in[31] << 25;
}
/**
 * Packs 32 values of 9 bits each from in[0..31] into out[0..8].
 *
 * Value k occupies bits [9k, 9k+9) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 9 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask9(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 9;
    out[0] |= in[2] << 18;
    out[0] |= in[3] << 27;
    out[1] |= in[3] >> 5;
    out[1] |= in[4] << 4;
    out[1] |= in[5] << 13;
    out[1] |= in[6] << 22;
    out[1] |= in[7] << 31;
    out[2] |= in[7] >> 1;
    out[2] |= in[8] << 8;
    out[2] |= in[9] << 17;
    out[2] |= in[10] << 26;
    out[3] |= in[10] >> 6;
    out[3] |= in[11] << 3;
    out[3] |= in[12] << 12;
    out[3] |= in[13] << 21;
    out[3] |= in[14] << 30;
    out[4] |= in[14] >> 2;
    out[4] |= in[15] << 7;
    out[4] |= in[16] << 16;
    out[4] |= in[17] << 25;
    out[5] |= in[17] >> 7;
    out[5] |= in[18] << 2;
    out[5] |= in[19] << 11;
    out[5] |= in[20] << 20;
    out[5] |= in[21] << 29;
    out[6] |= in[21] >> 3;
    out[6] |= in[22] << 6;
    out[6] |= in[23] << 15;
    out[6] |= in[24] << 24;
    out[7] |= in[24] >> 8;
    out[7] |= in[25] << 1;
    out[7] |= in[26] << 10;
    out[7] |= in[27] << 19;
    out[7] |= in[28] << 28;
    out[8] |= in[28] >> 4;
    out[8] |= in[29] << 5;
    out[8] |= in[30] << 14;
    out[8] |= in[31] << 23;
}
/**
 * Packs 32 values of 10 bits each from in[0..31] into out[0..9].
 *
 * Value k occupies bits [10k, 10k+10) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 10 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask10(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 10;
    out[0] |= in[2] << 20;
    out[0] |= in[3] << 30;
    out[1] |= in[3] >> 2;
    out[1] |= in[4] << 8;
    out[1] |= in[5] << 18;
    out[1] |= in[6] << 28;
    out[2] |= in[6] >> 4;
    out[2] |= in[7] << 6;
    out[2] |= in[8] << 16;
    out[2] |= in[9] << 26;
    out[3] |= in[9] >> 6;
    out[3] |= in[10] << 4;
    out[3] |= in[11] << 14;
    out[3] |= in[12] << 24;
    out[4] |= in[12] >> 8;
    out[4] |= in[13] << 2;
    out[4] |= in[14] << 12;
    out[4] |= in[15] << 22;
    out[5] |= in[16];
    out[5] |= in[17] << 10;
    out[5] |= in[18] << 20;
    out[5] |= in[19] << 30;
    out[6] |= in[19] >> 2;
    out[6] |= in[20] << 8;
    out[6] |= in[21] << 18;
    out[6] |= in[22] << 28;
    out[7] |= in[22] >> 4;
    out[7] |= in[23] << 6;
    out[7] |= in[24] << 16;
    out[7] |= in[25] << 26;
    out[8] |= in[25] >> 6;
    out[8] |= in[26] << 4;
    out[8] |= in[27] << 14;
    out[8] |= in[28] << 24;
    out[9] |= in[28] >> 8;
    out[9] |= in[29] << 2;
    out[9] |= in[30] << 12;
    out[9] |= in[31] << 22;
}
/**
 * Packs 32 values of 11 bits each from in[0..31] into out[0..10].
 *
 * Value k occupies bits [11k, 11k+11) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 11 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask11(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 11;
    out[0] |= in[2] << 22;
    out[1] |= in[2] >> 10;
    out[1] |= in[3] << 1;
    out[1] |= in[4] << 12;
    out[1] |= in[5] << 23;
    out[2] |= in[5] >> 9;
    out[2] |= in[6] << 2;
    out[2] |= in[7] << 13;
    out[2] |= in[8] << 24;
    out[3] |= in[8] >> 8;
    out[3] |= in[9] << 3;
    out[3] |= in[10] << 14;
    out[3] |= in[11] << 25;
    out[4] |= in[11] >> 7;
    out[4] |= in[12] << 4;
    out[4] |= in[13] << 15;
    out[4] |= in[14] << 26;
    out[5] |= in[14] >> 6;
    out[5] |= in[15] << 5;
    out[5] |= in[16] << 16;
    out[5] |= in[17] << 27;
    out[6] |= in[17] >> 5;
    out[6] |= in[18] << 6;
    out[6] |= in[19] << 17;
    out[6] |= in[20] << 28;
    out[7] |= in[20] >> 4;
    out[7] |= in[21] << 7;
    out[7] |= in[22] << 18;
    out[7] |= in[23] << 29;
    out[8] |= in[23] >> 3;
    out[8] |= in[24] << 8;
    out[8] |= in[25] << 19;
    out[8] |= in[26] << 30;
    out[9] |= in[26] >> 2;
    out[9] |= in[27] << 9;
    out[9] |= in[28] << 20;
    out[9] |= in[29] << 31;
    out[10] |= in[29] >> 1;
    out[10] |= in[30] << 10;
    out[10] |= in[31] << 21;
}
/**
 * Packs 32 values of 12 bits each from in[0..31] into out[0..11].
 *
 * Value k occupies bits [12k, 12k+12) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 12 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask12(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 12;
    out[0] |= in[2] << 24;
    out[1] |= in[2] >> 8;
    out[1] |= in[3] << 4;
    out[1] |= in[4] << 16;
    out[1] |= in[5] << 28;
    out[2] |= in[5] >> 4;
    out[2] |= in[6] << 8;
    out[2] |= in[7] << 20;
    out[3] |= in[8];
    out[3] |= in[9] << 12;
    out[3] |= in[10] << 24;
    out[4] |= in[10] >> 8;
    out[4] |= in[11] << 4;
    out[4] |= in[12] << 16;
    out[4] |= in[13] << 28;
    out[5] |= in[13] >> 4;
    out[5] |= in[14] << 8;
    out[5] |= in[15] << 20;
    out[6] |= in[16];
    out[6] |= in[17] << 12;
    out[6] |= in[18] << 24;
    out[7] |= in[18] >> 8;
    out[7] |= in[19] << 4;
    out[7] |= in[20] << 16;
    out[7] |= in[21] << 28;
    out[8] |= in[21] >> 4;
    out[8] |= in[22] << 8;
    out[8] |= in[23] << 20;
    out[9] |= in[24];
    out[9] |= in[25] << 12;
    out[9] |= in[26] << 24;
    out[10] |= in[26] >> 8;
    out[10] |= in[27] << 4;
    out[10] |= in[28] << 16;
    out[10] |= in[29] << 28;
    out[11] |= in[29] >> 4;
    out[11] |= in[30] << 8;
    out[11] |= in[31] << 20;
}
/**
 * Packs 32 values of 13 bits each from in[0..31] into out[0..12].
 *
 * Value k occupies bits [13k, 13k+13) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 13 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask13(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 13;
    out[0] |= in[2] << 26;
    out[1] |= in[2] >> 6;
    out[1] |= in[3] << 7;
    out[1] |= in[4] << 20;
    out[2] |= in[4] >> 12;
    out[2] |= in[5] << 1;
    out[2] |= in[6] << 14;
    out[2] |= in[7] << 27;
    out[3] |= in[7] >> 5;
    out[3] |= in[8] << 8;
    out[3] |= in[9] << 21;
    out[4] |= in[9] >> 11;
    out[4] |= in[10] << 2;
    out[4] |= in[11] << 15;
    out[4] |= in[12] << 28;
    out[5] |= in[12] >> 4;
    out[5] |= in[13] << 9;
    out[5] |= in[14] << 22;
    out[6] |= in[14] >> 10;
    out[6] |= in[15] << 3;
    out[6] |= in[16] << 16;
    out[6] |= in[17] << 29;
    out[7] |= in[17] >> 3;
    out[7] |= in[18] << 10;
    out[7] |= in[19] << 23;
    out[8] |= in[19] >> 9;
    out[8] |= in[20] << 4;
    out[8] |= in[21] << 17;
    out[8] |= in[22] << 30;
    out[9] |= in[22] >> 2;
    out[9] |= in[23] << 11;
    out[9] |= in[24] << 24;
    out[10] |= in[24] >> 8;
    out[10] |= in[25] << 5;
    out[10] |= in[26] << 18;
    out[10] |= in[27] << 31;
    out[11] |= in[27] >> 1;
    out[11] |= in[28] << 12;
    out[11] |= in[29] << 25;
    out[12] |= in[29] >> 7;
    out[12] |= in[30] << 6;
    out[12] |= in[31] << 19;
}
/**
 * Packs 32 values of 14 bits each from in[0..31] into out[0..13].
 *
 * Value k occupies bits [14k, 14k+14) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 14 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask14(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 14;
    out[0] |= in[2] << 28;
    out[1] |= in[2] >> 4;
    out[1] |= in[3] << 10;
    out[1] |= in[4] << 24;
    out[2] |= in[4] >> 8;
    out[2] |= in[5] << 6;
    out[2] |= in[6] << 20;
    out[3] |= in[6] >> 12;
    out[3] |= in[7] << 2;
    out[3] |= in[8] << 16;
    out[3] |= in[9] << 30;
    out[4] |= in[9] >> 2;
    out[4] |= in[10] << 12;
    out[4] |= in[11] << 26;
    out[5] |= in[11] >> 6;
    out[5] |= in[12] << 8;
    out[5] |= in[13] << 22;
    out[6] |= in[13] >> 10;
    out[6] |= in[14] << 4;
    out[6] |= in[15] << 18;
    out[7] |= in[16];
    out[7] |= in[17] << 14;
    out[7] |= in[18] << 28;
    out[8] |= in[18] >> 4;
    out[8] |= in[19] << 10;
    out[8] |= in[20] << 24;
    out[9] |= in[20] >> 8;
    out[9] |= in[21] << 6;
    out[9] |= in[22] << 20;
    out[10] |= in[22] >> 12;
    out[10] |= in[23] << 2;
    out[10] |= in[24] << 16;
    out[10] |= in[25] << 30;
    out[11] |= in[25] >> 2;
    out[11] |= in[26] << 12;
    out[11] |= in[27] << 26;
    out[12] |= in[27] >> 6;
    out[12] |= in[28] << 8;
    out[12] |= in[29] << 22;
    out[13] |= in[29] >> 10;
    out[13] |= in[30] << 4;
    out[13] |= in[31] << 18;
}
/**
 * Packs 32 values of 15 bits each from in[0..31] into out[0..14].
 *
 * Value k occupies bits [15k, 15k+15) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 15 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask15(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 15;
    out[0] |= in[2] << 30;
    out[1] |= in[2] >> 2;
    out[1] |= in[3] << 13;
    out[1] |= in[4] << 28;
    out[2] |= in[4] >> 4;
    out[2] |= in[5] << 11;
    out[2] |= in[6] << 26;
    out[3] |= in[6] >> 6;
    out[3] |= in[7] << 9;
    out[3] |= in[8] << 24;
    out[4] |= in[8] >> 8;
    out[4] |= in[9] << 7;
    out[4] |= in[10] << 22;
    out[5] |= in[10] >> 10;
    out[5] |= in[11] << 5;
    out[5] |= in[12] << 20;
    out[6] |= in[12] >> 12;
    out[6] |= in[13] << 3;
    out[6] |= in[14] << 18;
    out[7] |= in[14] >> 14;
    out[7] |= in[15] << 1;
    out[7] |= in[16] << 16;
    out[7] |= in[17] << 31;
    out[8] |= in[17] >> 1;
    out[8] |= in[18] << 14;
    out[8] |= in[19] << 29;
    out[9] |= in[19] >> 3;
    out[9] |= in[20] << 12;
    out[9] |= in[21] << 27;
    out[10] |= in[21] >> 5;
    out[10] |= in[22] << 10;
    out[10] |= in[23] << 25;
    out[11] |= in[23] >> 7;
    out[11] |= in[24] << 8;
    out[11] |= in[25] << 23;
    out[12] |= in[25] >> 9;
    out[12] |= in[26] << 6;
    out[12] |= in[27] << 21;
    out[13] |= in[27] >> 11;
    out[13] |= in[28] << 4;
    out[13] |= in[29] << 19;
    out[14] |= in[29] >> 13;
    out[14] |= in[30] << 2;
    out[14] |= in[31] << 17;
}
/**
 * Packs 32 values of 17 bits each from in[0..31] into out[0..16].
 *
 * Value k occupies bits [17k, 17k+17) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 17 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask17(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 17;
    out[1] |= in[1] >> 15;
    out[1] |= in[2] << 2;
    out[1] |= in[3] << 19;
    out[2] |= in[3] >> 13;
    out[2] |= in[4] << 4;
    out[2] |= in[5] << 21;
    out[3] |= in[5] >> 11;
    out[3] |= in[6] << 6;
    out[3] |= in[7] << 23;
    out[4] |= in[7] >> 9;
    out[4] |= in[8] << 8;
    out[4] |= in[9] << 25;
    out[5] |= in[9] >> 7;
    out[5] |= in[10] << 10;
    out[5] |= in[11] << 27;
    out[6] |= in[11] >> 5;
    out[6] |= in[12] << 12;
    out[6] |= in[13] << 29;
    out[7] |= in[13] >> 3;
    out[7] |= in[14] << 14;
    out[7] |= in[15] << 31;
    out[8] |= in[15] >> 1;
    out[8] |= in[16] << 16;
    out[9] |= in[16] >> 16;
    out[9] |= in[17] << 1;
    out[9] |= in[18] << 18;
    out[10] |= in[18] >> 14;
    out[10] |= in[19] << 3;
    out[10] |= in[20] << 20;
    out[11] |= in[20] >> 12;
    out[11] |= in[21] << 5;
    out[11] |= in[22] << 22;
    out[12] |= in[22] >> 10;
    out[12] |= in[23] << 7;
    out[12] |= in[24] << 24;
    out[13] |= in[24] >> 8;
    out[13] |= in[25] << 9;
    out[13] |= in[26] << 26;
    out[14] |= in[26] >> 6;
    out[14] |= in[27] << 11;
    out[14] |= in[28] << 28;
    out[15] |= in[28] >> 4;
    out[15] |= in[29] << 13;
    out[15] |= in[30] << 30;
    out[16] |= in[30] >> 2;
    out[16] |= in[31] << 15;
}
/**
 * Packs 32 values of 18 bits each from in[0..31] into out[0..17].
 *
 * Value k occupies bits [18k, 18k+18) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 18 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask18(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 18;
    out[1] |= in[1] >> 14;
    out[1] |= in[2] << 4;
    out[1] |= in[3] << 22;
    out[2] |= in[3] >> 10;
    out[2] |= in[4] << 8;
    out[2] |= in[5] << 26;
    out[3] |= in[5] >> 6;
    out[3] |= in[6] << 12;
    out[3] |= in[7] << 30;
    out[4] |= in[7] >> 2;
    out[4] |= in[8] << 16;
    out[5] |= in[8] >> 16;
    out[5] |= in[9] << 2;
    out[5] |= in[10] << 20;
    out[6] |= in[10] >> 12;
    out[6] |= in[11] << 6;
    out[6] |= in[12] << 24;
    out[7] |= in[12] >> 8;
    out[7] |= in[13] << 10;
    out[7] |= in[14] << 28;
    out[8] |= in[14] >> 4;
    out[8] |= in[15] << 14;
    out[9] |= in[16];
    out[9] |= in[17] << 18;
    out[10] |= in[17] >> 14;
    out[10] |= in[18] << 4;
    out[10] |= in[19] << 22;
    out[11] |= in[19] >> 10;
    out[11] |= in[20] << 8;
    out[11] |= in[21] << 26;
    out[12] |= in[21] >> 6;
    out[12] |= in[22] << 12;
    out[12] |= in[23] << 30;
    out[13] |= in[23] >> 2;
    out[13] |= in[24] << 16;
    out[14] |= in[24] >> 16;
    out[14] |= in[25] << 2;
    out[14] |= in[26] << 20;
    out[15] |= in[26] >> 12;
    out[15] |= in[27] << 6;
    out[15] |= in[28] << 24;
    out[16] |= in[28] >> 8;
    out[16] |= in[29] << 10;
    out[16] |= in[30] << 28;
    out[17] |= in[30] >> 4;
    out[17] |= in[31] << 14;
}
/**
 * Packs 32 values of 19 bits each from in[0..31] into out[0..18].
 *
 * Value k occupies bits [19k, 19k+19) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 19 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask19(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 19;
    out[1] |= in[1] >> 13;
    out[1] |= in[2] << 6;
    out[1] |= in[3] << 25;
    out[2] |= in[3] >> 7;
    out[2] |= in[4] << 12;
    out[2] |= in[5] << 31;
    out[3] |= in[5] >> 1;
    out[3] |= in[6] << 18;
    out[4] |= in[6] >> 14;
    out[4] |= in[7] << 5;
    out[4] |= in[8] << 24;
    out[5] |= in[8] >> 8;
    out[5] |= in[9] << 11;
    out[5] |= in[10] << 30;
    out[6] |= in[10] >> 2;
    out[6] |= in[11] << 17;
    out[7] |= in[11] >> 15;
    out[7] |= in[12] << 4;
    out[7] |= in[13] << 23;
    out[8] |= in[13] >> 9;
    out[8] |= in[14] << 10;
    out[8] |= in[15] << 29;
    out[9] |= in[15] >> 3;
    out[9] |= in[16] << 16;
    out[10] |= in[16] >> 16;
    out[10] |= in[17] << 3;
    out[10] |= in[18] << 22;
    out[11] |= in[18] >> 10;
    out[11] |= in[19] << 9;
    out[11] |= in[20] << 28;
    out[12] |= in[20] >> 4;
    out[12] |= in[21] << 15;
    out[13] |= in[21] >> 17;
    out[13] |= in[22] << 2;
    out[13] |= in[23] << 21;
    out[14] |= in[23] >> 11;
    out[14] |= in[24] << 8;
    out[14] |= in[25] << 27;
    out[15] |= in[25] >> 5;
    out[15] |= in[26] << 14;
    out[16] |= in[26] >> 18;
    out[16] |= in[27] << 1;
    out[16] |= in[28] << 20;
    out[17] |= in[28] >> 12;
    out[17] |= in[29] << 7;
    out[17] |= in[30] << 26;
    out[18] |= in[30] >> 6;
    out[18] |= in[31] << 13;
}
/**
 * Packs 32 values of 20 bits each from in[0..31] into out[0..19].
 *
 * Value k occupies bits [20k, 20k+20) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 20 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask20(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 20;
    out[1] |= in[1] >> 12;
    out[1] |= in[2] << 8;
    out[1] |= in[3] << 28;
    out[2] |= in[3] >> 4;
    out[2] |= in[4] << 16;
    out[3] |= in[4] >> 16;
    out[3] |= in[5] << 4;
    out[3] |= in[6] << 24;
    out[4] |= in[6] >> 8;
    out[4] |= in[7] << 12;
    out[5] |= in[8];
    out[5] |= in[9] << 20;
    out[6] |= in[9] >> 12;
    out[6] |= in[10] << 8;
    out[6] |= in[11] << 28;
    out[7] |= in[11] >> 4;
    out[7] |= in[12] << 16;
    out[8] |= in[12] >> 16;
    out[8] |= in[13] << 4;
    out[8] |= in[14] << 24;
    out[9] |= in[14] >> 8;
    out[9] |= in[15] << 12;
    out[10] |= in[16];
    out[10] |= in[17] << 20;
    out[11] |= in[17] >> 12;
    out[11] |= in[18] << 8;
    out[11] |= in[19] << 28;
    out[12] |= in[19] >> 4;
    out[12] |= in[20] << 16;
    out[13] |= in[20] >> 16;
    out[13] |= in[21] << 4;
    out[13] |= in[22] << 24;
    out[14] |= in[22] >> 8;
    out[14] |= in[23] << 12;
    out[15] |= in[24];
    out[15] |= in[25] << 20;
    out[16] |= in[25] >> 12;
    out[16] |= in[26] << 8;
    out[16] |= in[27] << 28;
    out[17] |= in[27] >> 4;
    out[17] |= in[28] << 16;
    out[18] |= in[28] >> 16;
    out[18] |= in[29] << 4;
    out[18] |= in[30] << 24;
    out[19] |= in[30] >> 8;
    out[19] |= in[31] << 12;
}
/**
 * Packs 32 values of 21 bits each from in[0..31] into out[0..20].
 *
 * Value k occupies bits [21k, 21k+21) of the output bit stream; a value that
 * straddles a word boundary has its high part carried into the next word.
 * Inputs are not masked, so each must already fit in 21 bits; results are
 * OR-ed into out, so bits already set in out are kept.
 */
void __fastpackwithoutmask21(const uint32 * __restrict__ in,
        uint32 * __restrict__ out) {
    out[0] |= in[0];
    out[0] |= in[1] << 21;
    out[1] |= in[1] >> 11;
    out[1] |= in[2] << 10;
    out[1] |= in[3] << 31;
    out[2] |= in[3] >> 1;
    out[2] |= in[4] << 20;
    out[3] |= in[4] >> 12;
    out[3] |= in[5] << 9;
    out[3] |= in[6] << 30;
    out[4] |= in[6] >> 2;
    out[4] |= in[7] << 19;
    out[5] |= in[7] >> 13;
    out[5] |= in[8] << 8;
    out[5] |= in[9] << 29;
    out[6] |= in[9] >> 3;
    out[6] |= in[10] << 18;
    out[7] |= in[10] >> 14;
    out[7] |= in[11] << 7;
    out[7] |= in[12] << 28;
    out[8] |= in[12] >> 4;
    out[8] |= in[13] << 17;
    out[9] |= in[13] >> 15;
    out[9] |= in[14] << 6;
    out[9] |= in[15] << 27;
    out[10] |= in[15] >> 5;
    out[10] |= in[16] << 16;
    out[11] |= in[16] >> 16;
    out[11] |= in[17] << 5;
    out[11] |= in[18] << 26;
    out[12] |= in[18] >> 6;
    out[12] |= in[19] << 15;
    out[13] |= in[19] >> 17;
    out[13] |= in[20] << 4;
    out[13] |= in[21] << 25;
    out[14] |= in[21] >> 7;
    out[14] |= in[22] << 14;
    out[15] |= in[22] >> 18;
    out[15] |= in[23] << 3;
    out[15] |= in[24] << 24;
    out[16] |= in[24] >> 8;
    out[16] |= in[25] << 13;
    out[17] |= in[25] >> 19;
    out[17] |= in[26] << 2;
    out[17] |= in[27] << 23;
    out[18] |= in[27] >> 9;
    out[18] |= in[28] << 12;
    out[19] |= in[28] >> 20;
    out[19] |= in[29] << 1;
    out[19] |= in[30] << 22;
    out[20] |= in[30] >> 10;
    out[20] |= in[31] << 11;
}
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask22(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (22 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (22 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (22 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (22 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (22 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (22 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (22 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (22 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (22 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (22 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (22 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (22 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (22 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (22 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (22 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (22 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (22 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (22 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (22 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (22 - 10);
- ++in;
- *out |= ((*in)) << 10;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask23(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 23;
- ++out;
- *out |= ((*in)) >> (23 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (23 - 5);
- ++in;
- *out |= ((*in)) << 5;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (23 - 19);
- ++in;
- *out |= ((*in)) << 19;
- ++out;
- *out |= ((*in)) >> (23 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (23 - 1);
- ++in;
- *out |= ((*in)) << 1;
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (23 - 15);
- ++in;
- *out |= ((*in)) << 15;
- ++out;
- *out |= ((*in)) >> (23 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++in;
- *out |= ((*in)) << 29;
- ++out;
- *out |= ((*in)) >> (23 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (23 - 11);
- ++in;
- *out |= ((*in)) << 11;
- ++out;
- *out |= ((*in)) >> (23 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 25;
- ++out;
- *out |= ((*in)) >> (23 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (23 - 7);
- ++in;
- *out |= ((*in)) << 7;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (23 - 21);
- ++in;
- *out |= ((*in)) << 21;
- ++out;
- *out |= ((*in)) >> (23 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (23 - 3);
- ++in;
- *out |= ((*in)) << 3;
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (23 - 17);
- ++in;
- *out |= ((*in)) << 17;
- ++out;
- *out |= ((*in)) >> (23 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++in;
- *out |= ((*in)) << 31;
- ++out;
- *out |= ((*in)) >> (23 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (23 - 13);
- ++in;
- *out |= ((*in)) << 13;
- ++out;
- *out |= ((*in)) >> (23 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 27;
- ++out;
- *out |= ((*in)) >> (23 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (23 - 9);
- ++in;
- *out |= ((*in)) << 9;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask24(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (24 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (24 - 8);
- ++in;
- *out |= ((*in)) << 8;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask25(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 25;
- ++out;
- *out |= ((*in)) >> (25 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (25 - 11);
- ++in;
- *out |= ((*in)) << 11;
- ++out;
- *out |= ((*in)) >> (25 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 29;
- ++out;
- *out |= ((*in)) >> (25 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (25 - 15);
- ++in;
- *out |= ((*in)) << 15;
- ++out;
- *out |= ((*in)) >> (25 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (25 - 1);
- ++in;
- *out |= ((*in)) << 1;
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (25 - 19);
- ++in;
- *out |= ((*in)) << 19;
- ++out;
- *out |= ((*in)) >> (25 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (25 - 5);
- ++in;
- *out |= ((*in)) << 5;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (25 - 23);
- ++in;
- *out |= ((*in)) << 23;
- ++out;
- *out |= ((*in)) >> (25 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (25 - 9);
- ++in;
- *out |= ((*in)) << 9;
- ++out;
- *out |= ((*in)) >> (25 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 27;
- ++out;
- *out |= ((*in)) >> (25 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (25 - 13);
- ++in;
- *out |= ((*in)) << 13;
- ++out;
- *out |= ((*in)) >> (25 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++in;
- *out |= ((*in)) << 31;
- ++out;
- *out |= ((*in)) >> (25 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (25 - 17);
- ++in;
- *out |= ((*in)) << 17;
- ++out;
- *out |= ((*in)) >> (25 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (25 - 3);
- ++in;
- *out |= ((*in)) << 3;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (25 - 21);
- ++in;
- *out |= ((*in)) << 21;
- ++out;
- *out |= ((*in)) >> (25 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (25 - 7);
- ++in;
- *out |= ((*in)) << 7;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask26(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (26 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (26 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (26 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (26 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (26 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (26 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (26 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (26 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (26 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (26 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (26 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (26 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (26 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (26 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (26 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (26 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (26 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (26 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (26 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (26 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (26 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (26 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (26 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (26 - 6);
- ++in;
- *out |= ((*in)) << 6;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask27(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 27;
- ++out;
- *out |= ((*in)) >> (27 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (27 - 17);
- ++in;
- *out |= ((*in)) << 17;
- ++out;
- *out |= ((*in)) >> (27 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (27 - 7);
- ++in;
- *out |= ((*in)) << 7;
- ++out;
- *out |= ((*in)) >> (27 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 29;
- ++out;
- *out |= ((*in)) >> (27 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (27 - 19);
- ++in;
- *out |= ((*in)) << 19;
- ++out;
- *out |= ((*in)) >> (27 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (27 - 9);
- ++in;
- *out |= ((*in)) << 9;
- ++out;
- *out |= ((*in)) >> (27 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++in;
- *out |= ((*in)) << 31;
- ++out;
- *out |= ((*in)) >> (27 - 26);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (27 - 21);
- ++in;
- *out |= ((*in)) << 21;
- ++out;
- *out |= ((*in)) >> (27 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (27 - 11);
- ++in;
- *out |= ((*in)) << 11;
- ++out;
- *out |= ((*in)) >> (27 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- *out |= ((*in)) >> (27 - 1);
- ++in;
- *out |= ((*in)) << 1;
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (27 - 23);
- ++in;
- *out |= ((*in)) << 23;
- ++out;
- *out |= ((*in)) >> (27 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (27 - 13);
- ++in;
- *out |= ((*in)) << 13;
- ++out;
- *out |= ((*in)) >> (27 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (27 - 3);
- ++in;
- *out |= ((*in)) << 3;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (27 - 25);
- ++in;
- *out |= ((*in)) << 25;
- ++out;
- *out |= ((*in)) >> (27 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (27 - 15);
- ++in;
- *out |= ((*in)) << 15;
- ++out;
- *out |= ((*in)) >> (27 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (27 - 5);
- ++in;
- *out |= ((*in)) << 5;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask28(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (28 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (28 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (28 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (28 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (28 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (28 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (28 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (28 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (28 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (28 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (28 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (28 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (28 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (28 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (28 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (28 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (28 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (28 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (28 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (28 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (28 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (28 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (28 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (28 - 4);
- ++in;
- *out |= ((*in)) << 4;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask29(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 29;
- ++out;
- *out |= ((*in)) >> (29 - 26);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (29 - 23);
- ++in;
- *out |= ((*in)) << 23;
- ++out;
- *out |= ((*in)) >> (29 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (29 - 17);
- ++in;
- *out |= ((*in)) << 17;
- ++out;
- *out |= ((*in)) >> (29 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (29 - 11);
- ++in;
- *out |= ((*in)) << 11;
- ++out;
- *out |= ((*in)) >> (29 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (29 - 5);
- ++in;
- *out |= ((*in)) << 5;
- ++out;
- *out |= ((*in)) >> (29 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++in;
- *out |= ((*in)) << 31;
- ++out;
- *out |= ((*in)) >> (29 - 28);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (29 - 25);
- ++in;
- *out |= ((*in)) << 25;
- ++out;
- *out |= ((*in)) >> (29 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (29 - 19);
- ++in;
- *out |= ((*in)) << 19;
- ++out;
- *out |= ((*in)) >> (29 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (29 - 13);
- ++in;
- *out |= ((*in)) << 13;
- ++out;
- *out |= ((*in)) >> (29 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (29 - 7);
- ++in;
- *out |= ((*in)) << 7;
- ++out;
- *out |= ((*in)) >> (29 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- *out |= ((*in)) >> (29 - 1);
- ++in;
- *out |= ((*in)) << 1;
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (29 - 27);
- ++in;
- *out |= ((*in)) << 27;
- ++out;
- *out |= ((*in)) >> (29 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (29 - 21);
- ++in;
- *out |= ((*in)) << 21;
- ++out;
- *out |= ((*in)) >> (29 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (29 - 15);
- ++in;
- *out |= ((*in)) << 15;
- ++out;
- *out |= ((*in)) >> (29 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (29 - 9);
- ++in;
- *out |= ((*in)) << 9;
- ++out;
- *out |= ((*in)) >> (29 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- *out |= ((*in)) >> (29 - 3);
- ++in;
- *out |= ((*in)) << 3;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask30(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (30 - 28);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (30 - 26);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (30 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (30 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (30 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (30 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (30 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (30 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (30 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (30 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (30 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (30 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- *out |= ((*in)) >> (30 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- *out |= ((*in)) >> (30 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++out;
- ++in;
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (30 - 28);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (30 - 26);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (30 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (30 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (30 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (30 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (30 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (30 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (30 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (30 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (30 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (30 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- *out |= ((*in)) >> (30 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- *out |= ((*in)) >> (30 - 2);
- ++in;
- *out |= ((*in)) << 2;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask31(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out |= (*in);
- ++in;
- *out |= ((*in)) << 31;
- ++out;
- *out |= ((*in)) >> (31 - 30);
- ++in;
- *out |= ((*in)) << 30;
- ++out;
- *out |= ((*in)) >> (31 - 29);
- ++in;
- *out |= ((*in)) << 29;
- ++out;
- *out |= ((*in)) >> (31 - 28);
- ++in;
- *out |= ((*in)) << 28;
- ++out;
- *out |= ((*in)) >> (31 - 27);
- ++in;
- *out |= ((*in)) << 27;
- ++out;
- *out |= ((*in)) >> (31 - 26);
- ++in;
- *out |= ((*in)) << 26;
- ++out;
- *out |= ((*in)) >> (31 - 25);
- ++in;
- *out |= ((*in)) << 25;
- ++out;
- *out |= ((*in)) >> (31 - 24);
- ++in;
- *out |= ((*in)) << 24;
- ++out;
- *out |= ((*in)) >> (31 - 23);
- ++in;
- *out |= ((*in)) << 23;
- ++out;
- *out |= ((*in)) >> (31 - 22);
- ++in;
- *out |= ((*in)) << 22;
- ++out;
- *out |= ((*in)) >> (31 - 21);
- ++in;
- *out |= ((*in)) << 21;
- ++out;
- *out |= ((*in)) >> (31 - 20);
- ++in;
- *out |= ((*in)) << 20;
- ++out;
- *out |= ((*in)) >> (31 - 19);
- ++in;
- *out |= ((*in)) << 19;
- ++out;
- *out |= ((*in)) >> (31 - 18);
- ++in;
- *out |= ((*in)) << 18;
- ++out;
- *out |= ((*in)) >> (31 - 17);
- ++in;
- *out |= ((*in)) << 17;
- ++out;
- *out |= ((*in)) >> (31 - 16);
- ++in;
- *out |= ((*in)) << 16;
- ++out;
- *out |= ((*in)) >> (31 - 15);
- ++in;
- *out |= ((*in)) << 15;
- ++out;
- *out |= ((*in)) >> (31 - 14);
- ++in;
- *out |= ((*in)) << 14;
- ++out;
- *out |= ((*in)) >> (31 - 13);
- ++in;
- *out |= ((*in)) << 13;
- ++out;
- *out |= ((*in)) >> (31 - 12);
- ++in;
- *out |= ((*in)) << 12;
- ++out;
- *out |= ((*in)) >> (31 - 11);
- ++in;
- *out |= ((*in)) << 11;
- ++out;
- *out |= ((*in)) >> (31 - 10);
- ++in;
- *out |= ((*in)) << 10;
- ++out;
- *out |= ((*in)) >> (31 - 9);
- ++in;
- *out |= ((*in)) << 9;
- ++out;
- *out |= ((*in)) >> (31 - 8);
- ++in;
- *out |= ((*in)) << 8;
- ++out;
- *out |= ((*in)) >> (31 - 7);
- ++in;
- *out |= ((*in)) << 7;
- ++out;
- *out |= ((*in)) >> (31 - 6);
- ++in;
- *out |= ((*in)) << 6;
- ++out;
- *out |= ((*in)) >> (31 - 5);
- ++in;
- *out |= ((*in)) << 5;
- ++out;
- *out |= ((*in)) >> (31 - 4);
- ++in;
- *out |= ((*in)) << 4;
- ++out;
- *out |= ((*in)) >> (31 - 3);
- ++in;
- *out |= ((*in)) << 3;
- ++out;
- *out |= ((*in)) >> (31 - 2);
- ++in;
- *out |= ((*in)) << 2;
- ++out;
- *out |= ((*in)) >> (31 - 1);
- ++in;
- *out |= ((*in)) << 1;
- }
- /*assumes that integers fit in the prescribed number of bits */
- void __fastpackwithoutmask32(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- ++out;
- ++in;
- *out = *in;
- }
- /*assumes that integers fit in the prescribed number of bits*/
- void __fastpackwithoutmask4(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- for (uint outer = 0; outer < 4; ++outer) {
- for (uint inwordpointer = 0; inwordpointer < 32; inwordpointer += 4)
- *out |= ((*(in++))) << inwordpointer;
- ++out;
- }
- }
- /*assumes that integers fit in the prescribed number of bits*/
- void __fastpackwithoutmask8(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- for (uint outer = 0; outer < 8; ++outer) {
- for (uint inwordpointer = 0; inwordpointer < 32; inwordpointer += 8)
- *out |= ((*(in++))) << inwordpointer;
- ++out;
- }
- }
- /*assumes that integers fit in the prescribed number of bits*/
- void __fastpackwithoutmask16(const uint32 * __restrict__ in,
- uint32 * __restrict__ out) {
- for (uint outer = 0; outer < 16; ++outer) {
- for (uint inwordpointer = 0; inwordpointer < 32; inwordpointer += 16)
- *out |= ((*(in++))) << inwordpointer;
- ++out;
- }
- }
- /*assumes that integers fit in the prescribed number of bits*/
- void fastpackwithoutmask(const uint32 * __restrict__ in,
- uint32 * __restrict__ out, const uint bit) {
- // Could have used function pointers instead of switch.
- // Switch calls do offer the compiler more opportunities for optimization in
- // theory. In this case, it makes no difference with a good compiler.
- switch (bit) {
- case 1:
- __fastpackwithoutmask1(in, out);
- break;
- case 2:
- __fastpackwithoutmask2(in, out);
- break;
- case 3:
- __fastpackwithoutmask3(in, out);
- break;
- case 4:
- __fastpackwithoutmask4(in, out);
- break;
- case 5:
- __fastpackwithoutmask5(in, out);
- break;
- case 6:
- __fastpackwithoutmask6(in, out);
- break;
- case 7:
- __fastpackwithoutmask7(in, out);
- break;
- case 8:
- __fastpackwithoutmask8(in, out);
- break;
- case 9:
- __fastpackwithoutmask9(in, out);
- break;
- case 10:
- __fastpackwithoutmask10(in, out);
- break;
- case 11:
- __fastpackwithoutmask11(in, out);
- break;
- case 12:
- __fastpackwithoutmask12(in, out);
- break;
- case 13:
- __fastpackwithoutmask13(in, out);
- break;
- case 14:
- __fastpackwithoutmask14(in, out);
- break;
- case 15:
- __fastpackwithoutmask15(in, out);
- break;
- case 16:
- __fastpackwithoutmask16(in, out);
- break;
- case 17:
- __fastpackwithoutmask17(in, out);
- break;
- case 18:
- __fastpackwithoutmask18(in, out);
- break;
- case 19:
- __fastpackwithoutmask19(in, out);
- break;
- case 20:
- __fastpackwithoutmask20(in, out);
- break;
- case 21:
- __fastpackwithoutmask21(in, out);
- break;
- case 22:
- __fastpackwithoutmask22(in, out);
- break;
- case 23:
- __fastpackwithoutmask23(in, out);
- break;
- case 24:
- __fastpackwithoutmask24(in, out);
- break;
- case 25:
- __fastpackwithoutmask25(in, out);
- break;
- case 26:
- __fastpackwithoutmask26(in, out);
- break;
- case 27:
- __fastpackwithoutmask27(in, out);
- break;
- case 28:
- __fastpackwithoutmask28(in, out);
- break;
- case 29:
- __fastpackwithoutmask29(in, out);
- break;
- case 30:
- __fastpackwithoutmask30(in, out);
- break;
- case 31:
- __fastpackwithoutmask31(in, out);
- break;
- case 32:
- __fastpackwithoutmask32(in, out);
- break;
- default:
- break;
- }
- }
- int main() {
- const uint N = 1U << 26;
- for (uint b = 1; b <= 32; ++b) {
- cout << b << "\t";
- const uint MAXV = b < 32 ? (1U << b) - 1 : 0xFFFFFFFF;
- vector<uint32> data = generateArray32(N, MAXV);
- for (int k = 0; k < 16; ++k)
- data.push_back(MAXV);
- vector<uint32> out(N);// just a tmp store
- ZTimer z;
- uint detectedb = 0;
- z.reset();
- detectedb = 0;
- for (vector<uint32>::const_iterator i = data.begin(); i != data.end(); ++i) {
- const uint tmpbits = slowbits(*i);
- if (detectedb < tmpbits)
- detectedb = tmpbits;
- }
- cout << std::setprecision(4) << N * 0.001 / z.split() << " "
- << detectedb << "\t";
- z.reset();
- detectedb = 0;
- for (vector<uint32>::const_iterator i = data.begin(); i != data.end(); ++i) {
- const uint tmpbits = bits(*i);
- if (detectedb < tmpbits)
- detectedb = tmpbits;
- }
- cout << std::setprecision(4) << N * 0.001 / z.split() << " "
- << detectedb << "\t";
- z.reset();
- uint myclz = 32;
- for (vector<uint32>::const_iterator i = data.begin(); i != data.end(); ++i) {
- if (*i == 0)
- continue;
- const uint tmpbits = __builtin_clz(*i);
- if (myclz > tmpbits)
- myclz = tmpbits;
- }
- detectedb = 32 - myclz;
- cout << std::setprecision(4) << N * 0.001 / z.split() << " "
- << detectedb << "\t";
- z.reset();
- detectedb = bits(data[0]);
- if (data.size() % 1 != 0) {
- for (vector<uint32>::const_iterator i = data.begin() + 1; i
- != data.end(); i += 2) {
- if ((*i >> detectedb) > 0)
- detectedb = bits(*i);
- if ((*(i + 1) >> detectedb) > 0)
- detectedb = bits(*(i + 1));
- }
- } else {
- for (vector<uint32>::const_iterator i = data.begin(); i
- != data.end(); i += 2) {
- if ((*i >> detectedb) > 0)
- detectedb = bits(*i);
- if ((*(i + 1) >> detectedb) > 0)
- detectedb = bits(*(i + 1));
- }
- }
- cout << std::setprecision(4) << N * 0.001 / z.split() << " "
- << detectedb << "\t";
- z.reset();
- detectedb = bits(data[0]);
- const size_t N = data.size();
- for (uint k = 0; k < N / 32; ++k) {
- fastpackwithoutmask(&data[0] + 32 * k,
- &out[0] + (32 * detectedb) * k / 32, detectedb);
- }
- cout << std::setprecision(4) << N * 0.001 / z.split() << "\t";
- cout << endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement