Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * bitpacking.cpp
- *
- * Original 6199 line source by Daniel Lemire, http://lemire.me/blog/
- * Question: if you pack and unpack bits, is it much faster if you
- * pack into 8 or 16 bits than, say, 31 or 7 bits?
- *
- * Modified 354 line source by willcodeforfood@reddit
- * g++ -DMAIN -DBENCH -O3 bitpacking.cpp -o bench
- * g++ -DMAIN -DSPECS -O1 bitpacking.cpp -o specs
- * g++ -O3 bitpacking.cpp -c -o bitpack.o
- */
- extern "C" {
- # include <sys/time.h>
- }
- #include <cassert>
- #include <cstdlib>
- #include <iostream>
- #include <vector>
- #ifdef __USE_GNU
- # define RESTRICT __restrict__
- # define NOINLINE __attribute__ ((noinline))
- #else
- # define RESTRICT
- # define NOINLINE
- #endif
- #define PARAMS cuint * RESTRICT i, uint * RESTRICT o
- #define FOREACH_1to31(F) \
- F (1) F (2) F (3) F (4) F (5) F (6) F (7) F (8) \
- F (9) F (10) F (11) F (12) F (13) F (14) F (15) F (16) \
- F (17) F (18) F (19) F (20) F (21) F (22) F (23) F (24) \
- F (25) F (26) F (27) F (28) F (29) F (30) F (31)
- #define FOREACH_1to32(F) FOREACH_1to31(F) F (32)
- #define SUB(a, b) ((a) - (b))
- #define LSH(a, b) ((a) << (b))
- #define RSH(a, b) ((a) >> (b))
- #define MASK(x, k) ((x) % (1U << (k)))
- #define die(why) do { cerr << why << endl; assert(0); \
- exit(EXIT_FAILURE); } while(0)
- using namespace std;
- typedef const bool cbool;
- typedef const uint cuint;
- typedef vector<uint> vuint;
- typedef const vuint cvuint;
- // P = pack, U = unpack, S = spec
- // defines 96 functions: {p,u}{1..32}(PARAMS) and s{1..32}()
- namespace metaprog {
- #define TPAR cuint * RESTRICT &i, uint * RESTRICT &o
- #define TCALL(f) (f (i, o))
- #define IGNORE(x) ((void)x)
- // in this context, dirty means that some bits weren't processed.
- // [PU][CD][CD] = (pack|unpack) start:(clean|dirty) end:(clean|dirty)
- // [PU]Dx = called when dirty at the start; handles remainders
- // [PU]Rx = adds the remainder bits to the output
- // these functional macros aren't fully general; some aren't expr-safe,
- // and some evaluate their params more than once.
- #define PCD(n, k) do { (*o) |= LSH (MASK ((*i), n), k); } while (0)
- #define UCD(n, k) do { (*o) = MASK (RSH ((*i), k), n); } while (0)
- #define PCC(n, k) do { PCD(n, k); (++i); } while (0)
- #define UCC(n, k) do { UCD(n, k); (++o); } while (0)
- #define PRx(n, k) do { (*o) |= RSH (MASK ((*i), n), SUB (n, k)); } while (0)
- #define URx(n, k) do { (*o) |= LSH (MASK ((*i), k), SUB (n, k)); } while (0)
- #define PDx(n, k) do { (++o); PRx(n, k); (++i); } while (0)
- #define UDx(n, k) do { (++i); URx(n, k); (++o); } while (0)
- #define PDD(n, k) do { PDx(n, k); PCD(n, k); } while (0)
- #define UDD(n, k) do { UDx(n, k); UCD(n, k); } while (0)
- #define PDC(n, k) do { PDD(n, k); (++i); } while (0)
- #define UDC(n, k) do { UDD(n, k); (++o); } while (0)
- // before and after refer to dirty state relative to X
- template <cuint n, cuint b, cbool before, cbool after>
- struct X {
- static inline void P (TPAR);
- static inline void U (TPAR);
- };
- #define SPECIALIZE_X(x, y, F) \
- template <cuint n, cuint b> \
- struct X <n, b, (x), (y)> { \
- static inline void P (TPAR) { P##F (n, b); } \
- static inline void U (TPAR) { U##F (n, b); } \
- static inline void S () { cout << " " << #F << " (" << n \
- << ", " << b << ");" << endl; } \
- }
- // real execution work hides here
- SPECIALIZE_X(true, true, CC);
- SPECIALIZE_X(true, false, CD);
- SPECIALIZE_X(false, true, DC);
- SPECIALIZE_X(false, false, DD);
- static inline void nop(TPAR) {
- IGNORE(i);
- IGNORE(o);
- }
- template <cbool end>
- struct E {
- static inline void P(TPAR) {
- TCALL(nop);
- }
- static inline void U(TPAR) {
- TCALL(nop);
- }
- static inline void S() { }
- };
- // end states advance the input or output
- template <>
- struct E<true> {
- static inline void P(TPAR) {
- IGNORE(i);
- ++o;
- }
- static inline void U(TPAR) {
- IGNORE(o);
- ++i;
- }
- static inline void S() {
- cout << " EE ();" << endl;
- }
- };
- template <cuint n, cuint prev, cuint current, cuint next>
- struct W {
- // WX = execute single step
- // WE = end at alignment 0 (mod 32)
- #define WX X<n, current, before, after>
- #define WE E<end>
- // dirty state before and after the W routine
- static cbool before = (current == 0 || prev < current);
- static cbool end = (next == 0);
- static cbool after = (end || current < next);
- static inline void P (TPAR) {
- TCALL(WX::P);
- TCALL(WE::P);
- }
- static inline void U (TPAR) {
- TCALL(WX::U);
- TCALL(WE::U);
- }
- static inline void S () {
- WX::S();
- WE::S();
- }
- };
- // recursive metaprogram; assumes 0th step is specialized
- template <cuint n, cuint step>
- struct R {
- // RR = recursion
- // RW = does the work for this step
- #define RR R<n, (step - 1)>
- #define RW W<n, prev, current, next>
- static cuint prev = ((step - 1) * n) % 32U;
- static cuint current = (step * n) % 32U;
- static cuint next = ((step + 1) * n) % 32U;
- static inline void P (TPAR) {
- TCALL(RR::P);
- TCALL(RW::P);
- }
- static inline void U (TPAR) {
- TCALL(RR::U);
- TCALL(RW::U);
- }
- static inline void S () {
- RR::S ();
- RW::S ();
- }
- };
- // each bit size recurses from 31 to 0
- #define FR(n) R<n, 31>
- #define SPECIALIZE_R(n) \
- void p##n (PARAMS) { TCALL(FR(n)::P); } \
- void u##n (PARAMS) { TCALL(FR(n)::U); } \
- void s##n () { FR(n)::S(); } \
- template<> struct R <(n), 0> { \
- static inline void P (TPAR) { PCC (n, 0); } \
- static inline void U (TPAR) { UCC (n, 0); } \
- static inline void S () { \
- cout << " CC (" << n << ", 0);" << endl; } };
- // primarily defines {p,u}{1..31}
- FOREACH_1to31(SPECIALIZE_R)
- // {p,u}32 calls slowcpy
- static void slowcpy (PARAMS) {
- for (int j = 0; j < 32; ++j)
- *o++ = *i++;
- }
- void p32(PARAMS) {
- TCALL(slowcpy);
- }
- void u32(PARAMS) {
- TCALL(slowcpy);
- }
- void s32() {
- for (int j = 0; j < 32; ++j)
- cout << " *o++ = *i++;" << endl;
- }
- }
- namespace switchfns {
- // pack and unpack switch functions call the recursive metaprograms
- #define CASE_P(n) case (n): do { TCALL(metaprog::p##n); return; } while(0);
- #define CASE_U(n) case (n): do { TCALL(metaprog::u##n); return; } while(0);
- #define CASE_S(n) case (n): do { metaprog::s##n(); return; } while (0);
- #define DEF_SWITCH(fname, cases) \
- void fname (PARAMS, cuint bit) { \
- switch (bit) { \
- FOREACH_1to32 (cases); \
- default: die("invalid case"); } }
- DEF_SWITCH(pack, CASE_P)
- DEF_SWITCH(unpack, CASE_U)
- void spec (cuint bit) {
- switch (bit) {
- FOREACH_1to32 (CASE_S);
- default:
- die("invalid case");
- }
- }
- }
- #ifdef BENCH
- namespace bench {
- struct ZTimer {
- struct timeval t1, t2;
- ZTimer() : t1(), t2() {
- reset();
- }
- int elapsed () {
- return ((t2.tv_sec - t1.tv_sec) * 1000) +
- ((t2.tv_usec - t1.tv_usec) / 1000);
- }
- void reset () {
- gettimeofday (&t1, 0);
- t2 = t1;
- }
- int split () {
- gettimeofday (&t2, 0);
- return elapsed ();
- }
- };
- NOINLINE void fast_pack (cvuint & i, vuint & o, cuint bit) {
- cuint N = i.size () / 32;
- for (uint k = 0; k < N; ++k)
- switchfns::pack (&i[0] + 32 * k,
- &o[0] + (32 * bit) * k / 32, bit);
- }
- NOINLINE void fast_unpack (cvuint & i, vuint & o, cuint bit) {
- cuint N = o.size () / 32;
- for (uint k = 0; k < N; ++k)
- switchfns::unpack (&i[0] + (32 * bit) * k / 32,
- &o[0] + 32 * k, bit);
- }
- bool validate (cvuint & i, cvuint & o, cuint bit) {
- if (bit == 32)
- return i == o;
- cuint N = i.size ();
- for (uint k = 0; k < N; ++k)
- if (MASK(i[k], bit) != MASK(o[k], bit)) {
- cerr << " Error at k = " << k << " i[k]= " << i[k]
- << " o[k]=" << o[k] << endl;
- return false;
- }
- return true;
- }
- vuint generateArray (cuint N) {
- vuint ans (N);
- for (uint k = 0; k < N; ++k)
- ans[k] = rand ();
- return ans;
- }
- void simplebenchmark (cuint N = 1U << 26, cuint T = 5) {
- ZTimer z;
- cvuint data = generateArray (N);
- vuint compressed (N, 0), recovered (N, 0);
- cout << "bits" << "\t" << "pack" << "\t" << "unpack" << endl;
- for (uint bit = 1; bit <= 32; ++bit) {
- double packtime = 0, unpacktime = 0;
- for (uint t = 0; t < T; ++t) {
- compressed.clear ();
- compressed.resize (N * bit / 32, 0);
- recovered.clear ();
- recovered.resize (N, 0);
- z.reset ();
- fast_pack (data, compressed, bit);
- packtime += z.split ();
- z.reset ();
- fast_unpack (compressed, recovered, bit);
- unpacktime += z.split ();
- if (!validate (data, recovered, bit))
- die("failed validation");
- }
- cout << bit << "\t" << packtime << "\t" << unpacktime << endl;
- }
- }
- }
- #endif
- #ifdef SPECS
- namespace specs {
- void print() {
- for (uint bit = 1; bit<= 32; ++bit) {
- cout << "SPEC(" << bit << ") {" << endl;
- switchfns::spec(bit);
- cout << "}" << endl;
- }
- }
- }
- #endif
- #ifdef MAIN
- int main () {
- #ifdef SPECS
- specs::print();
- #endif
- #ifdef BENCH
- bench::simplebenchmark ();
- #endif
- return 0;
- }
- #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement