Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <time.h>
- #include <vector>
- #include <string>
- #include <algorithm>
- #include <smmintrin.h>
- #include <stdint.h>
- #include <string.h>
// ALIGN(n): prefix for a declaration that requests n-byte alignment, spelled
// with the MSVC __declspec form when _MSC_VER is defined and with the
// GCC/Clang __attribute__ form otherwise.
#if defined(_MSC_VER)
#  define ALIGN(n) __declspec(align(n))
#else
#  define ALIGN(n) __attribute__((aligned(n)))
#endif
typedef unsigned int UINT32;
//=============================================== original =================================================
// Scalar reference parser for a NUL-terminated dotted-decimal string.
//
// Each '.'-separated run of characters is accumulated as a decimal number and
// shifted into the result 8 bits at a time, so "a.b.c.d" yields
// (a << 24) | (b << 16) | (c << 8) | d.
//
// No validation is performed: parts are not range-checked, the number of
// parts is not checked, and any byte other than '.' and NUL is folded in as
// (byte - '0'). An empty part (leading, trailing or doubled '.') counts as 0.
inline UINT32 GetIP(const char *p)
{
    UINT32 dwIP = 0, dwIP_Part = 0;
    for (;; p++)
    {
        const char c = p[0];
        if (c == 0)
        {
            // End of string: flush the part that was being accumulated.
            dwIP = (dwIP << 8) | dwIP_Part;
            break;
        }
        if (c == '.')
        {
            // Separator: flush the finished part and start a new one.
            // Go back to the top of the loop so the next byte is checked for
            // NUL / '.' before being treated as a digit; consuming it
            // unconditionally would step over the terminator on a trailing '.'.
            dwIP = (dwIP << 8) | dwIP_Part;
            dwIP_Part = 0;
            continue;
        }
        dwIP_Part = (dwIP_Part * 10) + (c - '0');
    }
    return dwIP;
}
- //=============================================== stgatilov =================================================
__m128i shuffleTable[65536]; //can be reduced 256x times, see solution by @IwillnotexistIdonotexist below

// Populates shuffleTable.
//
// The table is indexed by a 16-bit mask with one bit per input byte. For every
// combination of four part lengths (1..3 characters each) the mask has a bit
// set at the position that follows each part, i.e. where the three separators
// and the terminator sit. The stored entry is a byte-shuffle pattern that moves
// the characters of part i into bytes (3-i)*4 .. (3-i)*4+2, last character
// first, and leaves every other byte at -1.
//
// Bits above the end of the text are unconstrained, so the same pattern is
// stored under every possible value of those high bits. All entries that do
// not correspond to a length combination stay filled with 0xFF bytes.
void MyInit() {
    memset(shuffleTable, -1, sizeof(shuffleTable));

    // 3^4 = 81 length combinations, enumerated as a 4-digit base-3 counter
    // whose most significant digit is the length of the first part.
    for (int combo = 0; combo < 81; ++combo) {
        int partLen[4];
        int rest = combo;
        for (int k = 3; k >= 0; --k) {
            partLen[k] = rest % 3 + 1;
            rest /= 3;
        }
        // Characters of all parts plus one separator/terminator after each part.
        const int textLen = partLen[0] + partLen[1] + partLen[2] + partLen[3] + 4;

        // The pattern and the low mask bits depend only on the lengths, so
        // they are built once and reused for every high-bit variant.
        char control[16];
        memset(control, -1, sizeof(control));
        int sepMask = 0;
        int cursor = 0; // current position in the input text
        for (int part = 0; part < 4; ++part) {
            const int n = partLen[part];
            for (int d = 0; d < n; ++d, ++cursor)
                control[(3 - part) * 4 + (n - 1 - d)] = cursor;
            sepMask ^= (1 << cursor); // byte that follows the part
            ++cursor;
        }
        const __m128i entry = _mm_loadu_si128((__m128i*)control);

        const int tailVariants = 1 << (16 - textLen);
        for (int tail = 0; tail < tailVariants; ++tail)
            _mm_store_si128(&shuffleTable[sepMask ^ (tail << textLen)], entry);
    }
}
- inline UINT32 MyGetIP(const char *str) {
- __m128i input = _mm_lddqu_si128((const __m128i*)str);
- // for (int i = 0; i < 16; i++) printf("%c ", input.m128i_u8[i]); printf("\n");
- input = _mm_sub_epi8(input, _mm_set1_epi8('0'));
- // for (int i = 0; i < 16; i++) printf("%d ", int(input.m128i_u8[i])); printf("\n");
- __m128i cmp = input;
- // for (int i = 0; i < 16; i++) printf("%c", cmp.m128i_i8[i] < 0 ? 'X' : '.'); printf("\n");
- uint64_t mask = _mm_movemask_epi8(cmp);
- // printf("%d\n", mask);
- __m128i shuf = shuffleTable[mask];
- // for (int i = 0; i < 16; i++) printf("%d ", int(shuf.m128i_i8[i])); printf("\n");
- __m128i arr = _mm_shuffle_epi8(input, shuf);
- // for (int i = 0; i < 16; i++) printf("%d ", int(arr.m128i_u8[i])); printf("\n");
- __m128i coeffs = _mm_set_epi8(0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1);
- __m128i prod = _mm_maddubs_epi16(coeffs, arr);
- // for (int i = 0; i < 8; i++) printf("%d ", int(prod.m128i_u16[i])); printf("\n");
- prod = _mm_hadd_epi16(prod, prod);
- // for (int i = 0; i < 4; i++) printf("%d ", int(prod.m128i_u16[i])); printf("\n");
- prod = _mm_shuffle_epi8(prod, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 4, 2, 0));
- // for (int i = 0; i < 4; i++) printf("%d ", int(prod.m128i_u8[i])); printf("\n");
- return _mm_extract_epi32(prod, 0);
- // return (UINT32(_mm_extract_epi16(prod, 1)) << 16) + UINT32(_mm_extract_epi16(prod, 0));
- }
- //==================================== @IwillnotexistIdonotexist ===============================================
- //lookup table is compressed to 4Kb
- //taken from http://pastebin.com/XJ7w9nq1
- /* DEEP MAGIC BEGINS HERE. */
- return (unsigned)(v*0x008d981a)>>24;
- }
- inline UINT32 MyGetIP_comp(const char *str) {
- __m128i input = _mm_lddqu_si128((const __m128i*)str); //"192.167.1.3"
- uint64_t zm = _mm_movemask_epi8(_mm_cmpeq_epi8(input, _mm_setzero_si128()));
- zm ^= (zm-1); //(1<<12) - 1
- input = _mm_sub_epi8(input, _mm_set1_epi8('0')); //1 9 2 254 1 6 7 254 1 254 3 208 245 0 8 40
- __m128i cmp = input; //...X...X.X.XX... (signs)
- uint64_t mask = _mm_movemask_epi8(cmp); //6792 - magic index
- mask &= zm;
- uint64_t hashmask = perfecthash(mask);
- __m128i shuf = ((const __m128i*)TBL)[hashmask]; //10 -1 -1 -1 8 -1 -1 -1 6 5 4 -1 2 1 0 -1
- __m128i arr = _mm_shuffle_epi8(input, shuf); //3 0 0 0 | 1 0 0 0 | 7 6 1 0 | 2 9 1 0
- __m128i coeffs = _mm_set_epi8(0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1);
- __m128i prod = _mm_maddubs_epi16(coeffs, arr); //3 0 | 1 0 | 67 100 | 92 100
- prod = _mm_hadd_epi16(prod, prod); //3 | 1 | 167 | 192 | ? | ? | ? | ?
- __m128i imm = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 4, 2, 0);
- prod = _mm_shuffle_epi8(prod, imm); //3 1 167 192 0 0 0 0 0 0 0 0 0 0 0 0
- return _mm_extract_epi32(prod, 0);
- }
- //========================================== TESTING =================================================
- static const int CNT = 16<<10;
- int main() {
- MyInit();
- printf("%08X\n", MyGetIP("192.167.1.3"));
- std::vector<std::string> samples;
- for (int i = 0; i < CNT; i++) {
- std::string str;
- for (int j = 0; j < 4; j++) {
- if (j) str += '.';
- int x = rand() % 256;
- char qq[16];
- sprintf(qq, "%d", x);
- str += qq;
- }
- samples.push_back(str);
- // printf("%s\n", samples[i].c_str());
- auto a = GetIP(samples.back().c_str());
- auto b = MyGetIP(samples.back().c_str());
- auto c = MyGetIP_comp(samples.back().c_str());
- if (a != b || a != c || b != c)
- printf("%s: %08X vs %08X vs %08X\n", samples[i].c_str(), a, b, c);
- }
- {
- int start = clock();
- int sum = 0;
- for (int i = 0; i < 1<<27; i++) {
- const char *input = samples[(i+0) & (CNT-1)].c_str();
- sum += MyGetIP(input);
- }
- int elapsed = clock() - start;
- printf("Time = %0.3lf (%d)\n", double(elapsed) / CLOCKS_PER_SEC, sum);
- }
- {
- int start = clock();
- int sum = 0;
- for (int i = 0; i < 1<<27; i++) {
- const char *input = samples[(i+0) & (CNT-1)].c_str();
- sum += MyGetIP_comp(input);
- }
- int elapsed = clock() - start;
- printf("Time = %0.3lf (%d)\n", double(elapsed) / CLOCKS_PER_SEC, sum);
- }
- {
- int start = clock();
- int sum = 0;
- for (int i = 0; i < 1<<27; i++) {
- const char *input = samples[i & (CNT-1)].c_str();
- sum += GetIP(input);
- }
- int elapsed = clock() - start;
- printf("Time = %0.3lf (%d)\n", double(elapsed) / CLOCKS_PER_SEC, sum);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement