Advertisement
zhangsongcui

FAST number counting

Mar 28th, 2018
192
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.31 KB | None | 0 0
  1. #include <cstring>
  2. #include <cstdio>
  3. #include <cstddef>
  4. #include <array>
  5. #include <immintrin.h>
  6. #include <vector>
  7. #include <thread>
  8. #include <atomic>
  9. #include "mmf.hpp"
  10.  
  11. #ifndef __AVX2__
  12. #   define __m256i __m128i
  13. #   define __v32qi __m128i
  14. #   define __v8si __m128i
  15. #   define _mm256_set1_epi32 _mm_set1_epi32
  16. #   define _mm256_set1_epi8 _mm_set1_epi8
  17. #   define _mm256_loadu_si256 _mm_loadu_si128
  18. #   define _mm256_and_si256 _mm_and_si128
  19. #   define _mm256_sub_epi8 _mm_sub_epi8
  20. #   define _mm256_mullo_epi32 _mm_mullo_epi32
  21. #   define _mm256_add_epi32 _mm_add_epi32
  22. #   define _mm256_srli_epi32 _mm_srli_epi32
  23. #   define _mm256_extract_epi32 _mm_extract_epi32
  24. #endif
  25.  
  26. #ifdef _MSC_VER
  27.     using __v32qi = __m256i;
  28.     using __v8si  = __m256i;
  29. #endif
  30.  
  31. int main(int argc, const char *argv[]) {
  32.     if (argc != 2) {
  33.         std::fputs("Usage: PROGRAM filename.txt\n", stderr);
  34.         return 1;
  35.     }
  36.     const auto filename = argv[1];
  37.  
  38. //    const auto filename = "/Users/Carter/numbers.txt";
  39.  
  40.     memory_mapped_file::read_only_mmf mf(filename);
  41.     if (!mf.is_open()) {
  42.         std::fputs("Failed to open the file\n", stderr);
  43.         return 2;
  44.     }
  45.     if (!mf.data()) {
  46.         std::fputs("Failed to map file into memory\n", stderr);
  47.         return 3;
  48.     }
  49.     const auto thdCount = std::thread::hardware_concurrency();
  50.     const auto filesize = mf.file_size();
  51.     if (filesize % (sizeof(__m256i) * thdCount)) {
  52.         std::fputs("Invalid filesize\n", stderr);
  53.         return 4;
  54.     }
  55.  
  56.     std::vector<std::thread> thds;
  57.     std::array<std::atomic<int>, 1000> arr = {};
  58.  
  59.     const auto totalPerThd = filesize / sizeof(__m256i) / thdCount;
  60.  
  61.     for (int ti = 0; ti < thdCount; ++ti) {
  62.         thds.push_back(std::thread([&](int ti) {
  63.             std::array<int, 1000> localArr = {};
  64.             const __m256i
  65.                 yumFF = _mm256_set1_epi32(0xFF),
  66.                 yum10 = _mm256_set1_epi32(10);
  67.  
  68.             for (ptrdiff_t offset = totalPerThd * ti, end = totalPerThd * (ti + 1); offset < end; ++offset) {
  69.                 __v32qi source = _mm256_loadu_si256((const __m256i *)mf.data() + offset); // 大端存储
  70.                 source = _mm256_sub_epi8(source, _mm256_set1_epi8('0'));
  71.  
  72.                 __v8si result = _mm256_and_si256(source, yumFF); // 从 source 中取出最高位(10进制)
  73.  
  74.                 for (int i = 0; i < 2; ++i) {
  75.                     result = _mm256_mullo_epi32(result, yum10); // 结果左移一位(10进制)
  76.                     source = _mm256_srli_epi32(source, 8); // source 右移一位(丢弃最高位位数)
  77.                     result = _mm256_add_epi32(result, _mm256_and_si256(source, yumFF)); // 取出次高位加到结果中
  78.                 }
  79.  
  80.                 for (int j = 0; j < sizeof(__m256i) / 4; ++j) {
  81.                     const auto num = _mm256_extract_epi32(result, j);
  82.                     ++localArr[num];
  83.                 }
  84.             }
  85.             for (int i = 0; i < localArr.size(); ++i) {
  86.                 arr[i].fetch_add(localArr[i], std::memory_order::memory_order_relaxed);
  87.             }
  88.         }, ti));
  89.     }
  90.  
  91.     for (auto&& thd : thds) {
  92.         thd.join();
  93.     }
  94.  
  95.     for (auto beg = arr.begin(); beg != arr.end(); ++beg) {
  96.         std::printf("%d\n", beg->load(std::memory_order::memory_order_relaxed));
  97.     }
  98. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement