Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cstring>
- #include <cstdio>
- #include <cstddef>
- #include <array>
- #include <immintrin.h>
- #include <vector>
- #include <thread>
- #include <atomic>
- #include "mmf.hpp"
// Portability shims for the SIMD names used in this file.
//
// When the compiler does not define __AVX2__, every 256-bit vector type and
// intrinsic name used below is redirected to its 128-bit counterpart, so the
// same source is compiled against 128-bit vectors instead.
#if !defined(__AVX2__)
// Vector types.
#  define __m256i              __m128i
#  define __v8si               __m128i
#  define __v32qi              __m128i
// Broadcast and load.
#  define _mm256_set1_epi8     _mm_set1_epi8
#  define _mm256_set1_epi32    _mm_set1_epi32
#  define _mm256_loadu_si256   _mm_loadu_si128
// Arithmetic and bitwise operations.
#  define _mm256_sub_epi8      _mm_sub_epi8
#  define _mm256_add_epi32     _mm_add_epi32
#  define _mm256_mullo_epi32   _mm_mullo_epi32
#  define _mm256_and_si256     _mm_and_si128
#  define _mm256_srli_epi32    _mm_srli_epi32
// Lane extraction.
#  define _mm256_extract_epi32 _mm_extract_epi32
#endif

// Under MSVC, declare __v32qi and __v8si as aliases of __m256i
// (presumably because MSVC's headers do not provide these names -- confirm).
#if defined(_MSC_VER)
typedef __m256i __v32qi;
typedef __m256i __v8si;
#endif
- int main(int argc, const char *argv[]) {
- if (argc != 2) {
- std::fputs("Usage: PROGRAM filename.txt\n", stderr);
- return 1;
- }
- const auto filename = argv[1];
- // const auto filename = "/Users/Carter/numbers.txt";
- memory_mapped_file::read_only_mmf mf(filename);
- if (!mf.is_open()) {
- std::fputs("Failed to open the file\n", stderr);
- return 2;
- }
- if (!mf.data()) {
- std::fputs("Failed to map file into memory\n", stderr);
- return 3;
- }
- const auto thdCount = std::thread::hardware_concurrency();
- const auto filesize = mf.file_size();
- if (filesize % (sizeof(__m256i) * thdCount)) {
- std::fputs("Invalid filesize\n", stderr);
- return 4;
- }
- std::vector<std::thread> thds;
- std::array<std::atomic<int>, 1000> arr = {};
- const auto totalPerThd = filesize / sizeof(__m256i) / thdCount;
- for (int ti = 0; ti < thdCount; ++ti) {
- thds.push_back(std::thread([&](int ti) {
- std::array<int, 1000> localArr = {};
- const __m256i
- yumFF = _mm256_set1_epi32(0xFF),
- yum10 = _mm256_set1_epi32(10);
- for (ptrdiff_t offset = totalPerThd * ti, end = totalPerThd * (ti + 1); offset < end; ++offset) {
- __v32qi source = _mm256_loadu_si256((const __m256i *)mf.data() + offset); // 大端存储
- source = _mm256_sub_epi8(source, _mm256_set1_epi8('0'));
- __v8si result = _mm256_and_si256(source, yumFF); // 从 source 中取出最高位(10进制)
- for (int i = 0; i < 2; ++i) {
- result = _mm256_mullo_epi32(result, yum10); // 结果左移一位(10进制)
- source = _mm256_srli_epi32(source, 8); // source 右移一位(丢弃最高位位数)
- result = _mm256_add_epi32(result, _mm256_and_si256(source, yumFF)); // 取出次高位加到结果中
- }
- for (int j = 0; j < sizeof(__m256i) / 4; ++j) {
- const auto num = _mm256_extract_epi32(result, j);
- ++localArr[num];
- }
- }
- for (int i = 0; i < localArr.size(); ++i) {
- arr[i].fetch_add(localArr[i], std::memory_order::memory_order_relaxed);
- }
- }, ti));
- }
- for (auto&& thd : thds) {
- thd.join();
- }
- for (auto beg = arr.begin(); beg != arr.end(); ++beg) {
- std::printf("%d\n", beg->load(std::memory_order::memory_order_relaxed));
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement