Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define _GNU_SOURCE
- #include <stdio.h>
- #include <stdlib.h>
- #include <sys/mman.h>
- #include <stdint.h>
- #include <string.h>
- #include <omp.h>
- #include <x86intrin.h>
/*
 * bind / bind2 / bind3 -- freeze the arguments of a call into a function that
 * takes no parameters.
 *
 * Each macro expands to a GNU statement expression that defines a nested
 * function `bound_thunk_` returning whatever `f(...)` returns, and evaluates
 * to that nested function (usable as a function pointer).
 *
 * The argument expressions are pasted textually into the thunk body, so they
 * are re-evaluated on every call of the thunk; variables named in them are
 * read at call time, not copied at bind time.
 *
 * Requires the GCC nested-function and statement-expression extensions.
 * The thunk must only be called while the function that invoked the macro is
 * still running and the variables named in the arguments are still in scope.
 *
 * __typeof__ is used instead of typeof so the macros also compile in strict
 * ISO modes (-std=c99 / -std=c11), where GCC does not recognise `typeof`.
 */
#define bind(f, arg) \
    ({ __typeof__(f(arg)) bound_thunk_(void) { return f(arg); } bound_thunk_; })
#define bind2(f, arg, arg2) \
    ({ __typeof__(f(arg, arg2)) bound_thunk_(void) { return f(arg, arg2); } bound_thunk_; })
#define bind3(f, arg, arg2, arg3) \
    ({ __typeof__(f(arg, arg2, arg3)) bound_thunk_(void) { return f(arg, arg2, arg3); } bound_thunk_; })
/*
 * alloc -- map `size` bytes of private, anonymous, readable and writable
 * memory.
 *
 * size:  length of the mapping in bytes.
 * flags: extra MAP_* flags OR-ed into MAP_PRIVATE | MAP_ANONYMOUS
 *        (for example MAP_POPULATE).
 *
 * Returns the value returned by mmap(): the address of the new mapping, or
 * MAP_FAILED (not NULL) on failure with errno set. Release with munmap().
 */
static inline void * alloc(uint64_t size, uint64_t flags) {
    const int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | (int)flags;

    /* Anonymous mappings have no backing file; mmap(2) says portable code
     * should pass -1 as the descriptor, since some implementations require it. */
    return mmap(NULL, (size_t)size, PROT_READ | PROT_WRITE, map_flags, -1, 0);
}
/* Pointer to a function that takes no arguments and returns void *. */
typedef void *(*vpvf_t)(void);

/* Byte-size units: KB is a plain int constant, GB has type unsigned long. */
#define KB (1024)
#define GB (1024ul * 1024 * 1024)
- void bench_memcpy(uint64_t total, uint64_t pass, vpvf_t f) {
- uint64_t i = (total/pass);
- double start = omp_get_wtime();
- do {f();} while(--i);
- double time = omp_get_wtime() - start;
- fprintf(stderr, "%fGB/s\n", ((total * 2)/GB)/time);
- }
/*
 * memcpy_sphkavx -- copy `n` bytes from `from` to `to` with aligned 256-bit
 * AVX loads and stores, four vectors (128 bytes) per loop iteration.
 *
 * to, from: destination and source; both must be 32-byte aligned because the
 *           aligned load/store intrinsics are used. The regions must not
 *           overlap.
 * n:        number of bytes to copy; any value, including 0, is accepted.
 *
 * Returns the original value of `to`.
 *
 * Whole 128-byte blocks go through the vector loop; the remaining n % 128
 * bytes are finished with memcpy(). Counting blocks (instead of comparing the
 * source pointer for equality with an end pointer) keeps the loop from
 * running past the buffers when n is 0 or not a multiple of 128.
 */
__attribute__((target("avx"))) /* lets the intrinsics compile without -mavx */
void * memcpy_sphkavx(__m256i * to, __m256i * from, uint64_t n) {
    enum { VECS_PER_BLOCK = 4 };
    const uint64_t block_bytes = VECS_PER_BLOCK * sizeof(__m256i);
    void *const start = to;

    for (uint64_t blocks = n / block_bytes; blocks != 0; --blocks) {
        _mm256_store_si256(to + 0, _mm256_load_si256(from + 0));
        _mm256_store_si256(to + 1, _mm256_load_si256(from + 1));
        _mm256_store_si256(to + 2, _mm256_load_si256(from + 2));
        _mm256_store_si256(to + 3, _mm256_load_si256(from + 3));
        to += VECS_PER_BLOCK;
        from += VECS_PER_BLOCK;
    }

    const uint64_t tail = n % block_bytes;
    if (tail != 0) {
        memcpy(to, from, (size_t)tail);
    }
    return start;
}
/*
 * memcpy_sphk -- copy `n` bytes from `from` to `to` with 128-bit streaming
 * (non-temporal) loads and stores, four vectors (64 bytes) per loop iteration.
 *
 * to, from: destination and source; both must be 16-byte aligned, as the
 *           streaming intrinsics require. The regions must not overlap.
 * n:        number of bytes to copy; any value, including 0, is accepted.
 *
 * Returns the original value of `to`.
 *
 * Whole 64-byte blocks go through the streaming loop; the remaining n % 64
 * bytes are finished with memcpy(). Counting blocks (instead of comparing the
 * source pointer for equality with an end pointer) keeps the loop from
 * running past the buffers when n is 0 or not a multiple of 64.
 */
__attribute__((target("sse4.1"))) /* lets the intrinsics compile without -msse4.1 */
void * memcpy_sphk(__m128i * to, __m128i * from, uint64_t n) {
    enum { VECS_PER_BLOCK = 4 };
    const uint64_t block_bytes = VECS_PER_BLOCK * sizeof(__m128i);
    void *const start = to;

    for (uint64_t blocks = n / block_bytes; blocks != 0; --blocks) {
        _mm_stream_si128(to + 0, _mm_stream_load_si128(from + 0));
        _mm_stream_si128(to + 1, _mm_stream_load_si128(from + 1));
        _mm_stream_si128(to + 2, _mm_stream_load_si128(from + 2));
        _mm_stream_si128(to + 3, _mm_stream_load_si128(from + 3));
        to += VECS_PER_BLOCK;
        from += VECS_PER_BLOCK;
    }

    /* Non-temporal stores are weakly ordered; fence so they are ordered
     * before any store issued after this point. */
    _mm_sfence();

    const uint64_t tail = n % block_bytes;
    if (tail != 0) {
        memcpy(to, from, (size_t)tail);
    }
    return start;
}
- int main(void) {
- vpvf_t alloc1gb = bind2(alloc, GB, MAP_POPULATE);
- void * from = alloc1gb(), * to = alloc1gb();
- uint64_t block_size = KB*KB*8, total = GB*2;
- do {
- fprintf(stderr, "\nBLOCK SIZE: %luKB\n", block_size/KB);
- fprintf(stderr, "SPHK:\n");
- if(block_size < (8*KB*KB)) {
- bench_memcpy(total, block_size, bind3(memcpy_sphkavx, to, from, block_size));
- bench_memcpy(total, block_size, bind3(memcpy_sphkavx, to, from, block_size));
- } else {
- bench_memcpy(total, block_size, bind3(memcpy_sphk, to, from, block_size));
- bench_memcpy(total, block_size, bind3(memcpy_sphk, to, from, block_size));
- }
- fprintf(stderr, "memcpy:\n");
- bench_memcpy(total, block_size, bind3(memcpy, to, from, block_size));
- bench_memcpy(total, block_size, bind3(memcpy, to, from, block_size));
- } while((block_size *= 2) != (GB*2));
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement