Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <sys/mman.h>
- #include <stdio.h>
- #include <string.h>
- #include <error.h>
- #include <errno.h>
- #include <malloc.h>
- #include <unistd.h>
- #include <stdint.h>
- #include <x86intrin.h>
- #include <omp.h>
/* Element count (2^24 floats) requested for every per-axis array. */
#define SIZE (1ul << 24)
/* A range of floats described by a pair of pointers, iterated as [begin, end). */
typedef struct {
    float *begin; /* first element */
    float *end;   /* one past the last element */
} vec_t;
- typedef struct {
- vec_t x, y, z;
- } vec3f_t;
- vec3f_t coords, speed;
- inline void * _alloc(uint64_t size) {
- // fprintf(stderr, "%lu\n", size/1024/1024);
- return mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, 0, 0);
- }
- // // #define _alloc_pop(T, size) _alloc_pop(sizeof(T) * size)
- inline vec_t vec_constr(uint64_t size) {
- void * ptr = _alloc(size * sizeof(float));
- if((uintptr_t)ptr == -1) error(-1, errno, "");
- // fprintf(stderr, "%ld\n", ptr);
- return (vec_t){ptr, ptr + size};
- }
- inline vec3f_t vec3f_constr(uint64_t size) {
- return (vec3f_t){vec_constr(size), vec_constr(size), vec_constr(size)};
- }
- __attribute__((constructor)) void init_vec3f(void) {
- coords = vec3f_constr(SIZE);
- speed = vec3f_constr(SIZE);
- }
/*
 * Fill the 16-byte vectors in [begin, end) with `t` using non-temporal
 * (streaming) stores.
 *
 * begin/end must be 16-byte aligned, as _mm_stream_ps requires. An empty
 * range is a no-op, and lengths that are not a multiple of four vectors are
 * handled by a tail loop instead of running past `end`.
 *
 * NOTE(review): streaming stores are weakly ordered; if another thread reads
 * the buffer, the caller presumably needs an _mm_sfence() - confirm.
 */
void _fast_memset(__v4sf *begin, __v4sf *end, __m128 t)
{
    /* Main loop: four stores (64 bytes) per iteration. */
    while (end - begin >= 4) {
        _mm_stream_ps((float *)(begin + 0), t);
        _mm_stream_ps((float *)(begin + 1), t);
        _mm_stream_ps((float *)(begin + 2), t);
        _mm_stream_ps((float *)(begin + 3), t);
        begin += 4;
    }

    /* Tail: at most three vectors remain. */
    for (; begin < end; ++begin)
        _mm_stream_ps((float *)begin, t);
}
/*
 * Build a vector of four floats from four consecutive rand() results.
 *
 * The calls are made in separate statements so that lane k always holds the
 * k-th value drawn; inside a single initializer list the evaluation order of
 * the calls is unspecified.
 */
__m128 rand128(void)
{
    const float lane0 = (float)rand();
    const float lane1 = (float)rand();
    const float lane2 = (float)rand();
    const float lane3 = (float)rand();

    /* _mm_setr_ps takes its arguments in memory (lane 0 first) order. */
    return _mm_setr_ps(lane0, lane1, lane2, lane3);
}
- void _fast_memrand(__v4sf * begin, __v4sf * end) {
- do {
- __m128 t = rand128();
- _mm_stream_ps((float *)(begin + 0), t);
- _mm_stream_ps((float *)(begin + 1), t);
- _mm_stream_ps((float *)(begin + 2), t);
- _mm_stream_ps((float *)(begin + 3), t);
- } while((begin += 4) != end);
- }
/* Compute *dst += *src for one 16-byte vector; the result is written with a streaming store. */
static inline void fast_memadd_one(__v4sf *dst, const __v4sf *src)
{
    /* _mm_load_ps takes `const float *`, so both vector pointers are cast. */
    const __m128 sum = _mm_add_ps(_mm_load_ps((const float *)dst),
                                  _mm_load_ps((const float *)src));
    _mm_stream_ps((float *)dst, sum);
}

/*
 * Element-wise add: for every 16-byte vector in [begin, end), add the
 * corresponding vector starting at `add` and store the sum back in place.
 *
 * All three pointers must be 16-byte aligned, and `add` must provide at least
 * as many vectors as the destination range. An empty range is a no-op, and
 * lengths that are not a multiple of four vectors are handled by a tail loop
 * instead of running past `end`.
 */
void _fast_memadd(__v4sf *begin, __v4sf *end, __v4sf *add)
{
    /* Main loop: four vectors (64 bytes) per iteration. */
    while (end - begin >= 4) {
        fast_memadd_one(begin + 0, add + 0);
        fast_memadd_one(begin + 1, add + 1);
        fast_memadd_one(begin + 2, add + 2);
        fast_memadd_one(begin + 3, add + 3);
        begin += 4;
        add += 4;
    }

    /* Tail: at most three vectors remain. */
    for (; begin < end; ++begin, ++add)
        fast_memadd_one(begin, add);
}
- void memset_speed(vec3f_t vec, uint32_t i);
- void memadd_speed(uint32_t i);
- void memrand_speed(vec3f_t vec, uint32_t i);
- int main(void) {
- memset_speed(coords, 20);
- memset_speed(speed, 20);
- memadd_speed(20);
- memrand_speed(coords, 20);
- // float * it = coords.x.begin, * end = coords.x.end;
- // do {
- // fprintf(stderr, "%f\n", *it);
- // } while(++it != end);
- return 0;
- }
- void test_vec(vec_t vec, float t) {
- float * it = vec.begin, * end = vec.end;
- do {
- if(*it != t)
- error(-1, errno, "test_vec(%lu) failed: *it = %f, t = %f\n", (it - (float *)(vec.begin)), *it, t);
- } while(++it != end);
- }
- void test_vec3f(vec3f_t vec, float t) {
- test_vec(vec.x, t);
- test_vec(vec.y, t);
- test_vec(vec.z, t);
- }
- void memrand_speed(vec3f_t vec, uint32_t i) {
- double start = omp_get_wtime();
- do {
- _fast_memrand(vec.x.begin, vec.x.end);
- _fast_memrand(vec.y.begin, vec.y.end);
- _fast_memrand(vec.z.begin, vec.z.end);
- } while(--i);
- double time = omp_get_wtime() - start;
- fprintf(stderr, "MEMRAND: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20.)/1024./1024/1024)/(time));
- }
- void memadd_speed(uint32_t i) {
- double start = omp_get_wtime();
- do {
- _fast_memadd(coords.x.begin, coords.x.end, speed.x.begin);
- _fast_memadd(coords.y.begin, coords.y.end, speed.x.begin);
- _fast_memadd(coords.z.begin, coords.z.end, speed.x.begin);
- } while(--i);
- double time = omp_get_wtime() - start;
- fprintf(stderr, "MEMADD: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20. * 2)/1024./1024/1024)/(time));
- test_vec3f(coords, 1. + (1. * 20.));
- }
- void memset_speed(vec3f_t vec, uint32_t i) {
- double start = omp_get_wtime();
- // do {
- // memset(coords.x.begin, 1., SIZE*4);
- // memset(coords.y.begin, 1., SIZE*4);
- // memset(coords.z.begin, 1., SIZE*4);
- // } while(--i);
- do {
- _fast_memset(vec.x.begin, vec.x.end, _mm_set_ps1(1.f));
- _fast_memset(vec.y.begin, vec.y.end, _mm_set_ps1(1.f));
- _fast_memset(vec.z.begin, vec.z.end, _mm_set_ps1(1.f));
- } while(--i);
- double time = omp_get_wtime() - start;
- fprintf(stderr, "MEMSET: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20.)/1024./1024/1024)/(time));
- test_vec3f(coords, 1.f);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement