Advertisement
Guest User

Untitled

a guest
Nov 2nd, 2013
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 4.56 KB | None | 0 0
  1. #include <sys/mman.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <error.h>
  5. #include <errno.h>
  6. #include <malloc.h>
  7. #include <unistd.h>
  8. #include <stdint.h>
  9. #include <x86intrin.h>
  10. #include <omp.h>
  11.  
  /* Number of floats requested for every component array: 2^24 elements. */
  #define SIZE (1ul << 24)
  13.  
  14.  
  /*
   * A run of floats described by two pointers: `begin` is the first element,
   * `end` is the position at which iteration stops (it is never dereferenced
   * by the loops in this file).
   */
  typedef struct {
    float *begin;
    float *end;
  } vec_t;
  18.  
  19. typedef struct {
  20.   vec_t x, y, z;
  21. } vec3f_t;
  22.  
  23. vec3f_t coords, speed;
  24.  
  25. inline void * _alloc(uint64_t size) {
  26. //   fprintf(stderr, "%lu\n", size/1024/1024);
  27.   return mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, 0, 0);
  28. }
  29. // // #define _alloc_pop(T, size) _alloc_pop(sizeof(T) * size)
  30.  
  31. inline vec_t vec_constr(uint64_t size) {
  32.   void * ptr = _alloc(size * sizeof(float));
  33.   if((uintptr_t)ptr == -1) error(-1, errno, "");
  34. //   fprintf(stderr, "%ld\n", ptr);
  35.   return (vec_t){ptr, ptr + size};
  36. }
  37.  
  38. inline vec3f_t vec3f_constr(uint64_t size) {
  39.   return (vec3f_t){vec_constr(size), vec_constr(size), vec_constr(size)};
  40. }
  41.  
  42. __attribute__((constructor)) void init_vec3f(void) {
  43.   coords = vec3f_constr(SIZE);
  44.   speed = vec3f_constr(SIZE);
  45. }
  46.  
  /*
   * Fill the 16-byte vectors in [begin, end) with `t` using non-temporal
   * (streaming) stores.
   *
   * begin and end must be 16-byte aligned, as _mm_stream_ps requires.
   * An empty range writes nothing, and the length need not be a multiple of
   * four vectors. The previous `do ... while (begin != end)` loop always
   * wrote 64 bytes and ran past `end` for any other length.
   */
  void _fast_memset(__v4sf * begin, __v4sf * end, __m128 t) {
    /* Main loop: four streaming stores (64 bytes) per pass. */
    while (end - begin >= 4) {
      _mm_stream_ps((float *)(begin + 0), t);
      _mm_stream_ps((float *)(begin + 1), t);
      _mm_stream_ps((float *)(begin + 2), t);
      _mm_stream_ps((float *)(begin + 3), t);
      begin += 4;
    }
    /* Tail: at most three remaining vectors. */
    for (; begin < end; ++begin)
      _mm_stream_ps((float *)begin, t);
  }
  55.  
  /*
   * Return a vector of four values drawn from rand(), each converted to float.
   *
   * The draws are sequenced explicitly so that lane 0 always receives the
   * first rand() result. Initializer-list expressions are indeterminately
   * sequenced, which left the lane order unspecified.
   */
  __m128 rand128(void) {
    const float r0 = (float)rand();
    const float r1 = (float)rand();
    const float r2 = (float)rand();
    const float r3 = (float)rand();
    /* _mm_setr_ps takes its arguments in memory (lane 0 first) order. */
    return _mm_setr_ps(r0, r1, r2, r3);
  }
  59.  
  60. void _fast_memrand(__v4sf * begin, __v4sf * end) {
  61.   do {
  62.     __m128 t = rand128();
  63.     _mm_stream_ps((float *)(begin + 0), t);
  64.     _mm_stream_ps((float *)(begin + 1), t);
  65.     _mm_stream_ps((float *)(begin + 2), t);
  66.     _mm_stream_ps((float *)(begin + 3), t);
  67.   } while((begin += 4) != end);
  68. }
  69.  
  /*
   * Element-wise add: for every 16-byte vector in [begin, end), compute
   * begin[k] += add[k] and write the sum back with a non-temporal store.
   *
   * begin, end and add must be 16-byte aligned (_mm_load_ps/_mm_stream_ps),
   * and `add` must hold at least as many vectors as the destination range.
   * An empty range does nothing, and the length need not be a multiple of
   * four vectors. The previous `do ... while (begin != end)` loop ran past
   * `end` in both cases.
   */
  void _fast_memadd(__v4sf * begin, __v4sf * end, __v4sf * add) {
    /* _mm_load_ps takes `const float *`, so the vector pointers are cast. */
    for (; end - begin >= 4; begin += 4, add += 4) {
      _mm_stream_ps((float *)(begin + 0),
                    _mm_add_ps(_mm_load_ps((const float *)(begin + 0)),
                               _mm_load_ps((const float *)(add + 0))));
      _mm_stream_ps((float *)(begin + 1),
                    _mm_add_ps(_mm_load_ps((const float *)(begin + 1)),
                               _mm_load_ps((const float *)(add + 1))));
      _mm_stream_ps((float *)(begin + 2),
                    _mm_add_ps(_mm_load_ps((const float *)(begin + 2)),
                               _mm_load_ps((const float *)(add + 2))));
      _mm_stream_ps((float *)(begin + 3),
                    _mm_add_ps(_mm_load_ps((const float *)(begin + 3)),
                               _mm_load_ps((const float *)(add + 3))));
    }
    /* Tail: at most three remaining vectors. */
    for (; begin < end; ++begin, ++add) {
      _mm_stream_ps((float *)begin,
                    _mm_add_ps(_mm_load_ps((const float *)begin),
                               _mm_load_ps((const float *)add)));
    }
  }
  78.  
  79. void memset_speed(vec3f_t vec, uint32_t i);
  80. void memadd_speed(uint32_t i);
  81. void memrand_speed(vec3f_t vec, uint32_t i);
  82. int main(void) {
  83.   memset_speed(coords, 20);
  84.   memset_speed(speed, 20);
  85.   memadd_speed(20);
  86.   memrand_speed(coords, 20);
  87. //   float * it = coords.x.begin, * end = coords.x.end;
  88. //   do {
  89. //     fprintf(stderr, "%f\n", *it);
  90. //   } while(++it != end);
  91.   return 0;
  92. }
  93.  
  94.  
  95. void test_vec(vec_t vec, float t) {
  96.   float * it = vec.begin, * end = vec.end;
  97.   do {
  98.     if(*it != t)
  99.       error(-1, errno, "test_vec(%lu) failed: *it = %f, t = %f\n", (it - (float *)(vec.begin)), *it, t);
  100.   } while(++it != end);
  101. }
  102.  
  103. void test_vec3f(vec3f_t vec, float t) {
  104.   test_vec(vec.x, t);
  105.   test_vec(vec.y, t);
  106.   test_vec(vec.z, t);
  107. }
  108.  
  109. void memrand_speed(vec3f_t vec, uint32_t i) {
  110.   double start = omp_get_wtime();
  111.   do {
  112.     _fast_memrand(vec.x.begin, vec.x.end);
  113.     _fast_memrand(vec.y.begin, vec.y.end);
  114.     _fast_memrand(vec.z.begin, vec.z.end);
  115.   } while(--i);
  116.   double time = omp_get_wtime() - start;
  117.   fprintf(stderr, "MEMRAND: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20.)/1024./1024/1024)/(time));
  118. }
  119.  
  120. void memadd_speed(uint32_t i) {
  121.   double start = omp_get_wtime();
  122.   do {
  123.     _fast_memadd(coords.x.begin, coords.x.end, speed.x.begin);
  124.     _fast_memadd(coords.y.begin, coords.y.end, speed.x.begin);
  125.     _fast_memadd(coords.z.begin, coords.z.end, speed.x.begin);
  126.   } while(--i);
  127.   double time = omp_get_wtime() - start;
  128.   fprintf(stderr, "MEMADD: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20. * 2)/1024./1024/1024)/(time));
  129.   test_vec3f(coords, 1. + (1. * 20.));
  130.  
  131. }
  132.  
  133. void memset_speed(vec3f_t vec, uint32_t i) {
  134.   double start = omp_get_wtime();
  135. //   do {
  136. //     memset(coords.x.begin, 1., SIZE*4);
  137. //     memset(coords.y.begin, 1., SIZE*4);
  138. //     memset(coords.z.begin, 1., SIZE*4);
  139. //   } while(--i);
  140.   do {
  141.     _fast_memset(vec.x.begin, vec.x.end, _mm_set_ps1(1.f));
  142.     _fast_memset(vec.y.begin, vec.y.end, _mm_set_ps1(1.f));
  143.     _fast_memset(vec.z.begin, vec.z.end, _mm_set_ps1(1.f));
  144.   } while(--i);
  145.   double time = omp_get_wtime() - start;
  146.   fprintf(stderr, "MEMSET: %.3fsec - %.2fGB/s\n", time, ((sizeof(float) * SIZE * 3. * 20.)/1024./1024/1024)/(time));
  147.   test_vec3f(coords, 1.f);
  148. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement