Guest User

Untitled

a guest
Oct 26th, 2015
231
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 4.51 KB | None | 0 0
  1. #include <x86intrin.h>
  2. #include <stdint.h>
  3. #include <omp.h>
  4. #include <stdio.h>
  /* Four packed 32-bit lanes in one 16-byte vector (GCC/Clang vector extension). */
  typedef float float_v4_t __attribute__((vector_size(16)));
  typedef uint32_t uint32_v4_t __attribute__((vector_size(16)));

  /*
   * Advance four coordinates by one step, reflecting at the bounds.
   *
   * Any lane whose position is <= lo or >= hi gets the sign of its direction
   * toggled, and the toggled direction is stored back to *dir. Every lane is
   * then moved by its (possibly toggled) direction. The sign is toggled, not
   * forced, so a lane that is still outside the range on the next call is
   * toggled again.
   */
  static inline void bounce_step_v4(float_v4_t *pos, float_v4_t *dir,
                                    float_v4_t lo, float_v4_t hi)
  {
    /* Written as an unsigned literal: `1 << 31` overflows a signed int. */
    const uint32_v4_t sign_bit = {0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u};
    /* Vector comparisons yield all-ones in the lanes where the test holds. */
    const uint32_v4_t outside = (uint32_v4_t)(*pos <= lo) | (uint32_v4_t)(*pos >= hi);
    /* XOR with the sign bit negates exactly the lanes selected by the mask. */
    const float_v4_t new_dir = (float_v4_t)((outside & sign_bit) ^ (uint32_v4_t)(*dir));

    *dir = new_dir;
    *pos += new_dir;
  }

  /* Scalar counterpart of bounce_step_v4, used for a trailing partial vector. */
  static inline void bounce_step_f32(float *pos, float *dir, float lo, float hi)
  {
    if (*pos <= lo || *pos >= hi) {
      *dir = -*dir;
    }
    *pos += *dir;
  }

  #ifdef __cplusplus
  extern "C" {
  #endif

  /*
   * Move `count` points one step inside the box [0, 640] x [0, 24].
   *
   * x, y       - positions, updated in place.
   * dirx, diry - per-step displacements; the sign is toggled in place for any
   *              point lying on or beyond a bound before the move.
   * count      - number of floats (not vectors) in each of the four arrays.
   *
   * Any count is accepted, including 0 and values that are not a multiple of
   * the vector width. The previous implementation looped until the pointer
   * was exactly equal to the end pointer while stepping eight vectors at a
   * time, so it ran past the arrays unless count was a positive multiple of
   * 32. Results for such counts are unchanged.
   *
   * The arrays are accessed through float_v4_t pointers, so each must satisfy
   * that type's alignment.
   */
  void process(float_v4_t *x, float_v4_t *dirx, float_v4_t *y, float_v4_t *diry, uint64_t count)
  {
    const float lo_x = 0.0f;
    const float hi_x = 640.0f;
    const float lo_y = 0.0f;
    const float hi_y = 24.0f;

    /* Plain locals: no guarded static initialisation on the hot path. */
    const float_v4_t vmin_x = {lo_x, lo_x, lo_x, lo_x};
    const float_v4_t vmax_x = {hi_x, hi_x, hi_x, hi_x};
    const float_v4_t vmin_y = {lo_y, lo_y, lo_y, lo_y};
    const float_v4_t vmax_y = {hi_y, hi_y, hi_y, hi_y};

    const uint64_t full_vectors = count / 4;
    const uint64_t tail = count % 4;

    /* One vector per iteration; unrolling is left to the optimizer. */
    for (uint64_t i = 0; i < full_vectors; ++i) {
      bounce_step_v4(&x[i], &dirx[i], vmin_x, vmax_x);
      bounce_step_v4(&y[i], &diry[i], vmin_y, vmax_y);
    }

    if (tail != 0) {
      /* Fewer than four floats remain: touch only those elements. */
      float *tx = (float *)(x + full_vectors);
      float *tdx = (float *)(dirx + full_vectors);
      float *ty = (float *)(y + full_vectors);
      float *tdy = (float *)(diry + full_vectors);

      for (uint64_t lane = 0; lane < tail; ++lane) {
        bounce_step_f32(tx + lane, tdx + lane, lo_x, hi_x);
        bounce_step_f32(ty + lane, tdy + lane, lo_y, hi_y);
      }
    }
  }

  #ifdef __cplusplus
  }
  #endif
Advertisement
Add Comment
Please, Sign In to add comment