Guest User

Vector rotation (SIMD)

a guest
Mar 25th, 2016
88
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <xmmintrin.h>
  2. #include <pmmintrin.h>
  3. #include "test2.h"
  4.  
  5. static __v4sf dot_product_ (__v4sf a, __v4sf b)
  6. {
  7.     __v4sf res = a*b;
  8.     res = _mm_hadd_ps (res, res);
  9.     res = _mm_hadd_ps (res, res);
  10.     return res;
  11. }
  12.  
  13. static __v4sf cross_product_ (__v4sf a, __v4sf b)
  14. {
  15.     __v4sf r1 = a * _mm_shuffle_ps (b, b, _MM_SHUFFLE (1, 3, 2, 0));
  16.     __v4sf r2 = b * _mm_shuffle_ps (a, a, _MM_SHUFFLE (1, 3, 2, 0));
  17.     __v4sf r = r1 - r2;
  18.     return _mm_shuffle_ps (r, r, _MM_SHUFFLE (1, 3, 2, 0));
  19. }
  20.  
  21. static __v4sf rotate_vector_ (__v4sf base, __v4sf vect)
  22. {
  23.     /* __v4sf zero = _mm_set_ps1 (0); */
  24.     /* __v4sf base_im = _mm_move_ss (base, zero); */
  25.     __v4sf base_re = _mm_shuffle_ps (base, base, 0);
  26.     __v4sf tmp = cross_product_ (base, vect);
  27.     tmp = tmp * _mm_set_ps1 (2.0);
  28.  
  29.     __v4sf res = vect + base_re*tmp + cross_product_ (base, tmp);
  30.     return res;
  31. }
  32.  
  33. static __v4sf quat_mul_ (__v4sf q1, __v4sf q2)
  34. {
  35.     __v4sf re_q1, re_q2, im_q1, im_q2, r1, r2, r3, r4;
  36.     __v4sf sign = _mm_set_epi32 (0x80000000, 0x80000000, 0x80000000, 0);
  37.     __v4sf re = q1*q2;
  38.     re = _mm_xor_ps (re, sign);
  39.     re = _mm_hadd_ps (re, re);
  40.     re = _mm_hadd_ps (re, re);
  41.  
  42.     re_q1 = _mm_shuffle_ps (q1, q1, 0);
  43.     re_q2 = _mm_shuffle_ps (q2, q2, 0);
  44.     r1 = re_q1 * q2;
  45.     r2 = re_q2 * q1;
  46.     r3 = cross_product_ (q1, q2);
  47.     r4 = r1+r2+r3;
  48.     r4 = _mm_move_ss (r4, re);
  49.     return r4;
  50. }
  51.  
  52. void quat_mul (float a[], float b[], float c[])
  53. {
  54.     __v4sf res = quat_mul_ (_mm_load_ps (a), _mm_load_ps(b));
  55.     _mm_store_ps (c, res);
  56. }
  57.  
  58. void rotate_vector (float base[], float vect[], float res[])
  59. {
  60.     __v4sf r = rotate_vector_ (_mm_load_ps (base), _mm_slli_si128 (_mm_load_ps (vect), 4));
  61.     r = _mm_srli_si128 (r, 4);
  62.     _mm_store_ps (res, r);
  63. }
RAW Paste Data