Advertisement
Guest User

Untitled

a guest
Mar 24th, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.65 KB | None | 0 0
  1. //For test purpose only, remove later!
  2. {
  3. _mm_mfence();
  4. {
  5. __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[0]);
  6. TempA = _mm_add_ss(TempA, CurrentInnerA);
  7. _mm_store_ss(CurrentInnerPtr + P0s[0], TempA);
  8.  
  9. __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[0]);
  10. TempB = _mm_add_ss(TempB, CurrentInnerB);
  11. _mm_store_ss(CurrentInnerPtr + P1s[0], TempB);
  12. }
  13. {
  14. __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[1]);
  15. TempA = _mm_add_ss(TempA, _mm_shuffle_ps(CurrentInnerA, CurrentInnerA, 0x55));
  16. _mm_store_ss(CurrentInnerPtr + P0s[1], TempA);
  17.  
  18. __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[1]);
  19. TempB = _mm_add_ss(TempB, _mm_shuffle_ps(CurrentInnerB, CurrentInnerB, 0x55));
  20. _mm_store_ss(CurrentInnerPtr + P1s[1], TempB);
  21. }
  22. {
  23. __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[2]);
  24. TempA = _mm_add_ss(TempA, _mm_unpackhi_ps(CurrentInnerA, CurrentInnerA));
  25. _mm_store_ss(CurrentInnerPtr + P0s[2], TempA);
  26.  
  27. __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[2]);
  28. TempB = _mm_add_ss(TempB, _mm_unpackhi_ps(CurrentInnerB, CurrentInnerB));
  29. _mm_store_ss(CurrentInnerPtr + P1s[2], TempB);
  30. }
  31. {
  32. __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[3]);
  33. TempA = _mm_add_ss(TempA, _mm_shuffle_ps(CurrentInnerA, CurrentInnerA, 0xFF));
  34. _mm_store_ss(CurrentInnerPtr + P0s[3], TempA);
  35.  
  36. __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[3]);
  37. TempB = _mm_add_ss(TempB, _mm_shuffle_ps(CurrentInnerB, CurrentInnerB, 0xFF));
  38. _mm_store_ss(CurrentInnerPtr + P1s[3], TempB);
  39. }
  40. _mm_mfence();
  41. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement