Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //For test purpose only, remove later! Scalar scatter-add: for i = 0..3, adds lane i of CurrentInnerA to the float at CurrentInnerPtr + P0s[i] and lane i of CurrentInnerB to the float at CurrentInnerPtr + P1s[i], one element at a time, bracketed by memory fences. NOTE(review): assumes CurrentInnerPtr is a float pointer and P0s/P1s hold element offsets -- their declarations are outside this excerpt, confirm.
- {
- _mm_mfence(); // full load/store fence before the read-modify-write sequence
- { // lane 0: used as-is, because the *_ss intrinsics only operate on the lowest element
- __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[0]); // load one float into the low lane (upper lanes are zeroed)
- TempA = _mm_add_ss(TempA, CurrentInnerA); // add the low lanes only
- _mm_store_ss(CurrentInnerPtr + P0s[0], TempA); // write the low lane back to the address it was loaded from
- __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[0]); // same steps for the B vector at the P1s offset; runs after the A store, so a shared offset receives both additions
- TempB = _mm_add_ss(TempB, CurrentInnerB);
- _mm_store_ss(CurrentInnerPtr + P1s[0], TempB);
- }
- { // lane 1
- __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[1]);
- TempA = _mm_add_ss(TempA, _mm_shuffle_ps(CurrentInnerA, CurrentInnerA, 0x55)); // 0x55 selects element 1 for every lane, bringing it into the low lane
- _mm_store_ss(CurrentInnerPtr + P0s[1], TempA);
- __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[1]);
- TempB = _mm_add_ss(TempB, _mm_shuffle_ps(CurrentInnerB, CurrentInnerB, 0x55)); // element 1 of B into the low lane
- _mm_store_ss(CurrentInnerPtr + P1s[1], TempB);
- }
- { // lane 2
- __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[2]);
- TempA = _mm_add_ss(TempA, _mm_unpackhi_ps(CurrentInnerA, CurrentInnerA)); // unpackhi(a, a) yields {a2, a2, a3, a3}, so element 2 lands in the low lane
- _mm_store_ss(CurrentInnerPtr + P0s[2], TempA);
- __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[2]);
- TempB = _mm_add_ss(TempB, _mm_unpackhi_ps(CurrentInnerB, CurrentInnerB)); // element 2 of B into the low lane
- _mm_store_ss(CurrentInnerPtr + P1s[2], TempB);
- }
- { // lane 3
- __m128 TempA = _mm_load_ss(CurrentInnerPtr + P0s[3]);
- TempA = _mm_add_ss(TempA, _mm_shuffle_ps(CurrentInnerA, CurrentInnerA, 0xFF)); // 0xFF selects element 3 for every lane, bringing it into the low lane
- _mm_store_ss(CurrentInnerPtr + P0s[3], TempA);
- __m128 TempB = _mm_load_ss(CurrentInnerPtr + P1s[3]);
- TempB = _mm_add_ss(TempB, _mm_shuffle_ps(CurrentInnerB, CurrentInnerB, 0xFF)); // element 3 of B into the low lane
- _mm_store_ss(CurrentInnerPtr + P1s[3], TempB);
- }
- _mm_mfence(); // matching fence after the last store; NOTE(review): the "remove later" note presumably covers these fences (or the whole block) -- confirm with the author
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement