Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // original
- bool IntersectBoxWithSphere(const Math::AABB& rhs, const Physics::Sphere& sphere) const {
- __m128 zero = _mm_setzero_ps();
- __m128 center = _mm_loadu_ps(reinterpret_cast<const float*>(&sphere.origin));
- __m128 boxmin = _mm_loadu_ps(reinterpret_cast<const float*>(&rhs.BottomLeftClosest));
- __m128 boxmax = _mm_loadu_ps(reinterpret_cast<const float*>(&rhs.TopRightFurthest));
- __m128 e = _mm_add_ps(_mm_max_ps(_mm_sub_ps(boxmin, center), zero), _mm_max_ps(_mm_sub_ps(center, boxmax), zero));
- e = _mm_mul_ps(e, e);
- __declspec(align(16)) float arr[4];
- _mm_store_ps(arr, e);
- float r = sphere.radius;
- return (arr[0] + arr[1] + arr[2] <= r * r);
- }
- // modified
- std::array<bool, 4> IntersectBoxWithSphere(std::array<const Math::AABB*, 4> boxes, const Physics::Sphere& sphere) const {
- float radius = sphere.radius * sphere.radius;
- __m128 zero = _mm_setzero_ps();
- __m128 radius = _mm_load1_ps(&radius);
- __m128 centerx = _mm_load1_ps(&sphere.origin.x);
- __m128 centery = _mm_load1_ps(&sphere.origin.y);
- __m128 centerz = _mm_load1_ps(&sphere.origin.z);
- __declspec(align(16)) float temparr[4];
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->BottomLeftClosest.x;
- __m128 boxminx = mm_load_ps(temparr);
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->BottomLeftClosest.y;
- __m128 boxminy = mm_load_ps(temparr);
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->BottomLeftClosest.z;
- __m128 boxminz = mm_load_ps(temparr);
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->TopRightFurthest.x;
- __m128 boxmaxx = mm_load_ps(temparr);
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->TopRightFurthest.y;
- __m128 boxmaxy = mm_load_ps(temparr);
- for(int i = 0; i < 4; i++)
- temparr[i] = boxes[i]->TopRightFurthest.z;
- __m128 boxmaxz = mm_load_ps(temparr);
- // _mm_max_ps(_mm_sub_ps(boxmin, center), zero)
- __m128 temp1x = _mm_sub_ps(boxminx, centerx);
- __m128 temp1y = _mm_sub_ps(boxminy, centery);
- __m128 temp1z = _mm_sub_ps(boxminz, centerz);
- temp1x = _mm_max_ps(temp1x, zero);
- temp1y = _mm_max_py(temp1x, zero);
- temp1z = _mm_max_pz(temp1x, zero);
- // _mm_max_ps(_mm_sub_ps(center, boxmin), zero)
- __m128 temp2x = _mm_sub_ps(centerx, boxmaxx);
- __m128 temp2y = _mm_sub_ps(centery, boxmaxy);
- __m128 temp2z = _mm_sub_ps(centerz, boxmaxz);
- temp2x = _mm_max_ps(temp2x, zero);
- temp2y = _mm_max_ps(temp2y, zero);
- temp2z = _mm_max_ps(temp2z, zero);
- __m128 ex = _mm_add_ps(temp1x, temp2x);
- __m128 ey = _mm_add_ps(temp1y, temp2y);
- __m128 ez = _mm_add_ps(temp1z, temp2z);
- __m128 final_e = _mm_add_ps(ex, _mm_add_ps(ez, ey));
- __m128 result = _mm_cmple_ps(final_e, radius);
- float output[4];
- _mm_store_ps(output, result);
- std::array<bool, 4> ret;
- for(int i = 0; i < 4; i++) {
- ret[i] = output[i];
- }
- return ret;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement