View difference between Paste ID: 7CVahWqS and wq1ppkV8
SHOW: | | - or go back to the newest paste.
1-
// original
1+
2-
            // Reports whether the axis-aligned box `rhs` and `sphere` overlap: it sums,
            // over the first three SIMD lanes, the squared amount by which the sphere
            // origin lies outside the box's [BottomLeftClosest, TopRightFurthest] range,
            // and compares that sum against the squared sphere radius.
            //
            // Returns true when the summed squared distance is <= radius * radius.
            //
            // NOTE(review): the bare "N+" / "N-" lines and the 4-space-indented
            // statements that reference `box[...]` inside this span appear to be
            // diff-viewer residue from the other paste revision, not part of this
            // function; they are left untouched here.
            bool IntersectBoxWithSphere(const Math::AABB& rhs, const Physics::Sphere& sphere) const {
2+
3-
                // All-zero vector used as the lower clamp below.
                __m128 zero = _mm_setzero_ps();
3+
4-
                // Unaligned loads of four consecutive floats starting at each member.
                // Only lanes 0..2 contribute to the result (see the return statement).
                // NOTE(review): whether a fourth float is readable past each member
                // depends on the layouts of Physics::Sphere and Math::AABB, which are
                // not visible here -- confirm.
                __m128 center = _mm_loadu_ps(reinterpret_cast<const float*>(&sphere.origin));
4+
5-
                __m128 boxmin = _mm_loadu_ps(reinterpret_cast<const float*>(&rhs.BottomLeftClosest));
5+
6-
                __m128 boxmax = _mm_loadu_ps(reinterpret_cast<const float*>(&rhs.TopRightFurthest));
6+
7-
	         
7+
8-
                // Per lane: max(boxmin - center, 0) + max(center - boxmax, 0), i.e. how
                // far the center lies below boxmin or above boxmax on that axis; both
                // terms are 0 when the center is within [boxmin, boxmax].
	            __m128 e = _mm_add_ps(_mm_max_ps(_mm_sub_ps(boxmin, center), zero), _mm_max_ps(_mm_sub_ps(center, boxmax), zero));
8+
    __m128 boxminx = _mm_loadu_ps(&box[0]->BottomLeftClosest);
9-
                // Square each lane.
	            e = _mm_mul_ps(e, e);
9+
    __m128 boxminy = _mm_loadu_ps(&box[1]->BottomLeftClosest);
10-
	            
10+
    __m128 boxminz = _mm_loadu_ps(&box[2]->BottomLeftClosest);
11-
                // 16-byte-aligned scratch buffer: _mm_store_ps is the aligned store.
                __declspec(align(16)) float arr[4];
11+
    __m128 boxmaxx = _mm_loadu_ps(&box[3]->BottomLeftClosest);
12-
                _mm_store_ps(arr, e);
12+
    _MM_TRANSPOSE4_PS(boxminx, boxminy, boxminz, boxmaxx);
13-
                
13+
14-
	            float r = sphere.radius;
14+
    boxmaxx = _mm_loadu_ps(&box[0]->TopRightFurthest);
15-
                // Sum lanes 0..2 only; lane 3 (arr[3]) is deliberately ignored.
	            return (arr[0] + arr[1] + arr[2] <= r * r);
15+
    __m128 boxmaxy = _mm_loadu_ps(&box[1]->TopRightFurthest);
16-
            }
16+
    __m128 boxmaxz = _mm_loadu_ps(&box[2]->TopRightFurthest);
17-
// modified
17+
    __m128 temp1x = _mm_loadu_ps(&box[3]->TopRightFurthest);
18
    _MM_TRANSPOSE4_PS(boxmaxx, boxmaxy, boxmaxz, temp1x);
19
    
20
    // _mm_max_ps(_mm_sub_ps(boxmin, center), zero)
21
    temp1x = _mm_sub_ps(boxminx, centerx);
22
    __m128 temp1y = _mm_sub_ps(boxminy, centery);
23
    __m128 temp1z = _mm_sub_ps(boxminz, centerz);
24
    
25-
    __declspec(align(16)) float temparr[4];
25+
26-
    for(int i = 0; i < 4; i++)
26+
27-
         temparr[i] = boxes[i]->BottomLeftClosest.x;
27+
28-
    __m128 boxminx = mm_load_ps(temparr);
28+
29-
    for(int i = 0; i < 4; i++)
29+
30-
         temparr[i] = boxes[i]->BottomLeftClosest.y;
30+
31-
    __m128 boxminy = mm_load_ps(temparr);
31+
32-
    for(int i = 0; i < 4; i++)
32+
33-
         temparr[i] = boxes[i]->BottomLeftClosest.z;
33+
34-
    __m128 boxminz = mm_load_ps(temparr);
34+
35-
    for(int i = 0; i < 4; i++)
35+
36-
         temparr[i] = boxes[i]->TopRightFurthest.x;
36+
37-
    __m128 boxmaxx = mm_load_ps(temparr);
37+
38-
    for(int i = 0; i < 4; i++)
38+
39-
         temparr[i] = boxes[i]->TopRightFurthest.y;
39+
40-
    __m128 boxmaxy = mm_load_ps(temparr);
40+
41-
    for(int i = 0; i < 4; i++)
41+
42-
         temparr[i] = boxes[i]->TopRightFurthest.z;
42+
43-
    __m128 boxmaxz = mm_load_ps(temparr);
43+
44
    std::array<bool, 4> ret;
45
    for(int i = 0; i < 4; i++) {
46-
    __m128 temp1x = _mm_sub_ps(boxminx, centerx);
46+
47
    }
48
    return ret;
49
}