Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #ifdef SAFE_VERSION
- #define SIMD_OVERLAP_TEST (_mm_movemask_ps(_mm_cmpngt_ps(b, _mm_load_ps(box)))==15)
- #else
- #define SIMD_OVERLAP_TEST (_mm_movemask_ps(_mm_cmpnle_ps(_mm_load_ps(box), b))==12)
- #endif
- bool Meshmerizer::CompleteBoxPruning(udword nb, const AABB* list, Container& pairs)
- {
- // Checkings
- if(!nb || !list)
- return false;
- SIMD_AABB_X* BoxListX = new SIMD_AABB_X[nb+1+5];
- SIMD_AABB_YZ* BoxListYZ = (SIMD_AABB_YZ*)_aligned_malloc(sizeof(SIMD_AABB_YZ)*(nb+1), 16);
- udword* Remap;
- // {
- // Allocate some temporary data
- float* PosList = new float[nb+1];
- // 1) Build main list using the primary axis
- for(udword i=0;i<nb;i++)
- PosList[i] = list[i].mMin.x;
- PosList[nb] = FLT_MAX;
- // 2) Sort the list
- static PRUNING_SORTER RS; // Static for coherence
- Remap = RS.Sort(PosList, nb+1).GetRanks();
- for(udword i=0;i<nb;i++)
- {
- const udword SortedIndex = Remap[i];
- BoxListX[i].InitFrom(list[SortedIndex]);
- BoxListYZ[i].InitFrom(list[SortedIndex]);
- }
- BoxListX[nb].mMinX = FLT_MAX;
- BoxListX[nb+1].mMinX = FLT_MAX;
- BoxListX[nb+2].mMinX = FLT_MAX;
- BoxListX[nb+3].mMinX = FLT_MAX;
- BoxListX[nb+4].mMinX = FLT_MAX;
- DELETEARRAY(PosList);
- // }
- // 3) Prune the list
- udword RunningAddress = 0;
- udword Index0 = 0;
- while(RunningAddress<nb && Index0<nb)
- {
- const SIMD_AABB_X& Box0X = BoxListX[Index0];
- const float MinLimit = Box0X.mMinX;
- while(BoxListX[RunningAddress++].mMinX<MinLimit);
- const SIMD_AABB_YZ& Box0YZ = BoxListYZ[Index0];
- SIMD_OVERLAP_INIT(Box0YZ)
- const float MaxLimit = Box0X.mMaxX;
- const udword RIndex0 = Remap[Index0];
- udword Offset = 0;
- const char* const CurrentBoxListYZ = (const char*)&BoxListYZ[RunningAddress];
- const char* const CurrentBoxListX = (const char*)&BoxListX[RunningAddress];
- goto FastLoop;
- _asm align 16
- FoundSlot2:
- Offset += 8;
- _asm align 16
- FoundSlot1:
- Offset += 8;
- _asm align 16
- FoundSlot0:
- Offset += 8;
- _asm align 16
- FastFoundOne:
- {
- const udword Index = (CurrentBoxListX + Offset - 8 - (const char*)BoxListX)>>3;
- outputPair(RIndex0, Index, pairs, Remap);
- }
- _asm align 16
- FastLoop:
- while(*(const float*)(CurrentBoxListX + Offset + 8*4)<=MaxLimit)
- {
- const float* box = (const float*)(CurrentBoxListYZ + Offset*2);
- if(SIMD_OVERLAP_TEST)
- goto FoundSlot0;
- box = (const float*)(CurrentBoxListYZ + Offset*2 + 16);
- if(SIMD_OVERLAP_TEST)
- goto FoundSlot1;
- box = (const float*)(CurrentBoxListYZ + Offset*2 + 16*2);
- if(SIMD_OVERLAP_TEST)
- goto FoundSlot2;
- box = (const float*)(CurrentBoxListYZ + Offset*2 + 16*3);
- Offset += 32;
- if(SIMD_OVERLAP_TEST)
- goto FastFoundOne;
- }
- #define BLOCK if(*(const float*)(CurrentBoxListX + Offset)<=MaxLimit) \
- {const float* box = (const float*)(CurrentBoxListYZ + Offset*2);\
- if(SIMD_OVERLAP_TEST) \
- goto BeforeLoop; \
- Offset += 8;
- goto StartLoop;
- _asm align 16
- BeforeLoop:
- {const udword Index = (CurrentBoxListX + Offset - (const char*)BoxListX)>>3;
- outputPair(RIndex0, Index, pairs, Remap);
- Offset += 8;
- }
- _asm align 16
- StartLoop:
- BLOCK
- BLOCK
- BLOCK
- BLOCK
- BLOCK
- }
- }
- }
- }
- goto StartLoop;
- }
- Index0++;
- }
- _aligned_free(BoxListYZ);
- DELETEARRAY(BoxListX);
- return true;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement