void BVH4Traverser::intersect(const Ray& ray) const { Hit hit; /*! stack state */ size_t stackPtr = 1; //!< current stack pointer int32 popCur = bvh->root; //!< pre-popped top node from the stack float popDist = neg_inf; //!< pre-popped distance of top node from the stack StackItem stack[1+3*BVH4::maxDepth]; //!< stack of nodes that still need to get traversed /*! offsets to select the side that becomes the lower or upper bound */ const size_t nearX = ray.dir.x >= 0 ? 0*sizeof(ssef) : 1*sizeof(ssef); const size_t nearY = ray.dir.y >= 0 ? 2*sizeof(ssef) : 3*sizeof(ssef); const size_t nearZ = ray.dir.z >= 0 ? 4*sizeof(ssef) : 5*sizeof(ssef); const size_t farX = nearX ^ 16; const size_t farY = nearY ^ 16; const size_t farZ = nearZ ^ 16; /*! load the ray into SIMD registers */ const sse3f norg(-ray.org.x,-ray.org.y,-ray.org.z); const sse3f rdir(ray.rdir.x,ray.rdir.y,ray.rdir.z); const ssef rayNear(ray.near); ssef rayFar(ray.far); const BVH4::Node* nodes = bvh->nodes; while (true) { /*! pop next node */ if (__builtin_expect(stackPtr == 0, false)) break; stackPtr--; int32 cur = popCur; next: /*! we mostly go into the inner node case */ if (__builtin_expect(cur >= 0, true)) { /*! single ray intersection with 4 boxes */ const BVH4::Node& node = bvh->node(nodes,cur); const ssef tNearX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearX)) * rdir.x; const ssef tNearY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearY)) * rdir.y; const ssef tNearZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearZ)) * rdir.z; const ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); const ssef tFarX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farX)) * rdir.x; const ssef tFarY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farY)) * rdir.y; const ssef tFarZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farZ)) * rdir.z; popCur = stack[stackPtr-1].ofs; //!< pre-pop of topmost stack item popDist = stack[stackPtr-1].dist; //!< pre-pop of distance of topmost stack item const ssef tFar = min(tFarX,tFarY,tFarZ,rayFar); size_t _hit = movemask(tNear <= tFar); /*! if no child is hit, pop next node */ if (__builtin_expect(_hit == 0, false)) continue; /*! one child is hit, continue with that child */ size_t r = __bsf(_hit); _hit = __btc(_hit,r); if (__builtin_expect(_hit == 0, true)) { cur = node.child[r]; goto next; } /*! two children are hit, push all onto stack */ const int32 c0 = node.child[r]; const float d0 = tNear[r]; r = __bsf(_hit); _hit = __btc(_hit,r); const int32 c1 = node.child[r]; const float d1 = tNear[r]; if (__builtin_expect(_hit == 0, true)) { stack[stackPtr].ofs = c0; stack[stackPtr++].dist = d0; stack[stackPtr].ofs = c1; stack[stackPtr++].dist = d1; cur = stack[stackPtr-1].ofs; stackPtr--; goto next; } /*! Here starts the slow path for 3 or 4 hit children. We push all nodes onto the stack */ stack[stackPtr].ofs = c0; stack[stackPtr++].dist = d0; stack[stackPtr].ofs = c1; stack[stackPtr++].dist = d1; /*! three children are hit, push all onto stack */ r = __bsf(_hit); _hit = __btc(_hit,r); int32 c = node.child[r]; float d = tNear[r]; stack[stackPtr].ofs = c; stack[stackPtr++].dist = d; if (__builtin_expect(_hit == 0, true)) { cur = stack[stackPtr-1].ofs; stackPtr--; goto next; } /*! four children are hit, push all onto stack */ r = __bsf(_hit); _hit = __btc(_hit,r); c = node.child[r]; d = tNear[r]; stack[stackPtr].ofs = c; stack[stackPtr++].dist = d; cur = stack[stackPtr-1].ofs; stackPtr--; goto next; } /*! this is a leaf node */ else { cur ^= 0x80000000; const size_t ofs = size_t(cur) >> 5; const size_t num = size_t(cur) & 0x1F; for (size_t i=ofs; itriangles[i].intersect(ray, hit); } popCur = stack[stackPtr-1].ofs; //!< pre-pop of topmost stack item popDist = stack[stackPtr-1].dist; //!< pre-pop of distance of topmost stack item } } } void buildCube(vector_t& triangles, const Vec3f& center, const float halfWidth, const float halfHeight, const float halfDepth) { Vec3f v0, v1, v2, v3, c; Vec3f xAxis(1.0f, 0.0f, 0.0f); Vec3f yAxis(0.0f, 1.0f, 0.0f); Vec3f zAxis(0.0f, 0.0f, 1.0f); c = center - zAxis * halfDepth; v0 = c - halfWidth * xAxis - halfHeight * yAxis; v1 = c + halfWidth * xAxis - halfHeight * yAxis; v2 = c + halfWidth * xAxis + halfHeight * yAxis; v3 = c - halfWidth * xAxis + halfHeight * yAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); c = center + zAxis * halfDepth; v0 = c - halfWidth * xAxis - halfHeight * yAxis; v1 = c + halfWidth * xAxis - halfHeight * yAxis; v2 = c + halfWidth * xAxis + halfHeight * yAxis; v3 = c - halfWidth * xAxis + halfHeight * yAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); c = center - xAxis * halfWidth; v0 = c - halfDepth * zAxis - halfHeight * yAxis; v1 = c + halfDepth * zAxis - halfHeight * yAxis; v2 = c + halfDepth * zAxis + halfHeight * yAxis; v3 = c - halfDepth * zAxis + halfHeight * yAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); c = center + xAxis * halfWidth; v0 = c - halfDepth * zAxis - halfHeight * yAxis; v1 = c + halfDepth * zAxis - halfHeight * yAxis; v2 = c + halfDepth * zAxis + halfHeight * yAxis; v3 = c - halfDepth * zAxis + halfHeight * yAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); c = center - yAxis * halfHeight; v0 = c - halfWidth * xAxis - halfDepth * zAxis; v1 = c + halfWidth * xAxis - halfDepth * zAxis; v2 = c + halfWidth * xAxis + halfDepth * zAxis; v3 = c - halfWidth * xAxis + halfDepth * zAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); c = center + yAxis * halfHeight; v0 = c - halfWidth * xAxis - halfDepth * zAxis; v1 = c + halfWidth * xAxis - halfDepth * zAxis; v2 = c + halfWidth * xAxis + halfDepth * zAxis; v3 = c - halfWidth * xAxis + halfDepth * zAxis; triangles.push_back(BuildTriangle(v0, v1, v2)); triangles.push_back(BuildTriangle(v2, v3, v0)); } int main(int argc, char *argv[]) { Ref bvh4; vector_t triangles; Ray ray(Vec3f(-5.0f, 0.0f, 0.0f), Vec3f(1.0f, 0.0f, 0.0f)); buildCube(triangles, Vec3f(0.0f, 0.0f, 0.0f), 1.0f, 1.0f, 1.0f); TaskScheduler::init(); bvh4 = rtcCreateAccel("bvh4", triangles.begin(), triangles.size()); bvh4->intersect(ray); TaskScheduler::cleanup(); return EXIT_SUCCESS; }