__constant sampler_t sampler = CLK_FILTER_NEAREST; // Inline definition of horizontal max inline float max4(float a, float b, float c, float d) { return max(max(max(a, b), c), d); } // Inline definition of horizontal min inline float min4(float a, float b, float c, float d) { return min(min(min(a, b), c), d); } // Traversal kernel __kernel void traverse( __read_only image2d_t nodes, __global const float4* triangles, __global const float4* rays, __global float4* result, const int num, const int w, const int h) { // Ray index int idx = get_global_id(0); if(idx < num) { // Stack int todo[32]; int todoOffset = 0; // Current node int nodeNum = 0; float tmin = 0.0f; float depth = 2e30f; // Fetch ray origin, direction and compute invdirection float4 origin = rays[2 * idx + 0]; float4 direction = rays[2 * idx + 1]; float4 invdir = native_recip(direction); float4 temp = (float4)(0.0f, 0.0f, 0.0f, 1.0f); // Traversal loop while(true) { // Fetch node information int2 nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w); int4 specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0)); // While node isn't leaf while(specs.z == 0) { // Fetch child bounding boxes float4 n0xy = read_imagef(nodes, sampler, nodeCoord); float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0)); float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0)); // Test ray against child bounding boxes float oodx = origin.x * invdir.x; float oody = origin.y * invdir.y; float oodz = origin.z * invdir.z; float c0lox = n0xy.x * invdir.x - oodx; float c0hix = n0xy.y * invdir.x - oodx; float c0loy = n0xy.z * invdir.y - oody; float c0hiy = n0xy.w * invdir.y - oody; float c0loz = nz.x * invdir.z - oodz; float c0hiz = nz.y * invdir.z - oodz; float c1loz = nz.z * invdir.z - oodz; float c1hiz = nz.w * invdir.z - oodz; float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), tmin); float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), depth); float c1lox = n1xy.x * invdir.x - oodx; float c1hix = n1xy.y * invdir.x - oodx; float c1loy = n1xy.z * invdir.y - oody; float c1hiy = n1xy.w * invdir.y - oody; float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), tmin); float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), depth); bool traverseChild0 = (c0max >= c0min); bool traverseChild1 = (c1max >= c1min); nodeNum = specs.x; int nodeAbove = specs.y; // We hit just one out of 2 childs if(traverseChild0 != traverseChild1) { if(traverseChild1) { nodeNum = nodeAbove; } } // We hit either both or none else { // If we hit none, pop node from stack (or exit traversal, if stack is empty) if (!traverseChild0) { if(todoOffset == 0) { break; } nodeNum = todo[--todoOffset]; } // If we hit both else { // Sort them (so nearest goes 1st, further 2nd) if(c1min < c0min) { unsigned int tmp = nodeNum; nodeNum = nodeAbove; nodeAbove = tmp; } // Push further on stack todo[todoOffset++] = nodeAbove; } } // Fetch next node information nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w); specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0)); } // If node is leaf & has some primitives if(specs.z > 0) { // Loop through primitives & perform intersection with them (Woop triangles) #pragma nounroll for(int i = specs.x; i < specs.y; i++) { // Fetch first point from global memory float4 v0 = triangles[i * 4 + 0]; float o_z = v0.w - origin.x * v0.x - origin.y * v0.y - origin.z * v0.z; float i_z = 1.0f / (direction.x * v0.x + direction.y * v0.y + direction.z * v0.z); float t = o_z * i_z; if(t > 0.0f && t < depth) { // Fetch second point from global memory float4 v1 = triangles[i * 4 + 1]; float o_x = v1.w + origin.x * v1.x + origin.y * v1.y + origin.z * v1.z; float d_x = direction.x * v1.x + direction.y * v1.y + direction.z * v1.z; float u = o_x + t * d_x; if(u >= 0.0f && u <= 1.0f) { // Fetch third point from global memory float4 v2 = triangles[i * 4 + 2]; float o_y = v2.w + origin.x * v2.x + origin.y * v2.y + origin.z * v2.z; float d_y = direction.x * v2.x + direction.y * v2.y + direction.z * v2.z; float v = o_y + t * d_y; if(v >= 0.0f && u + v <= 1.0f) { // We got successful hit, store the information depth = t; temp.x = u; temp.y = v; temp.z = t; temp.w = as_float(i); } } } } } // Pop node from stack (if empty, finish traversal) if(todoOffset == 0) { break; } nodeNum = todo[--todoOffset]; } // Store the ray traversal result in global memory result[idx] = temp; } }