SHOW:
|
|
- or go back to the newest paste.
| 1 | __constant sampler_t sampler = CLK_FILTER_NEAREST; | |
| 2 | ||
| 3 | // Inline definition of horizontal max | |
| 4 | inline float max4(float a, float b, float c, float d) | |
| 5 | {
| |
| 6 | return max(max(max(a, b), c), d); | |
| 7 | } | |
| 8 | ||
| 9 | // Inline definition of horizontal min | |
| 10 | inline float min4(float a, float b, float c, float d) | |
| 11 | {
| |
| 12 | return min(min(min(a, b), c), d); | |
| 13 | } | |
| 14 | ||
| 15 | // Traversal kernel | |
| 16 | __kernel void traverse( __read_only image2d_t nodes, | |
| 17 | __global const float4* triangles, | |
| 18 | __global const float4* rays, | |
| 19 | __global float4* result, | |
| 20 | const int num, | |
| 21 | const int w, | |
| 22 | const int h) | |
| 23 | {
| |
| 24 | // Ray index | |
| 25 | int idx = get_global_id(0); | |
| 26 | ||
| 27 | if(idx < num) | |
| 28 | {
| |
| 29 | // Stack | |
| 30 | int todo[32]; | |
| 31 | int todoOffset = 0; | |
| 32 | ||
| 33 | // Current node | |
| 34 | int nodeNum = 0; | |
| 35 | ||
| 36 | float tmin = 0.0f; | |
| 37 | float depth = 2e30f; | |
| 38 | ||
| 39 | // Fetch ray origin, direction and compute invdirection | |
| 40 | float4 origin = rays[2 * idx + 0]; | |
| 41 | float4 direction = rays[2 * idx + 1]; | |
| 42 | float4 invdir = native_recip(direction); | |
| 43 | ||
| 44 | float4 temp = (float4)(0.0f, 0.0f, 0.0f, 1.0f); | |
| 45 | ||
| 46 | // Traversal loop | |
| 47 | while(true) | |
| 48 | {
| |
| 49 | // Fetch node information | |
| 50 | int2 nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w); | |
| 51 | int4 specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0)); | |
| 52 | ||
| 53 | // While node isn't leaf | |
| 54 | while(specs.z == 0) | |
| 55 | {
| |
| 56 | // Fetch child bounding boxes | |
| 57 | float4 n0xy = read_imagef(nodes, sampler, nodeCoord); | |
| 58 | float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0)); | |
| 59 | float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0)); | |
| 60 | ||
| 61 | // Test ray against child bounding boxes | |
| 62 | float oodx = origin.x * invdir.x; | |
| 63 | float oody = origin.y * invdir.y; | |
| 64 | float oodz = origin.z * invdir.z; | |
| 65 | float c0lox = n0xy.x * invdir.x - oodx; | |
| 66 | float c0hix = n0xy.y * invdir.x - oodx; | |
| 67 | float c0loy = n0xy.z * invdir.y - oody; | |
| 68 | float c0hiy = n0xy.w * invdir.y - oody; | |
| 69 | float c0loz = nz.x * invdir.z - oodz; | |
| 70 | float c0hiz = nz.y * invdir.z - oodz; | |
| 71 | float c1loz = nz.z * invdir.z - oodz; | |
| 72 | float c1hiz = nz.w * invdir.z - oodz; | |
| 73 | float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), tmin); | |
| 74 | float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), depth); | |
| 75 | float c1lox = n1xy.x * invdir.x - oodx; | |
| 76 | float c1hix = n1xy.y * invdir.x - oodx; | |
| 77 | float c1loy = n1xy.z * invdir.y - oody; | |
| 78 | float c1hiy = n1xy.w * invdir.y - oody; | |
| 79 | float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), tmin); | |
| 80 | float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), depth); | |
| 81 | ||
| 82 | bool traverseChild0 = (c0max >= c0min); | |
| 83 | bool traverseChild1 = (c1max >= c1min); | |
| 84 | ||
| 85 | nodeNum = specs.x; | |
| 86 | int nodeAbove = specs.y; | |
| 87 | ||
| 88 | // We hit just one out of 2 childs | |
| 89 | if(traverseChild0 != traverseChild1) | |
| 90 | {
| |
| 91 | if(traverseChild1) | |
| 92 | {
| |
| 93 | nodeNum = nodeAbove; | |
| 94 | } | |
| 95 | } | |
| 96 | // We hit either both or none | |
| 97 | else | |
| 98 | {
| |
| 99 | // If we hit none, pop node from stack (or exit traversal, if stack is empty) | |
| 100 | if (!traverseChild0) | |
| 101 | {
| |
| 102 | if(todoOffset == 0) | |
| 103 | {
| |
| 104 | break; | |
| 105 | } | |
| 106 | nodeNum = todo[--todoOffset]; | |
| 107 | } | |
| 108 | // If we hit both | |
| 109 | else | |
| 110 | {
| |
| 111 | // Sort them (so nearest goes 1st, further 2nd) | |
| 112 | if(c1min < c0min) | |
| 113 | {
| |
| 114 | unsigned int tmp = nodeNum; | |
| 115 | nodeNum = nodeAbove; | |
| 116 | nodeAbove = tmp; | |
| 117 | } | |
| 118 | ||
| 119 | // Push further on stack | |
| 120 | todo[todoOffset++] = nodeAbove; | |
| 121 | } | |
| 122 | } | |
| 123 | ||
| 124 | // Fetch next node information | |
| 125 | nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w); | |
| 126 | specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0)); | |
| 127 | } | |
| 128 | ||
| 129 | // If node is leaf & has some primitives | |
| 130 | if(specs.z > 0) | |
| 131 | {
| |
| 132 | // Loop through primitives & perform intersection with them (Woop triangles) | |
| 133 | #pragma nounroll | |
| 134 | for(int i = specs.x; i < specs.y; i++) | |
| 135 | {
| |
| 136 | // Fetch first point from global memory | |
| 137 | float4 v0 = triangles[i * 4 + 0]; | |
| 138 | ||
| 139 | float o_z = v0.w - origin.x * v0.x - origin.y * v0.y - origin.z * v0.z; | |
| 140 | float i_z = 1.0f / (direction.x * v0.x + direction.y * v0.y + direction.z * v0.z); | |
| 141 | float t = o_z * i_z; | |
| 142 | ||
| 143 | if(t > 0.0f && t < depth) | |
| 144 | {
| |
| 145 | // Fetch second point from global memory | |
| 146 | float4 v1 = triangles[i * 4 + 1]; | |
| 147 | ||
| 148 | float o_x = v1.w + origin.x * v1.x + origin.y * v1.y + origin.z * v1.z; | |
| 149 | float d_x = direction.x * v1.x + direction.y * v1.y + direction.z * v1.z; | |
| 150 | float u = o_x + t * d_x; | |
| 151 | ||
| 152 | if(u >= 0.0f && u <= 1.0f) | |
| 153 | {
| |
| 154 | // Fetch third point from global memory | |
| 155 | float4 v2 = triangles[i * 4 + 2]; | |
| 156 | ||
| 157 | float o_y = v2.w + origin.x * v2.x + origin.y * v2.y + origin.z * v2.z; | |
| 158 | float d_y = direction.x * v2.x + direction.y * v2.y + direction.z * v2.z; | |
| 159 | float v = o_y + t * d_y; | |
| 160 | ||
| 161 | if(v >= 0.0f && u + v <= 1.0f) | |
| 162 | {
| |
| 163 | // We got successful hit, store the information | |
| 164 | depth = t; | |
| 165 | temp.x = u; | |
| 166 | temp.y = v; | |
| 167 | temp.z = t; | |
| 168 | temp.w = as_float(i); | |
| 169 | } | |
| 170 | } | |
| 171 | } | |
| 172 | } | |
| 173 | } | |
| 174 | ||
| 175 | // Pop node from stack (if empty, finish traversal) | |
| 176 | if(todoOffset == 0) | |
| 177 | {
| |
| 178 | break; | |
| 179 | } | |
| 180 | ||
| 181 | nodeNum = todo[--todoOffset]; | |
| 182 | } | |
| 183 | ||
| 184 | // Store the ray traversal result in global memory | |
| 185 | result[idx] = temp; | |
| 186 | } | |
| 187 | } |