Advertisement
Guest User

Untitled

a guest
Apr 30th, 2013
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.94 KB | None | 0 0
  1. __constant sampler_t sampler = CLK_FILTER_NEAREST;
  2.  
  3. // Inline definition of horizontal max
  4. inline float max4(float a, float b, float c, float d)
  5. {
  6. return max(max(max(a, b), c), d);
  7. }
  8.  
  9. // Inline definition of horizontal min
  10. inline float min4(float a, float b, float c, float d)
  11. {
  12. return min(min(min(a, b), c), d);
  13. }
  14.  
  15. // Traversal kernel
  16. __kernel void traverse( __read_only image2d_t nodes,
  17. __global const float4* triangles,
  18. __global const float4* rays,
  19. __global float4* result,
  20. const int num,
  21. const int w,
  22. const int h)
  23. {
  24. // Ray index
  25. int idx = get_global_id(0);
  26.  
  27. if(idx < num)
  28. {
  29. // Stack
  30. int todo[32];
  31. int todoOffset = 0;
  32.  
  33. // Current node
  34. int nodeNum = 0;
  35.  
  36. float tmin = 0.0f;
  37. float depth = 2e30f;
  38.  
  39. // Fetch ray origin, direction and compute invdirection
  40. float4 origin = rays[2 * idx + 0];
  41. float4 direction = rays[2 * idx + 1];
  42. float4 invdir = native_recip(direction);
  43.  
  44. float4 temp = (float4)(0.0f, 0.0f, 0.0f, 1.0f);
  45.  
  46. // Traversal loop
  47. while(true)
  48. {
  49. // Fetch node information
  50. int2 nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
  51. int4 specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
  52.  
  53. // While node isn't leaf
  54. while(specs.z == 0)
  55. {
  56. // Fetch child bounding boxes
  57. float4 n0xy = read_imagef(nodes, sampler, nodeCoord);
  58. float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0));
  59. float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0));
  60.  
  61. // Test ray against child bounding boxes
  62. float oodx = origin.x * invdir.x;
  63. float oody = origin.y * invdir.y;
  64. float oodz = origin.z * invdir.z;
  65. float c0lox = n0xy.x * invdir.x - oodx;
  66. float c0hix = n0xy.y * invdir.x - oodx;
  67. float c0loy = n0xy.z * invdir.y - oody;
  68. float c0hiy = n0xy.w * invdir.y - oody;
  69. float c0loz = nz.x * invdir.z - oodz;
  70. float c0hiz = nz.y * invdir.z - oodz;
  71. float c1loz = nz.z * invdir.z - oodz;
  72. float c1hiz = nz.w * invdir.z - oodz;
  73. float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), tmin);
  74. float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), depth);
  75. float c1lox = n1xy.x * invdir.x - oodx;
  76. float c1hix = n1xy.y * invdir.x - oodx;
  77. float c1loy = n1xy.z * invdir.y - oody;
  78. float c1hiy = n1xy.w * invdir.y - oody;
  79. float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), tmin);
  80. float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), depth);
  81.  
  82. bool traverseChild0 = (c0max >= c0min);
  83. bool traverseChild1 = (c1max >= c1min);
  84.  
  85. nodeNum = specs.x;
  86. int nodeAbove = specs.y;
  87.  
  88. // We hit just one out of 2 childs
  89. if(traverseChild0 != traverseChild1)
  90. {
  91. if(traverseChild1)
  92. {
  93. nodeNum = nodeAbove;
  94. }
  95. }
  96. // We hit either both or none
  97. else
  98. {
  99. // If we hit none, pop node from stack (or exit traversal, if stack is empty)
  100. if (!traverseChild0)
  101. {
  102. if(todoOffset == 0)
  103. {
  104. break;
  105. }
  106. nodeNum = todo[--todoOffset];
  107. }
  108. // If we hit both
  109. else
  110. {
  111. // Sort them (so nearest goes 1st, further 2nd)
  112. if(c1min < c0min)
  113. {
  114. unsigned int tmp = nodeNum;
  115. nodeNum = nodeAbove;
  116. nodeAbove = tmp;
  117. }
  118.  
  119. // Push further on stack
  120. todo[todoOffset++] = nodeAbove;
  121. }
  122. }
  123.  
  124. // Fetch next node information
  125. nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
  126. specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
  127. }
  128.  
  129. // If node is leaf & has some primitives
  130. if(specs.z > 0)
  131. {
  132. // Loop through primitives & perform intersection with them (Woop triangles)
  133. #pragma nounroll
  134. for(int i = specs.x; i < specs.y; i++)
  135. {
  136. // Fetch first point from global memory
  137. float4 v0 = triangles[i * 4 + 0];
  138.  
  139. float o_z = v0.w - origin.x * v0.x - origin.y * v0.y - origin.z * v0.z;
  140. float i_z = 1.0f / (direction.x * v0.x + direction.y * v0.y + direction.z * v0.z);
  141. float t = o_z * i_z;
  142.  
  143. if(t > 0.0f && t < depth)
  144. {
  145. // Fetch second point from global memory
  146. float4 v1 = triangles[i * 4 + 1];
  147.  
  148. float o_x = v1.w + origin.x * v1.x + origin.y * v1.y + origin.z * v1.z;
  149. float d_x = direction.x * v1.x + direction.y * v1.y + direction.z * v1.z;
  150. float u = o_x + t * d_x;
  151.  
  152. if(u >= 0.0f && u <= 1.0f)
  153. {
  154. // Fetch third point from global memory
  155. float4 v2 = triangles[i * 4 + 2];
  156.  
  157. float o_y = v2.w + origin.x * v2.x + origin.y * v2.y + origin.z * v2.z;
  158. float d_y = direction.x * v2.x + direction.y * v2.y + direction.z * v2.z;
  159. float v = o_y + t * d_y;
  160.  
  161. if(v >= 0.0f && u + v <= 1.0f)
  162. {
  163. // We got successful hit, store the information
  164. depth = t;
  165. temp.x = u;
  166. temp.y = v;
  167. temp.z = t;
  168. temp.w = as_float(i);
  169. }
  170. }
  171. }
  172. }
  173. }
  174.  
  175. // Pop node from stack (if empty, finish traversal)
  176. if(todoOffset == 0)
  177. {
  178. break;
  179. }
  180.  
  181. nodeNum = todo[--todoOffset];
  182. }
  183.  
  184. // Store the ray traversal result in global memory
  185. result[idx] = temp;
  186. }
  187. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement