View difference between Paste ID: CxLD8384 and dR4d33Lg
SHOW: | | - or go back to the newest paste.
1
__constant sampler_t sampler = CLK_FILTER_NEAREST;
2
3
// Inline definition of horizontal max
4
inline float max4(float a, float b, float c, float d)
5
{
6
    return max(max(max(a, b), c), d);
7
}
8
9
// Inline definition of horizontal min
10
inline float min4(float a, float b, float c, float d)
11
{
12
    return min(min(min(a, b), c), d);
13
}
14
15
// Traversal kernel
16
__kernel void traverse(	__read_only image2d_t nodes,
17
			__global const float4* triangles,
18
			__global const float4* rays,
19
			__global float4* result,
20
			const int num,
21
			const int w,
22
			const int h)
23
{
24
	// Ray index
25
	int idx = get_global_id(0);
26
	
27
	if(idx < num)
28
	{
29
		// Stack
30
        	int todo[32];
31
        	int todoOffset = 0;
32
        
33
		// Current node
34
		int nodeNum = 0;
35
		
36
        	float tmin = 0.0f;
37
        	float depth = 2e30f;
38
39
        	// Fetch ray origin, direction and compute invdirection
40
        	float4 origin = rays[2 * idx + 0];
41
        	float4 direction = rays[2 * idx + 1];
42
        	float4 invdir = native_recip(direction);
43
44
        	float4 temp = (float4)(0.0f, 0.0f, 0.0f, 1.0f);
45
46
        	// Traversal loop
47
        	while(true)
48
        	{
49
			// Fetch node information
50
            		int2 nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
51
            		int4 specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
52
			
53
			// While node isn't leaf
54
            		while(specs.z == 0)
55
            		{
56
				// Fetch child bounding boxes
57
		                float4 n0xy = read_imagef(nodes, sampler, nodeCoord);
58
		                float4 n1xy = read_imagef(nodes, sampler, nodeCoord + (int2)(1, 0));
59
		                float4 nz = read_imagef(nodes, sampler, nodeCoord + (int2)(2, 0));
60
				
61
				// Test ray against child bounding boxes
62
		                float oodx = origin.x * invdir.x;
63
		                float oody = origin.y * invdir.y;
64
		                float oodz = origin.z * invdir.z;
65
		                float c0lox = n0xy.x * invdir.x - oodx;
66
		                float c0hix = n0xy.y * invdir.x - oodx;
67
		                float c0loy = n0xy.z * invdir.y - oody;
68
		                float c0hiy = n0xy.w * invdir.y - oody;
69
		                float c0loz = nz.x * invdir.z - oodz;
70
		                float c0hiz = nz.y * invdir.z - oodz;
71
		                float c1loz = nz.z * invdir.z - oodz;
72
		                float c1hiz = nz.w * invdir.z - oodz;
73
		                float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), tmin);
74
		                float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), depth);
75
		                float c1lox = n1xy.x * invdir.x - oodx;
76
		                float c1hix = n1xy.y * invdir.x - oodx;
77
		                float c1loy = n1xy.z * invdir.y - oody;
78
		                float c1hiy = n1xy.w * invdir.y - oody;
79
		                float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), tmin);
80
		                float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), depth);
81
				
82
		                bool traverseChild0 = (c0max >= c0min);
83
		                bool traverseChild1 = (c1max >= c1min);
84
85
				nodeNum = specs.x;
86
		                int nodeAbove = specs.y;
87
				
88
				// We hit just one out of 2 childs
89
                		if(traverseChild0 != traverseChild1)
90
		                {
91
		                    	if(traverseChild1)
92
                		    	{
93
		                        	nodeNum = nodeAbove;
94
		                    	}
95
		                }
96
				// We hit either both or none
97
		                else
98
                		{
99
					// If we hit none, pop node from stack (or exit traversal, if stack is empty)
100
		                    	if (!traverseChild0)
101
                		    	{
102
                        			if(todoOffset == 0)
103
                        			{
104
							break;
105
                        			}
106
						nodeNum = todo[--todoOffset];
107
                    			}
108
					// If we hit both
109
                    			else
110
                    			{
111
						// Sort them (so nearest goes 1st, further 2nd)
112
                        			if(c1min < c0min)
113
                        			{
114
                            				unsigned int tmp = nodeNum;
115
                            				nodeNum = nodeAbove;
116
                            				nodeAbove = tmp;
117
                        			}
118
119
						// Push further on stack
120
                        			todo[todoOffset++] = nodeAbove;
121
                    			}
122
                		}
123
				
124
				// Fetch next node information
125
		                nodeCoord = (int2)((nodeNum << 2) % w, (nodeNum << 2) / w);
126
                		specs = read_imagei(nodes, sampler, nodeCoord + (int2)(3, 0));
127
            		}
128
129
			// If node is leaf & has some primitives
130
            		if(specs.z > 0)
131
            		{
132
				// Loop through primitives & perform intersection with them (Woop triangles)
133
                		#pragma nounroll
134
				for(int i = specs.x; i < specs.y; i++)
135
                		{
136
					// Fetch first point from global memory
137
                    			float4 v0 = triangles[i * 4 + 0];
138
139
                    			float o_z = v0.w - origin.x * v0.x - origin.y * v0.y - origin.z * v0.z;
140
                    			float i_z = 1.0f / (direction.x * v0.x + direction.y * v0.y + direction.z * v0.z);
141
                    			float t = o_z * i_z;
142
143
                    			if(t > 0.0f && t < depth)
144
                    			{
145
						// Fetch second point from global memory
146
                        			float4 v1 = triangles[i * 4 + 1];
147
148
                        			float o_x = v1.w + origin.x * v1.x + origin.y * v1.y + origin.z * v1.z;
149
                        			float d_x = direction.x * v1.x + direction.y * v1.y + direction.z * v1.z;
150
                        			float u = o_x + t * d_x;
151
152
                        			if(u >= 0.0f && u <= 1.0f)
153
                        			{
154
							// Fetch third point from global memory
155
                            				float4 v2 = triangles[i * 4 + 2];
156
157
                            				float o_y = v2.w + origin.x * v2.x + origin.y * v2.y + origin.z * v2.z;
158
                            				float d_y = direction.x * v2.x + direction.y * v2.y + direction.z * v2.z;
159
                            				float v = o_y + t * d_y;
160
161
                            				if(v >= 0.0f && u + v <= 1.0f)
162
                            				{
163
								// We got successful hit, store the information
164
                                				depth = t;
165
                                				temp.x = u;
166
                                				temp.y = v;
167
                                				temp.z = t;
168
                                				temp.w = as_float(i);
169
                            				}
170
                        			}	
171
                    			}
172
                		}
173
            		}
174
175
			// Pop node from stack (if empty, finish traversal)
176
            		if(todoOffset == 0)
177
            		{
178
                		break;
179
            		}
180
181
            		nodeNum = todo[--todoOffset];
182
        	}
183
184
		// Store the ray traversal result in global memory
185
	        result[idx] = temp;
186
    	}
187
}