# SAO

a guest
Sep 7th, 2014
268
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1.
/** Number of occlusion taps evaluated per pixel; also selects the entry of ROTATIONS that is used. */
#define NUM_SAMPLES (8)

/** Spiral turn counts, indexed by (sample count - 1). NUM_SPIRAL_TURNS reads entry NUM_SAMPLES - 1. */
static const int ROTATIONS[] =
{
     1,  1,  2,  3,  2,  5,  2,  3,  2,
     3,  3,  5,  5,  3,  4,  7,  5,  5,  7,
     9,  8,  5,  5,  7,  7,  7,  8,  5,  8,
    11, 12,  7, 10, 13,  8, 11,  8,  7, 14,
    11, 11, 13, 12, 13, 19, 17, 13, 11, 18,
    19, 11, 11, 14, 17, 21, 15, 16, 17, 18,
    13, 17, 11, 17, 19, 18, 25, 18, 19, 19,
    29, 21, 19, 27, 31, 29, 21, 18, 17, 29,
    31, 31, 23, 18, 25, 26, 25, 23, 19, 34,
    19, 27, 21, 25, 39, 29, 17, 21, 27
};
14.
/** Camera-space depth that maps to a bilateral key of 1.0 (see CSZToKey).
    It does not have to match the real far plane of the projection. */
#define FAR_PLANE_Z (90.0)

// How many times the spiral tap pattern winds around the disk. The value is
// looked up in ROTATIONS for the current NUM_SAMPLES; it should be prime so
// that taps do not line up.
static const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1];
22.
// AO radius r in scene units (e.g. metres).
static const float radius = 0.7;

// r squared, precomputed for the falloff term in sampleAO.
static const float radius2 = (radius * radius);

// Subtracted from the normal-projected offset in sampleAO so that smooth
// corners do not self-occlude (e.g. 0.01m).
static const float bias = 0.02f;

// Height in pixels of a 1m object seen from 1m away. It only acts as a scale
// on `radius`, so it may be hard-coded (around 500), which makes `radius`
// effectively unitless but resolution dependent. It can also be derived from
// the projection matrix.
static const float projScale = 500.0f;

// Near/far values used when linearising the sampled depth in getPosition.
static const float nearZ = 0.001;
static const float farZ = 1.0;

// Reprojection terms consumed by reconstructCSPosition: .xy scales the
// screen-space coordinate, .zw is added to it.
// NOTE(review): presumably uploaded by the application — confirm.
float4 g_ReprojectInfoFromInt;
41.
42. // Texture2D<float> InputTextureLinearDepth : register(t0);
43. // Texture2D<float4> InputTextureSSAO : register(t1);
44. // Texture2D<float2> InputTextureMotion : register(t2);
45.
// Depth input read by the position reconstruction helpers.
// Point filtered, mirrored at the borders.
texture2D depthTex2D;
sampler depthSampler = sampler_state
{
    Texture     = <depthTex2D>;
    AddressU    = Mirror;
    AddressV    = Mirror;
    MinFilter   = POINT;
    MagFilter   = POINT;
    MipFilter   = POINT;
    SRGBTexture = FALSE;
};
57.
// Frame texture input. Linearly filtered, clamped at the borders.
// None of the shaders in this part of the file sample it.
texture2D frameTex2D;
sampler frameSampler = sampler_state
{
    Texture     = <frameTex2D>;
    AddressU    = Clamp;
    AddressV    = Clamp;
    MinFilter   = LINEAR;
    MagFilter   = LINEAR;
    MipFilter   = LINEAR;
    SRGBTexture = FALSE;
};
69.
// Output of the previous pass, read by HBlurSSAO and VBlurSSAO.
// Linearly filtered, clamped at the borders.
texture2D prevPassTex2D;
sampler passSampler = sampler_state
{
    Texture     = <prevPassTex2D>;
    AddressU    = Clamp;
    AddressV    = Clamp;
    MinFilter   = LINEAR;
    MagFilter   = LINEAR;
    MipFilter   = LINEAR;
    SRGBTexture = FALSE;
};
81.
// Tiling noise texture; SSAOCalculate derives the per-pixel rotation of the
// tap pattern from it. Linearly filtered, wrapped in both directions.
texture2D noiseTexture < string filename = "RandomNoiseB.dds"; >;
sampler2D noiseSampler = sampler_state
{
    Texture   = <noiseTexture>;

    MinFilter = LINEAR;
    MagFilter = LINEAR;
    MipFilter = LINEAR;

    AddressU  = WRAP;
    AddressV  = WRAP;
};
93.
// Vertex shader output / pixel shader input for every pass in this effect.
struct VSOUT
{
    float4 vertPos : POSITION0;   // position written by FrameVS
    float2 UVCoord : TEXCOORD0;   // texture coordinate forwarded to the pixel shader
};
99.
// Per-vertex input consumed by FrameVS.
struct VSIN
{
    float4 vertPos : POSITION0;   // incoming vertex position (w is ignored by FrameVS)
    float2 UVCoord : TEXCOORD0;   // incoming texture coordinate
};
105.
106.
/** Pass-through vertex shader: forwards the incoming xyz position with w forced
    to 1 and copies the texture coordinate unchanged. No transform is applied. */
VSOUT FrameVS(VSIN IN)
{
    VSOUT OUT;
    OUT.vertPos = float4(IN.vertPos.xyz, 1.0f);
    OUT.UVCoord = IN.UVCoord;
    return OUT;
}
116.
117. /** Reconstruct camera-space P.xyz from screen-space S = (x, y) in
118. pixels and camera-space z < 0. Assumes that the upper-left pixel center
119. is at (0.5, 0.5) [but that need not be the location at which the sample tap
120. was placed!]
121. */
122.
123. // Projection Matrix as generated in the .cpp code ( not sure if it works :/ ) ------------------------
124.
125. // D3DXVECTOR4 g_ReprojectInfoFromInt;
126. // unsigned width, height;
127. // width = 1280; height = 720;
128. // g_ReprojectInfoFromInt.x = -2.0f / ((float)(float)width*((float)height / (float)width));
129. // g_ReprojectInfoFromInt.y = -2.0f / (float)height*1.0;
130. // g_ReprojectInfoFromInt.z = (1.0f - 0.0) / ((float)(float)height / (float)width) + g_ReprojectInfoFromInt.x * 0.5f;
131. // g_ReprojectInfoFromInt.w = (1.0f + 0.0) / 1.0 + g_ReprojectInfoFromInt.y * 0.5f;
132.
133. // HRESULT hr = effect->SetVector(projectionHandle, &g_ReprojectInfoFromInt);
134.
/** Rebuilds a camera-space position from a screen-space coordinate S and a
    depth z: xy = (S * g_ReprojectInfoFromInt.xy + g_ReprojectInfoFromInt.zw) * z,
    and z is passed through. */
float3 reconstructCSPosition(float2 S, float z)
{
    float2 scale  = g_ReprojectInfoFromInt.xy;
    float2 offset = g_ReprojectInfoFromInt.zw;
    float2 xy     = (S * scale + offset) * z;
    return float3(xy, z);
}
139.
/** Estimates a unit face normal at position C from the screen-space derivatives
    of C: normalize(cross(ddy(C), ddx(C))). */
float3 reconstructCSFaceNormal(float3 C)
{
    float3 dCdy = ddy(C);
    float3 dCdx = ddx(C);
    return normalize(cross(dCdy, dCdx));
}
145.
/** Computes tap `sampleNumber` of the spiral pattern on a unit disk.
    Returns the unit direction (cos, sin) of the tap; `ssR` receives its radius
    as a fraction of the disk radius, so the caller scales it by the real
    radius. `spinAngle` is added to the tap angle. */
float2 tapLocation(int sampleNumber, float spinAngle, out float ssR)
{
    // Fraction of the way along the spiral; also the relative radius.
    float t = float(sampleNumber + 0.5) * (1.0 / NUM_SAMPLES);
    ssR = t;

    // 6.28 approximates one full turn in radians.
    float theta = t * (NUM_SPIRAL_TURNS * 6.28) + spinAngle;

    float s, c;
    sincos(theta, s, c);
    return float2(c, s);
}
158.
/** Maps a camera-space depth to a bilateral key in [0, 1] by scaling with
    1 / FAR_PLANE_Z and clamping. The result is what packKey stores. */
float CSZToKey(float z)
{
    float normalized = z * (1.0 / FAR_PLANE_Z);
    return saturate(normalized);
}
164.
/** Reads the depth texture at ssP, linearises it with nearZ/farZ and returns
    the reconstructed camera-space position. ssP is used as-is: no half-pixel
    offset is added before reconstruction. */
float3 getPosition(float2 ssP)
{
    float rawDepth = tex2D(depthSampler, ssP).r;
    float z = (2.0f * nearZ) / ((farZ + nearZ) - rawDepth * (farZ - nearZ));
    return reconstructCSPosition(ssP, z);
}
177.
/** Returns the camera-space position found at ssC + unitOffset * ssR, with the
    lookup coordinate clamped to [0, 1]. unitOffset is expected to have unit
    length. The depth fetch, linearisation and reconstruction are the same as
    in getPosition, so this simply delegates to it. */
float3 getOffsetPosition(float2 ssC, float2 unitOffset, float ssR)
{
    float2 tapCoord = saturate(ssR * unitOffset + ssC);
    return getPosition(tapCoord);
}
194.
195.
/** Occlusion contributed by tap `tapIndex` around the pixel at ssC, whose
    camera-space position is C and unit normal is n_C. ssDiskRadius is the
    maximum screen-space sampling radius; randomPatternRotationAngle spins the
    tap pattern for this pixel. The result is not yet normalised; SSAOCalculate
    divides the accumulated sum by radius^6. */
float sampleAO(in float2 ssC, in float3 C, in float3 n_C, in float ssDiskRadius, in int tapIndex, in float randomPatternRotationAngle)
{
    // Tap direction on the unit disk and its radius scaled to the disk size.
    float tapRadius;
    float2 tapDir = tapLocation(tapIndex, randomPatternRotationAngle, tapRadius);
    tapRadius *= ssDiskRadius;

    // Vector from the shaded point to the potential occluder.
    float3 toOccluder = getOffsetPosition(ssC, tapDir, tapRadius) - C;

    float distSq     = dot(toOccluder, toOccluder);
    float alongNormal = dot(toOccluder, n_C);

    // Keeps the division finite when the occluder coincides with C.
    const float epsilon = 0.02f;

    // Cubic falloff that reaches zero at the AO radius.
    float falloff = max(radius2 - distSq, 0.0);
    float facing  = max((alongNormal - bias) / (epsilon + distSq), 0.0);
    return falloff * falloff * falloff * facing;
}
217.
218.
/** Splits a key into two channels for storage in GB.
    p.x receives floor(key * 256) / 256 (the coarse part) and p.y receives the
    remaining fraction of key * 256, in [0, 1). */
void packKey(float key, out float2 p)
{
    float scaled = key * 256.0;
    float whole  = floor(scaled);   // truncates toward -inf; this is not round-to-nearest
    p = float2(whole * (1.0 / 256.0), scaled - whole);
}
229.
/** Recombines the two channels written by packKey into a single key:
    p.x * 256/257 + p.y * 1/257. */
float unpackKey(float2 p)
{
    const float2 weights = float2(256.0 / 257.0, 1.0 / 257.0);
    return dot(p, weights);
}
234.
#define visibility output.r
#define bilateralKey output.gb

/** First pass: computes ambient visibility for the pixel at IN.UVCoord.
    Output layout: R = visibility in [0, 1], GB = packed depth key for the
    bilateral blur passes, A = 1.

    A leftover debug `return float4(C, 1.0);` used to sit right after the
    position fetch, making everything below unreachable, so this pass emitted
    camera-space position instead of AO. It has been removed. */
float4 SSAOCalculate(VSOUT IN) : COLOR0
{
    float4 output = float4(1, 1, 1, 1);

    // Pixel being shaded
    float2 ssC = IN.UVCoord;

    // Camera-space point being shaded
    float3 C = getPosition(ssC);

    // Store the depth key for the blur passes
    packKey(CSZToKey(C.z), bilateralKey);

    // Per-pixel rotation of the tap pattern, taken from the tiled noise texture
    float randomPatternRotationAngle = tex2D(noiseSampler, ssC * 12.0).x * 2.0;

    // Reconstruct normals from positions. These will lead to 1-pixel black lines
    // at depth discontinuities, however the blur will wipe those out so they are
    // not visible in the final image.
    float3 n_C = reconstructCSFaceNormal(C);

    // Choose the screen-space sample radius; the max() keeps it finite near z = 0.
    // NOTE(review): projScale is documented in pixels, while ssC comes from
    // IN.UVCoord and getOffsetPosition saturates the tap coordinate to [0, 1].
    // Confirm the intended units of ssDiskRadius.
    float ssDiskRadius = projScale * radius / max(C.z, 0.1f);

    float sum = 0.0;
    for (int i = 0; i < NUM_SAMPLES; ++i)
    {
        sum += sampleAO(ssC, C, n_C, ssDiskRadius, i, randomPatternRotationAngle);
    }

    // Normalise by radius^6 to undo the cubic (radius^2 - vv) falloff in sampleAO
    const float temp = radius2 * radius;
    sum /= temp * temp;

    float A = max(0.0f, 1.0f - sum * 1.0f * (5.0f / NUM_SAMPLES));
    visibility = A;

    return output;
}
280.
// Scales the depth-difference penalty in the blur passes.
// Raise for crisper edges, lower to reduce temporal flicker.
#define EDGE_SHARPNESS (1.0)

// Spacing between blur taps, in pixels. Taps are two pixels apart because the
// first AO pass already blurred against immediate neighbours. SCALE can be
// raised while R is lowered to trade some dithering artifacts for speed.
//
// Morgan found that a scale of 3 left a 1-pixel checkerboard grid that was
// unobjectionable after shading was applied, but eliminated most temporal
// incoherence from using small numbers of sample taps.
#define SCALE (2)

// Blur radius in taps per side; the pixel reach is R * SCALE.
#define R (3)
296.
297.
298.
299. //////////////////////////////////////////////////////////////////////////////////////////////
300.
// Type of the value read from the source texture. Together with
// VALUE_COMPONENTS it lets the same blur code run on other kinds of input.
#define VALUE_TYPE float

// Swizzle selecting the blurred value's channel(s) in the source texture.
#define VALUE_COMPONENTS r

#define VALUE_IS_KEY 0

// Swizzle selecting the packed bilateral key; it must differ from VALUE_COMPONENTS.
#define KEY_COMPONENTS gb

// Spatial Gaussian weights indexed by |tap offset|. Alternative sets:
//   { 0.356642, 0.239400, 0.072410, 0.009869 };
//   { 0.398943, 0.241971, 0.053991, 0.004432, 0.000134 };  // stddev = 1.0
//   { 0.153170, 0.144893, 0.122649, 0.092902, 0.062970 };  // stddev = 2.0
static const float gaussian[] =
{
    0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108   // stddev = 3.0
};

// Shorthands for the output channels written by the blur shaders.
#define result output.VALUE_COMPONENTS
#define keyPassThrough output.KEY_COMPONENTS
323.
/** Horizontal pass of the separable, depth-aware (bilateral) blur.
    Reads the previous pass through passSampler, blurs VALUE_COMPONENTS along x
    with Gaussian weights attenuated by the depth-key difference, and forwards
    the centre pixel's packed key unchanged in KEY_COMPONENTS.

    Fix: taps are spaced SCALE pixels apart, but the lookup coordinate is a
    normalised UV. The pixel step is now converted to UV units; previously the
    raw pixel offsets (+-2, +-4, +-6) were added to the UV, so every tap
    clamped to the texture edge. */
float4 HBlurSSAO(VSOUT IN) : COLOR0
{
    // Pixel being shaded
    float2 ssC = IN.UVCoord;

    // UV distance covered by SCALE pixels along x, from the per-pixel UV
    // derivative; abs() covers mirrored UVs.
    // NOTE(review): assumes a full-screen pass whose UVs map the source texture
    // 1:1 onto the render target — confirm.
    float2 tapStep = float2(abs(ddx(ssC.x)) * SCALE, 0);

    float4 output = 1.0f;
    float4 temp = tex2Dlod(passSampler, float4(ssC, 0, 0));

    keyPassThrough = temp.KEY_COMPONENTS;
    float key = unpackKey(temp.KEY_COMPONENTS);

    float sum = temp.VALUE_COMPONENTS;

    // No sky early-out (key == 1.0) is applied; every pixel is blurred.

    // Centre tap
    float BASE = gaussian[0];
    float totalWeight = BASE;
    sum *= totalWeight;

    [unroll]
    for (int r = -R; r <= R; ++r)
    {
        // The centre tap was handled above; after unrolling this branch disappears.
        if (r != 0)
        {
            temp = tex2Dlod(passSampler, float4(ssC + tapStep * r, 0, 0));
            float tapKey = unpackKey(temp.KEY_COMPONENTS);
            float value = temp.VALUE_COMPONENTS;

            // Spatial domain: Gaussian weight for this offset
            float weight = gaussian[abs(r)];

            // Range domain (the "bilateral" weight): shrink as depth difference grows
            weight *= max(0.0, 1.0 - (2000.0 * EDGE_SHARPNESS) * abs(tapKey - key));

            sum += value * weight;
            totalWeight += weight;
        }
    }

    // epsilon guards the division
    const float epsilon = 0.0001;
    result = sum / (totalWeight + epsilon);

    return output;
}
377.
/** Vertical pass of the separable, depth-aware (bilateral) blur.
    Reads the previous pass through passSampler and blurs VALUE_COMPONENTS
    along y with Gaussian weights attenuated by the depth-key difference.
    The key is not forwarded: the GBA channels of the output stay at 1.

    Fix: taps are spaced SCALE pixels apart, but the lookup coordinate is a
    normalised UV. The pixel step is now converted to UV units; previously the
    raw pixel offsets (+-2, +-4, +-6) were added to the UV, so every tap
    clamped to the texture edge. */
float4 VBlurSSAO(VSOUT IN) : COLOR0
{
    // Pixel being shaded
    float2 ssC = IN.UVCoord;

    // UV distance covered by SCALE pixels along y, from the per-pixel UV
    // derivative; abs() covers mirrored UVs.
    // NOTE(review): assumes a full-screen pass whose UVs map the source texture
    // 1:1 onto the render target — confirm.
    float2 tapStep = float2(0, abs(ddy(ssC.y)) * SCALE);

    float4 output = 1.0f;
    float4 temp = tex2Dlod(passSampler, float4(ssC, 0, 0));

    float key = unpackKey(temp.KEY_COMPONENTS);

    float sum = temp.VALUE_COMPONENTS;

    // No sky early-out (key == 1.0) is applied; every pixel is blurred.

    // Centre tap
    float BASE = gaussian[0];
    float totalWeight = BASE;
    sum *= totalWeight;

    [unroll]
    for (int r = -R; r <= R; ++r)
    {
        // The centre tap was handled above; after unrolling this branch disappears.
        if (r != 0)
        {
            temp = tex2Dlod(passSampler, float4(ssC + tapStep * r, 0, 0));
            float tapKey = unpackKey(temp.KEY_COMPONENTS);
            float value = temp.VALUE_COMPONENTS;

            // Spatial domain: Gaussian weight for this offset
            float weight = gaussian[abs(r)];

            // Range domain (the "bilateral" weight): shrink as depth difference grows
            weight *= max(0.0, 1.0 - (2000.0 * EDGE_SHARPNESS) * abs(tapKey - key));

            sum += value * weight;
            totalWeight += weight;
        }
    }

    // epsilon guards the division
    const float epsilon = 0.0001;
    result = sum / (totalWeight + epsilon);

    return output;
}
430.
// Three-pass SSAO technique. Every pass uses the pass-through vertex shader.
technique t0
{
    // p0: raw ambient visibility plus packed depth key
    pass p0
    {
        VertexShader = compile vs_3_0 FrameVS();
        PixelShader  = compile ps_3_0 SSAOCalculate();
    }

    // p1: horizontal bilateral blur
    pass p1
    {
        VertexShader = compile vs_3_0 FrameVS();
        PixelShader  = compile ps_3_0 HBlurSSAO();
    }

    // p2: vertical bilateral blur
    pass p2
    {
        VertexShader = compile vs_3_0 FrameVS();
        PixelShader  = compile ps_3_0 VBlurSSAO();
    }
}
RAW Paste Data