Guest User

SAO

a guest
Sep 7th, 2014
268
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  /** Number of occlusion taps taken per pixel by the AO pass. */
  #define NUM_SAMPLES (8)

  /** Spiral turn counts indexed by sample count: entry [k - 1] is the number of
      turns used for a k-tap pattern (see NUM_SPIRAL_TURNS). */
  static const int ROTATIONS[] = {
       1,  1,  2,  3,  2,  5,  2,  3,  2,  3,   // samples  1..10
       3,  5,  5,  3,  4,  7,  5,  5,  7,  9,   // samples 11..20
       8,  5,  5,  7,  7,  7,  8,  5,  8, 11,   // samples 21..30
      12,  7, 10, 13,  8, 11,  8,  7, 14, 11,   // samples 31..40
      11, 13, 12, 13, 19, 17, 13, 11, 18, 19,   // samples 41..50
      11, 11, 14, 17, 21, 15, 16, 17, 18, 13,   // samples 51..60
      17, 11, 17, 19, 18, 25, 18, 19, 19, 29,   // samples 61..70
      21, 19, 27, 31, 29, 21, 18, 17, 29, 31,   // samples 71..80
      31, 23, 18, 25, 26, 25, 23, 19, 34, 19,   // samples 81..90
      27, 21, 25, 39, 29, 17, 21, 27            // samples 91..98
  };
  14.  
  /** Depth scale for the bilateral key: CSZToKey multiplies z by 1 / FAR_PLANE_Z
      and clamps the result to [0, 1]. The inherited note also describes it as the
      cut-off for skipping AO on the sky (infinite depth). It need not match the
      real far plane. */
  #define FAR_PLANE_Z (90.0)

  /** Number of turns around the circle that the spiral tap pattern makes, looked
      up from ROTATIONS for the current NUM_SAMPLES. The inherited note asks for a
      prime value so that taps do not line up. */
  static const int NUM_SPIRAL_TURNS = ROTATIONS[NUM_SAMPLES - 1];
  22.  
  /** World-space AO radius in scene units (r), e.g. 1.0m. */
  static const float radius = 0.7f;

  /** Square of the AO radius. */
  static const float radius2 = radius * radius;

  /** Bias to avoid AO in smooth corners, e.g. 0.01m. */
  static const float bias = 0.02f;

  /** The height in pixels of a 1m object if viewed from 1m away.
      It can be computed from the projection matrix. The value only acts as a
      scale factor on radius, so it may simply be hardcoded (~500), which makes
      radius unitless but resolution dependent. */
  static const float projScale = 500.0f;

  /** Near and far values used by the depth linearization in getPosition and
      getOffsetPosition. */
  static const float nearZ = 0.001f;
  static const float farZ = 1.0f;

  /** Reprojection coefficients consumed by reconstructCSPosition:
      P.xy = (S * xy + zw) * z. Not initialized here; expected to be set by the
      application. */
  float4 g_ReprojectInfoFromInt;
  41.  
  42. // Texture2D<float> InputTextureLinearDepth : register(t0);
  43. // Texture2D<float4> InputTextureSSAO : register(t1);
  44. // Texture2D<float2> InputTextureMotion : register(t2);
  45.  
  /** Depth input read by getPosition and getOffsetPosition: point filtering,
      mirrored addressing, no sRGB conversion. */
  texture2D depthTex2D;
  sampler depthSampler = sampler_state
  {
      Texture     = <depthTex2D>;
      AddressU    = Mirror;
      AddressV    = Mirror;
      MinFilter   = Point;
      MagFilter   = Point;
      MipFilter   = Point;
      SRGBTexture = FALSE;
  };
  57.  
  /** Frame colour input: linear filtering, clamped addressing, no sRGB
      conversion. Not referenced by any shader visible in this file. */
  texture2D frameTex2D;
  sampler frameSampler = sampler_state
  {
      Texture     = <frameTex2D>;
      AddressU    = Clamp;
      AddressV    = Clamp;
      MinFilter   = Linear;
      MagFilter   = Linear;
      MipFilter   = Linear;
      SRGBTexture = FALSE;
  };
  69.  
  /** Output of the previous pass, read by the blur shaders: linear filtering,
      clamped addressing, no sRGB conversion. */
  texture2D prevPassTex2D;
  sampler passSampler = sampler_state
  {
      Texture     = <prevPassTex2D>;
      AddressU    = Clamp;
      AddressV    = Clamp;
      MinFilter   = Linear;
      MagFilter   = Linear;
      MipFilter   = Linear;
      SRGBTexture = FALSE;
  };
  81.  
  /** Tiling noise texture that supplies the per-pixel rotation of the AO tap
      pattern: linear filtering, wrapped addressing. */
  texture2D noiseTexture < string filename = "RandomNoiseB.dds"; >;
  sampler2D noiseSampler = sampler_state
  {
      Texture   = <noiseTexture>;
      AddressU  = Wrap;
      AddressV  = Wrap;
      MinFilter = Linear;
      MagFilter = Linear;
      MipFilter = Linear;
  };
  93.  
  /** Vertex shader output / pixel shader input shared by every pass. */
  struct VSOUT
  {
      float4 vertPos : POSITION0;   // position written by FrameVS
      float2 UVCoord : TEXCOORD0;   // texture coordinate forwarded to the pixel shader
  };
  99.  
  /** Vertex shader input consumed by FrameVS. */
  struct VSIN
  {
      float4 vertPos : POSITION0;   // vertex position; FrameVS keeps xyz and forces w to 1
      float2 UVCoord : TEXCOORD0;   // texture coordinate, passed through unchanged
  };
  105.  
  106.  
  /** Pass-through vertex shader: forwards the input position with w forced to 1
      and copies the texture coordinate unchanged. No transform is applied. */
  VSOUT FrameVS(VSIN IN)
  {
      VSOUT OUT;
      OUT.vertPos = float4(IN.vertPos.xyz, 1.0f);
      OUT.UVCoord = IN.UVCoord;
      return OUT;
  }
  116.  
  /** Reconstructs camera-space P.xyz from screen-space S = (x, y) and depth z:
        P.xy = (S * g_ReprojectInfoFromInt.xy + g_ReprojectInfoFromInt.zw) * z
        P.z  = z

      The inherited description states that S is in pixels, that z is
      camera-space z < 0, and that the upper-left pixel center is at (0.5, 0.5)
      (which need not be where the sample tap was placed).
      NOTE(review): getPosition and getOffsetPosition pass texture coordinates
      and a positive linearized depth - confirm g_ReprojectInfoFromInt is built
      for those inputs. */

  // Host-side setup of g_ReprojectInfoFromInt as pasted by the author
  // (marked "not sure if it works"):
  //
  //   D3DXVECTOR4 g_ReprojectInfoFromInt;
  //   unsigned width, height;
  //   width = 1280; height = 720;
  //   g_ReprojectInfoFromInt.x = -2.0f / ((float)(float)width*((float)height / (float)width));
  //   g_ReprojectInfoFromInt.y = -2.0f / (float)height*1.0;
  //   g_ReprojectInfoFromInt.z = (1.0f - 0.0) / ((float)(float)height / (float)width) + g_ReprojectInfoFromInt.x * 0.5f;
  //   g_ReprojectInfoFromInt.w = (1.0f + 0.0) / 1.0 + g_ReprojectInfoFromInt.y * 0.5f;
  //
  //   HRESULT hr = effect->SetVector(projectionHandle, &g_ReprojectInfoFromInt);

  float3 reconstructCSPosition(float2 S, float z)
  {
      // Per-unit-depth xy of the view ray through S.
      float2 rayXY = S * g_ReprojectInfoFromInt.xy + g_ReprojectInfoFromInt.zw;
      return float3(rayXY * z, z);
  }
  139.  
  /** Returns a unit face normal for position C, built from the cross product of
      its screen-space derivatives (ddy x ddx). */
  float3 reconstructCSFaceNormal(float3 C)
  {
      float3 dCdy = ddy(C);
      float3 dCdx = ddx(C);
      return normalize(cross(dCdy, dCdx));
  }
  145.  
  /** Places tap number sampleNumber on a spiral over the unit disk.
      Returns the unit direction of the tap and writes its radius on the unit
      disk (in (0, 1)) to ssR; the caller scales ssR by the actual disk radius.
      spinAngle is added to the tap angle to rotate the whole pattern. */
  float2 tapLocation(int sampleNumber, float spinAngle, out float ssR)
  {
      // Fraction of the way along the spiral; also the tap's radius on the unit disk.
      float t = float(sampleNumber + 0.5) * (1.0 / NUM_SAMPLES);
      ssR = t;

      // 6.28 approximates one full turn in radians.
      float theta = t * (NUM_SPIRAL_TURNS * 6.28) + spinAngle;

      float2 dir;
      sincos(theta, dir.y, dir.x);
      return dir;
  }
  158.  
  /** Converts depth z into a bilateral key in [0, 1] by scaling with
      1 / FAR_PLANE_Z and clamping. */
  float CSZToKey(float z)
  {
      float scaledZ = z * (1.0 / FAR_PLANE_Z);
      return clamp(scaledZ, 0.0, 1.0);
  }
  164.  
  /** Reads the depth stored at ssP, linearizes it with nearZ / farZ, and returns
      the position reconstructed by reconstructCSPosition(ssP, z).
      NOTE(review): ssP is used directly as the depthSampler texture coordinate
      and is also passed as S to reconstructCSPosition, whose inherited doc
      expects pixels - confirm the intended units. */
  float3 getPosition(float2 ssP)
  {
      float storedDepth = tex2D(depthSampler, ssP).r;
      float z = (2.0f * nearZ) / ((farZ + nearZ) - storedDepth * (farZ - nearZ));
      return reconstructCSPosition(ssP, z);
  }
  177.  
  /** Reads the position of the point offset from ssC by unitOffset * ssR.
      Assumes length(unitOffset) == 1.

      ssC is a texture coordinate (the sum is saturated to [0, 1]) while ssR is
      a radius in pixels (it derives from projScale, a height in pixels). The
      radius is therefore converted to texture-coordinate units before it is
      added; adding it unconverted pushed every tap onto the screen border.

      The size of one pixel in texture coordinates is taken from the
      screen-space derivatives of ssC. This assumes ssC is the interpolated
      coordinate of a screen-aligned quad; a texel-size constant supplied by
      the application would be the conventional alternative - TODO confirm
      which fits the host code.

      The depth is linearized and reconstructed exactly as in getPosition so
      that differences between the two results stay consistent. */
  float3 getOffsetPosition(float2 ssC, float2 unitOffset, float ssR)
  {
      // Texture-coordinate span of one pixel step in x and y.
      float2 uvPerPixel = abs(float2(ddx(ssC.x), ddy(ssC.y)));

      float2 ssP = saturate(ssC + unitOffset * ssR * uvPerPixel);

      float storedDepth = tex2D(depthSampler, ssP).r;
      float z = (2.0f * nearZ) / ((farZ + nearZ) - storedDepth * (farZ - nearZ));

      return reconstructCSPosition(ssP, z);
  }
  194.  
  195.  
  /** Computes the occlusion contributed by tap tapIndex around the pixel at ssC,
      whose position is C with unit normal n_C. ssDiskRadius is the maximum
      screen-space sampling radius; randomPatternRotationAngle spins the tap
      pattern for this pixel. The result is not normalized: the caller divides
      the accumulated sum by radius^6. */
  float sampleAO(in float2 ssC, in float3 C, in float3 n_C, in float ssDiskRadius, in int tapIndex, in float randomPatternRotationAngle)
  {
      // Tap direction on the unit disk and its radius, scaled to the real disk.
      float tapRadius;
      float2 tapDir = tapLocation(tapIndex, randomPatternRotationAngle, tapRadius);

      // Vector from the shaded point to the candidate occluder.
      float3 toOccluder = getOffsetPosition(ssC, tapDir, tapRadius * ssDiskRadius) - C;

      float distSq = dot(toOccluder, toOccluder);
      float alongNormal = dot(toOccluder, n_C);

      // epsilon keeps the division finite when the occluder coincides with C.
      const float epsilon = 0.02f;

      // Falloff reaches zero once the occluder is farther than radius from C.
      float falloff = max(radius2 - distSq, 0.0);
      float falloffCubed = falloff * falloff * falloff;

      return falloffCubed * max((alongNormal - bias) / (epsilon + distSq), 0.0);
  }
  217.  
  218.  
  /** Splits key into two channels for storage in GB:
      p.x = floor(key * 256) / 256 (coarse part, rounded down),
      p.y = key * 256 - floor(key * 256) (remaining fraction). */
  void packKey(float key, out float2 p)
  {
      float scaled = key * 256.0;
      float whole = floor(scaled);
      p = float2(whole * (1.0 / 256.0), scaled - whole);
  }
  229.  
  /** Recombines a two-channel key into one float:
      p.x * (256 / 257) + p.y * (1 / 257). */
  float unpackKey(float2 p)
  {
      const float coarseWeight = 256.0 / 257.0;
      const float fineWeight = 1.0 / 257.0;
      return p.x * coarseWeight + p.y * fineWeight;
  }
  234.  
  /** Aliases for the channels of the AO pass output: R holds the visibility
      term, GB hold the packed bilateral depth key. */
  #define visibility output.r
  #define bilateralKey output.gb

  /** AO pass pixel shader.
      Output: R = visibility in [0, 1] (1 = unoccluded), GB = depth key packed by
      packKey, A = 1.

      A leftover debug statement used to return float4(C, 1.0) right after the
      position lookup, which made everything below unreachable and fed position
      data to the blur passes. It has been removed so the pass produces AO.

      NOTE(review): ssDiskRadius is expressed in pixels (projScale is a pixel
      height) while ssC is a [0, 1] texture coordinate; verify that
      getOffsetPosition reconciles the two units. */
  float4 SSAOCalculate(VSOUT IN) : COLOR0
  {
      float4 output = float4(1, 1, 1, 1);

      // Texture coordinate of the pixel being shaded.
      float2 ssC = IN.UVCoord;

      // Position of the point being shaded, as reconstructed by getPosition.
      float3 C = getPosition(ssC);

      packKey(CSZToKey(C.z), bilateralKey);

      // Per-pixel rotation of the tap pattern, read from the tiling noise texture.
      float randomPatternRotationAngle = tex2D(noiseSampler, ssC * 12.0).x * 2.0;

      // Reconstruct normals from positions. These lead to 1-pixel black lines at
      // depth discontinuities, which the blur passes are expected to wipe out.
      float3 n_C = reconstructCSFaceNormal(C);

      // Screen-space sample radius; the max() guards against division by a
      // near-zero depth.
      float ssDiskRadius = projScale * radius / max(C.z, 0.1f);

      float sum = 0.0;
      for (int i = 0; i < NUM_SAMPLES; ++i)
      {
          sum += sampleAO(ssC, C, n_C, ssDiskRadius, i, randomPatternRotationAngle);
      }

      // Normalize by radius^6 (sampleAO returns an unnormalized term).
      const float radiusCubed = radius2 * radius;
      sum /= radiusCubed * radiusCubed;

      float A = max(0.0f, 1.0f - sum * 1.0f * (5.0f / NUM_SAMPLES));
      visibility = A;

      return output;
  }
  280.  
  /** Increase to make edges crisper. Decrease to reduce temporal flicker. */
  #define EDGE_SHARPNESS (1.0)

  /** Spacing between blur taps, in pixels. The taps step in 2-pixel intervals
      because the first AO pass is described as having already blurred against
      neighbors. This constant can be increased while R decreases to improve
      performance at the expense of some dithering artifacts.

      Per the original author (Morgan), a scale of 3 left a 1-pixel checkerboard
      grid that was unobjectionable after shading was applied but eliminated most
      temporal incoherence from using small numbers of sample taps. */
  #define SCALE (2)

  /** Filter radius in pixels. This is multiplied by SCALE. */
  #define R (3)
  296.  
  297.  
  298.  
  299. //////////////////////////////////////////////////////////////////////////////////////////////
  300.  
  /** Type of data to read from source. This macro allows the same blur shader
      to be used on different kinds of input data. */
  #define VALUE_TYPE float

  /** Swizzle used to extract the value channels of source. This macro allows the
      same blur shader to be used on different kinds of input data. */
  #define VALUE_COMPONENTS r

  #define VALUE_IS_KEY 0

  /** Channels encoding the bilateral key value (must differ from
      VALUE_COMPONENTS). */
  #define KEY_COMPONENTS gb

  /** Gaussian tap weights indexed by |tap offset|; the active set is the
      stddev = 3.0 kernel. Alternative kernels kept for reference:
        { 0.356642, 0.239400, 0.072410, 0.009869 }
        { 0.398943, 0.241971, 0.053991, 0.004432, 0.000134 }   (stddev = 1.0)
        { 0.153170, 0.144893, 0.122649, 0.092902, 0.062970 }   (stddev = 2.0) */
  static const float gaussian[] = {
      0.111220, 0.107798, 0.098151, 0.083953, 0.067458, 0.050920, 0.036108
  };

  /** Aliases for the blur output channels: blurred value and forwarded key. */
  #define result output.VALUE_COMPONENTS
  #define keyPassThrough output.KEY_COMPONENTS
  323.  
  /** Shared body of the horizontal and vertical blur passes: a one-dimensional,
      depth-aware (bilateral) Gaussian blur of the value channel of
      prevPassTex2D.

      ssC        texture coordinate of the pixel being shaded
      axis       blur direction, (1, 0) or (0, 1)
      packedKey  receives the packed depth key of the center tap
      returns    the weighted average of the center tap and R taps on each side

      Tap offsets (r * SCALE) are whole pixels, whereas ssC is a normalized
      texture coordinate. They are scaled by the size of one pixel in texture
      coordinates; added unscaled, every tap landed far outside [0, 1] and was
      clamped to the texture edge. The pixel size is taken from the screen-space
      derivatives of ssC, which assumes a screen-aligned quad rendered at the
      resolution of prevPassTex2D - TODO confirm against the host code; a
      texel-size constant set by the application is the conventional
      alternative.

      The sky early-out (key == 1.0) of the original code remains disabled. */
  float BilateralBlurSSAO(float2 ssC, float2 axis, out float2 packedKey)
  {
      float4 temp = tex2Dlod(passSampler, float4(ssC, 0, 0));

      packedKey = temp.KEY_COMPONENTS;
      float key = unpackKey(packedKey);

      // Texture-coordinate span of one pixel step in x and y.
      float2 uvPerPixel = abs(float2(ddx(ssC.x), ddy(ssC.y)));

      // The center tap is accumulated up front with the peak Gaussian weight.
      float totalWeight = gaussian[0];
      float sum = temp.VALUE_COMPONENTS * totalWeight;

      [unroll]
      for (int r = -R; r <= R; ++r)
      {
          // The zero case was handled above; once unrolled the branch disappears.
          if (r != 0)
          {
              float2 tapUV = ssC + axis * (r * SCALE) * uvPerPixel;

              temp = tex2Dlod(passSampler, float4(tapUV, 0, 0));
              float tapKey = unpackKey(temp.KEY_COMPONENTS);
              float value = temp.VALUE_COMPONENTS;

              // Spatial domain: offset Gaussian tap.
              float weight = gaussian[abs(r)];

              // Range domain (the "bilateral" weight): as the depth difference
              // increases, decrease the weight.
              weight *= max(0.0, 1.0 - (2000.0 * EDGE_SHARPNESS) * abs(tapKey - key));

              sum += value * weight;
              totalWeight += weight;
          }
      }

      // epsilon keeps the division finite.
      const float epsilon = 0.0001;
      return sum / (totalWeight + epsilon);
  }

  /** Horizontal blur pass. Writes the blurred value to R and forwards the packed
      depth key in GB so the next pass can reuse it; A stays 1. */
  float4 HBlurSSAO(VSOUT IN) : COLOR0
  {
      float4 output = 1.0f;

      float2 packedKey;
      result = BilateralBlurSSAO(IN.UVCoord, float2(1, 0), packedKey);
      keyPassThrough = packedKey;

      return output;
  }

  /** Vertical blur pass. Writes the blurred value to R; the key is not forwarded,
      so G, B and A stay 1. */
  float4 VBlurSSAO(VSOUT IN) : COLOR0
  {
      float4 output = 1.0f;

      float2 unusedKey;
      result = BilateralBlurSSAO(IN.UVCoord, float2(0, 1), unusedKey);

      return output;
  }
  430.  
  /** Three-pass technique, all passes using the FrameVS vertex shader:
      p0 computes raw AO, p1 blurs horizontally, p2 blurs vertically. */
  technique t0
  {
      pass p0
      {
          VertexShader = compile vs_3_0 FrameVS();
          PixelShader  = compile ps_3_0 SSAOCalculate();
      }

      pass p1
      {
          VertexShader = compile vs_3_0 FrameVS();
          PixelShader  = compile ps_3_0 HBlurSSAO();
      }

      pass p2
      {
          VertexShader = compile vs_3_0 FrameVS();
          PixelShader  = compile ps_3_0 VBlurSSAO();
      }
  }
RAW Paste Data