Advertisement
Guest User

Untitled

a guest
Oct 14th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.32 KB | None | 0 0
  1. shader: MESA_SHADER_FRAGMENT
  2. inputs: 0
  3. outputs: 0
  4. uniforms: 0
  5. shared: 0
  6. decl_var ssbo INTERP_MODE_NONE block @0 (429, 0, 0)
  7. decl_var ssbo INTERP_MODE_NONE block @1 (429, 0, 1)
  8. decl_var ssbo INTERP_MODE_NONE block @2 (429, 0, 2)
  9. decl_var ssbo INTERP_MODE_NONE block @3 (429, 0, 3)
  10. decl_var shader_in INTERP_MODE_NONE float @4 (VARYING_SLOT_VAR0.x, 0, 0)
  11. decl_var shader_in INTERP_MODE_NONE float @5 (VARYING_SLOT_VAR0.y, 0, 0)
  12. decl_var shader_in INTERP_MODE_NONE float @6 (VARYING_SLOT_VAR0.z, 0, 0)
  13. decl_var shader_in INTERP_MODE_NONE float @7 (VARYING_SLOT_VAR0.w, 0, 0)
  14. decl_var shader_out INTERP_MODE_NONE vec4 @8 (FRAG_RESULT_DATA0, 0, 0)
  15. decl_function main (0 params)
  16.  
  17. impl main {
  18. block block_0:
  19. /* preds: */
  20. vec1 32 ssa_0 = deref_var &@4 (shader_in float)
  21. vec1 32 ssa_1 = intrinsic load_deref (ssa_0) (0) /* access=0 */
  22. vec1 32 ssa_2 = deref_var &@5 (shader_in float)
  23. vec1 32 ssa_3 = intrinsic load_deref (ssa_2) (0) /* access=0 */
  24. vec1 32 ssa_4 = deref_var &@6 (shader_in float)
  25. vec1 32 ssa_5 = intrinsic load_deref (ssa_4) (0) /* access=0 */
  26. vec1 32 ssa_6 = deref_var &@7 (shader_in float)
  27. vec1 32 ssa_7 = intrinsic load_deref (ssa_6) (0) /* access=0 */
  28. vec4 32 ssa_8 = vec4 ssa_1, ssa_3, ssa_5, ssa_7
  29. vec1 32 ssa_9 = load_const (0x00000001 /* 0.000000 */)
  30. vec1 32 ssa_10 = load_const (0x3f800000 /* 1.000000 */)
  31. vec1 32 ssa_11 = load_const (0x00000000 /* 0.000000 */)
  32. vec1 32 ssa_12 = load_const (0x00000064 /* 0.000000 */)
  33. vec1 32 ssa_13 = load_const (0x3f000000 /* 0.500000 */)
  34. vec4 32 ssa_14 = intrinsic load_frag_coord () ()
  35. vec1 1 ssa_15 = feq ssa_14.x, ssa_13
  36. vec1 1 ssa_16 = feq ssa_14.y, ssa_13
  37. vec1 1 ssa_17 = iand ssa_15, ssa_16
  38. /* succs: block_1 block_7 */
  39. if ssa_17 {
  40. block block_1:
  41. /* preds: block_0 */
  42. /* succs: block_2 */
  43. loop {
  44. block block_2:
  45. /* preds: block_1 block_5 */
  46. vec1 32 ssa_18 = phi block_1: ssa_11, block_5: ssa_35
  47. vec1 1 ssa_19 = ige ssa_18, ssa_12
  48. /* succs: block_3 block_4 */
  49. if ssa_19 {
  50. block block_3:
  51. /* preds: block_2 */
  52. break
  53. /* succs: block_6 */
  54. } else {
  55. block block_4:
  56. /* preds: block_2 */
  57. /* succs: block_5 */
  58. }
  59. block block_5:
  60. /* preds: block_4 */
  61. vec1 32 ssa_20 = iadd ssa_18, ssa_18
  62. vec1 32 ssa_21 = intrinsic vulkan_resource_index (ssa_11) (0, 2, 7) /* desc-set=0 */ /* binding=2 */ /* desc_type=SSBO */
  63. vec1 32 ssa_22 = load_const (0x00000002 /* 0.000000 */)
  64. vec1 32 ssa_23 = ishl ssa_18, ssa_22
  65. vec1 32 ssa_24 = intrinsic vulkan_resource_index (ssa_11) (0, 0, 7) /* desc-set=0 */ /* binding=0 */ /* desc_type=SSBO */
  66. vec1 32 ssa_25 = ishl ssa_20, ssa_22
  67. vec1 32 ssa_26 = load_const (0x00000004 /* 0.000000 */)
  68. vec1 32 ssa_27 = iadd ssa_25, ssa_26
  69. vec1 32 ssa_28 = intrinsic vulkan_resource_index (ssa_11) (0, 3, 7) /* desc-set=0 */ /* binding=3 */ /* desc_type=SSBO */
  70. vec1 32 ssa_29 = intrinsic load_ssbo (ssa_21, ssa_23) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
  71. vec1 1 ssa_30 = flt ssa_29, ssa_11
  72. vec1 32 ssa_31 = bcsel ssa_30, ssa_25, ssa_27
  73. vec1 32 ssa_32 = intrinsic load_ssbo (ssa_24, ssa_31) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
  74. vec1 32 ssa_33 = fadd ssa_32, ssa_10
  75. intrinsic store_ssbo (ssa_33, ssa_24, ssa_31) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
  76. vec1 32 ssa_34 = intrinsic load_ssbo (ssa_24, ssa_31) (0, 4, 0) /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
  77. intrinsic store_ssbo (ssa_34, ssa_28, ssa_23) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
  78. vec1 32 ssa_35 = iadd ssa_18, ssa_9
  79. /* succs: block_2 */
  80. }
  81. block block_6:
  82. /* preds: block_3 */
  83. /* succs: block_8 */
  84. } else {
  85. block block_7:
  86. /* preds: block_0 */
  87. /* succs: block_8 */
  88. }
  89. block block_8:
  90. /* preds: block_6 block_7 */
  91. vec1 32 ssa_36 = deref_var &@8 (shader_out vec4)
  92. intrinsic store_deref (ssa_36, ssa_8) (15, 0) /* wrmask=xyzw */ /* access=0 */
  93. /* succs: block_9 */
  94. block block_9:
  95. }
  96.  
  97. After RA:
  98. BB0
  99. /* logical preds: / linear preds: / kind: top-level, branch, */
  100. s2: %34:s[0-1], s1: %35:s[2], s1: %36:s[3], v1: %37:v[0], v1: %38:v[1], v1: %39:v[2], v1: %40:v[3], s2: %41:exec = p_startpgm
  101. p_logical_start
  102. s1: %89:m0 = p_parallelcopy %36:s[3]
  103. v1: %44:v[4] = v_interp_p1_f32 %37:v[0], %89:m0 attr0.w
  104. v1: %3:v[4] = v_interp_p2_f32 %38:v[1], %89:m0, %44:v[4] attr0.w
  105. v1: %45:v[5] = v_interp_p1_f32 %37:v[0], %89:m0 attr0.z
  106. v1: %4:v[5] = v_interp_p2_f32 %38:v[1], %89:m0, %45:v[5] attr0.z
  107. v1: %46:v[6] = v_interp_p1_f32 %37:v[0], %89:m0 attr0.y
  108. v1: %5:v[6] = v_interp_p2_f32 %38:v[1], %89:m0, %46:v[6] attr0.y
  109. v1: %47:v[0] = v_interp_p1_f32 %37:v[0], %89:m0 attr0.x
  110. v1: %6:v[0] = v_interp_p2_f32 %38:v[1], %89:m0, %47:v[0] attr0.x
  111. s2: %10:vcc = v_cmp_eq_f32 0.5, %40:v[3]
  112. s2: %11:s[0-1] = v_cmp_eq_f32 0.5, %39:v[2]
  113. s2: %12:s[0-1], s1: %52:scc = s_and_b64 %11:s[0-1], %10:vcc
  114. p_logical_end
  115. s2: %81:s[0-1], s1: %80:scc, s2: %79:exec = s_and_saveexec_b64 %12:s[0-1], %41:exec
  116. p_cbranch_z %79:exec BB7, BB1
  117. BB1
  118. /* logical preds: BB0, / linear preds: BB0, / kind: uniform, loop-preheader, */
  119. p_logical_start
  120. p_logical_end
  121. p_branch BB2
  122. BB2
  123. /* logical preds: BB1, BB5, / linear preds: BB1, BB5, / kind: uniform, loop-header, */
  124. s2: %82:exec = p_linear_phi %79:exec, %82:exec
  125. s1: %13:s[3] = p_linear_phi 0, %33:s[3]
  126. p_logical_start
  127. s1: %15:scc = s_cmp_ge_i32 %13:s[3], 0x64
  128. p_logical_end
  129. p_cbranch_z %15:scc BB4, BB3
  130. BB3
  131. /* logical preds: BB2, / linear preds: BB2, / kind: uniform, break, */
  132. p_logical_start
  133. p_logical_end
  134. p_branch BB6
  135. BB4
  136. /* logical preds: BB2, / linear preds: BB2, / kind: uniform, */
  137. p_logical_start
  138. p_logical_end
  139. p_branch BB5
  140. BB5
  141. /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue, needs_lowering, */
  142. p_logical_start
  143. s1: %55:s[4], s1: %54:scc = s_add_i32 32, %35:s[2]
  144. s2: %61:s[4-5] = p_create_vector %55:s[4], 0xffff8000
  145. s4: %62:s[4-7] = s_load_dwordx4 %61:s[4-5], 0 reorder
  146. s1: %16:s[8], s1: %53:scc = s_add_u32 %13:s[3], %13:s[3]
  147. s1: %19:s[9], s1: %56:scc = s_lshl_b32 %13:s[3], 2
  148. s1: %25:s[4] = s_buffer_load_dword %62:s[4-7], %19:s[9] buffer
  149. s1: %60:s[6], s1: %59:scc = s_add_i32 48, %35:s[2]
  150. s2: %67:s[10-11] = p_create_vector %35:s[2], 0xffff8000
  151. s4: %68:s[12-15] = s_load_dwordx4 %67:s[10-11], 0 reorder
  152. s1: %21:s[5], s1: %57:scc = s_lshl_b32 %16:s[8], 2
  153. s1: %23:s[7], s1: %58:scc = s_add_u32 %21:s[5], 4
  154. s2: %26:vcc = v_cmp_lt_f32 %25:s[4], 0
  155. s1: %66:scc = s_cmp_lg_u64 0, %26:vcc
  156. s1: %27:s[4] = s_cselect_b32 %21:s[5], %23:s[7], %66:scc
  157. s1: %28:s[5] = s_buffer_load_dword %68:s[12-15], %27:s[4] buffer
  158. s2: %74:s[6-7] = p_create_vector %60:s[6], 0xffff8000
  159. s4: %75:s[16-19] = s_load_dwordx4 %74:s[6-7], 0 reorder
  160. s1: %33:s[3], s1: %76:scc = s_add_u32 %13:s[3], 1
  161. v1: %30:v[1] = v_add_f32 %28:s[5], 1.0
  162. s1: %83:s[10] = p_as_uniform %30:v[1]
  163. s1: %96:m0 = p_parallelcopy %27:s[4]
  164. s_buffer_store_dword %68:s[12-15], %96:m0, %83:s[10] buffer
  165. s1: %31:s[4] = s_buffer_load_dword %68:s[12-15], %96:m0 buffer
  166. s1: %97:m0 = p_parallelcopy %19:s[9]
  167. s_buffer_store_dword %75:s[16-19], %97:m0, %31:s[4] buffer
  168. p_logical_end
  169. p_branch BB2
  170. BB6
  171. /* logical preds: BB3, / linear preds: BB3, / kind: uniform, loop-exit, */
  172. s2: %84:exec = p_parallelcopy %82:exec
  173. p_logical_start
  174. p_logical_end
  175. p_branch BB8
  176. BB7
  177. /* logical preds: / linear preds: BB0, / kind: uniform, */
  178. p_branch BB8
  179. BB8
  180. /* logical preds: / linear preds: BB6, BB7, / kind: invert, */
  181. s2: %85:exec = p_linear_phi %84:exec, %79:exec
  182. s2: %87:exec, s1: %86:scc = s_andn2_b64 %81:s[0-1], %85:exec
  183. p_cbranch_z %87:exec BB10, BB9
  184. BB9
  185. /* logical preds: BB0, / linear preds: BB8, / kind: uniform, */
  186. p_logical_start
  187. p_logical_end
  188. p_branch BB11
  189. BB10
  190. /* logical preds: / linear preds: BB8, / kind: uniform, */
  191. p_branch BB11
  192. BB11
  193. /* logical preds: BB6, BB9, / linear preds: BB9, BB10, / kind: uniform, top-level, merge, */
  194. s2: %88:exec = p_parallelcopy %81:s[0-1]
  195. p_logical_start
  196. v1: %77:v[0] = v_cvt_pkrtz_f16_f32 %6:v[0], %5:v[6]
  197. v1: %78:v[1] = v_cvt_pkrtz_f16_f32 %4:v[5], %3:v[4]
  198. exp %77:v[0], %78:v[1], v1: undef, v1: undef compr mrt0
  199. p_logical_end
  200. s_dcache_wb
  201. s_endpgm
  202.  
  203. disasm:
  204. BB0:
  205. s_mov_b32 m0, s3 ; befc0303
  206. v_interp_p1_f32_e32 v4, v0, attr0.w ; c8100300
  207. v_interp_p2_f32_e32 v4, v1, attr0.w ; c8110301
  208. v_interp_p1_f32_e32 v5, v0, attr0.z ; c8140200
  209. v_interp_p2_f32_e32 v5, v1, attr0.z ; c8150201
  210. v_interp_p1_f32_e32 v6, v0, attr0.y ; c8180100
  211. v_interp_p2_f32_e32 v6, v1, attr0.y ; c8190101
  212. v_interp_p1_f32_e32 v0, v0, attr0.x ; c8000000
  213. v_interp_p2_f32_e32 v0, v1, attr0.x ; c8010001
  214. v_cmp_eq_f32_e32 vcc, 0.5, v3 ; 7c0406f0
  215. v_cmp_eq_f32_e64 s[0:1], 0.5, v2 ; d4020000 000204f0
  216. s_and_b64 s[0:1], s[0:1], vcc ; 87806a00
  217. s_and_saveexec_b64 s[0:1], s[0:1] ; be802400
  218. s_cbranch_execz BB11 ; bf88002e
  219. BB1:
  220. s_mov_b32 s3, 0 ; be830380
  221. BB2:
  222. s_cmp_ge_i32 s3, 0x64 ; bf03ff03 00000064
  223. s_cbranch_scc1 BB11 ; bf85002a
  224. BB5:
  225. s_add_i32 s4, 32, s2 ; 810402a0
  226. s_movk_i32 s5, 0x8000 ; b0058000
  227. s_load_dwordx4 s[4:7], s[4:5], 0x0 ; f4080102 fa000000
  228. s_add_u32 s8, s3, s3 ; 80080303
  229. s_lshl_b32 s9, s3, 2 ; 8f098203
  230. s_waitcnt lgkmcnt(0) ; bf8cc07f
  231. s_buffer_load_dword s4, s[4:7], s9 ; f4200102 12000000
  232. s_add_i32 s6, 48, s2 ; 810602b0
  233. s_mov_b32 s10, s2 ; be8a0302
  234. s_movk_i32 s11, 0x8000 ; b00b8000
  235. s_load_dwordx4 s[12:15], s[10:11], 0x0 ; f4080305 fa000000
  236. s_lshl_b32 s5, s8, 2 ; 8f058208
  237. s_add_u32 s7, s5, 4 ; 80078405
  238. s_waitcnt lgkmcnt(0) ; bf8cc07f
  239. v_cmp_lt_f32_e64 vcc, s4, 0 ; d401006a 00010004
  240. s_cmp_lg_u64 0, vcc ; bf136a80
  241. s_cselect_b32 s4, s5, s7 ; 85040705
  242. s_buffer_load_dword s5, s[12:15], s4 ; f4200146 08000000
  243. s_movk_i32 s7, 0x8000 ; b0078000
  244. s_load_dwordx4 s[16:19], s[6:7], 0x0 ; f4080403 fa000000
  245. s_add_u32 s3, s3, 1 ; 80038103
  246. s_waitcnt lgkmcnt(0) ; bf8cc07f
  247. v_add_f32_e64 v1, s5, 1.0 ; d5030001 0001e405
  248. v_readfirstlane_b32 s10, v1 ; 7e140501
  249. s_mov_b32 m0, s4 ; befc0304
  250. s_buffer_store_dword s10, s[12:15], m0 ; f4600286 f8000000
  251. s_nop 0 ; bf800000
  252. s_buffer_load_dword s4, s[12:15], m0 ; f4200106 f8000000
  253. s_mov_b32 m0, s9 ; befc0309
  254. s_waitcnt lgkmcnt(0) ; bf8cc07f
  255. s_buffer_store_dword s4, s[16:19], m0 ; f4600108 f8000000
  256. s_branch BB2 ; bf82ffd3
  257. BB11:
  258. s_mov_b64 exec, s[0:1] ; befe0400
  259. v_cvt_pkrtz_f16_f32_e64 v0, v0, v6 ; d52f0000 00020d00
  260. v_cvt_pkrtz_f16_f32_e64 v1, v5, v4 ; d52f0001 00020905
  261. exp mrt0 v0, off, v1, off done compr vm ; f8001c05 80800100
  262. s_dcache_wb ; f4840000 fa000000
  263. s_endpgm ; bf810000
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement