Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.14 KB | None | 0 0
  1. After RA:
  2. BB0
  3. /* logical preds: / linear preds: / kind: uniform, top-level, */
  4. s2: %17:s[0-1], s1: %18:s[2], s1: %19:s[3], v1: %20:v[0], v1: %21:v[1], s2: %22:exec = p_startpgm
  5. s2: %43:exec, s1: %42:scc = s_wqm_b64 %22:exec
  6. p_logical_start
  7. s1: %44:s[0] = p_parallelcopy %19:s[3]
  8. s2: %27:s[2-3] = p_create_vector %18:s[2], 0xffff8000
  9. s8: %28:s[4-11] = s_load_dwordx8 %27:s[2-3], 0 reorder
  10. s4: %30:s[12-15] = s_load_dwordx4 %27:s[2-3], 0x50 reorder
  11. s1: %45:m0 = p_parallelcopy %44:s[0]
  12. v1: %25:v[2] = v_interp_p1_f32 %20:v[0], %45:m0 attr0.y
  13. v1: %3:v[2] = v_interp_p2_f32 %21:v[1], %45:m0, %25:v[2] attr0.y
  14. v1: %26:v[3] = v_interp_p1_f32 %20:v[0], %45:m0 attr0.x
  15. v1: %4:v[3] = v_interp_p2_f32 %21:v[1], %45:m0, %26:v[3] attr0.x
  16. v1: %46:v[4] = p_parallelcopy %3:v[2]
  17. v2: %5:v[3-4] = p_create_vector %4:v[3], %46:v[4]
  18. v2: %31:v[3-4] = p_wqm %5:v[3-4]
  19. v4: %7:v[4-7] = image_sample %31:v[3-4], %28:s[4-11], %30:s[12-15] 2d reorder
  20. v1: %36:v[2] = v_interp_p1_f32 %20:v[0], %45:m0 attr0.z
  21. v1: %8:v[2] = v_interp_p2_f32 %21:v[1], %45:m0, %36:v[2] attr0.z
  22. v1: %37:v[3] = v_interp_p1_f32 %20:v[0], %45:m0 attr0.w
  23. v1: %10:v[3] = v_interp_p2_f32 %21:v[1], %45:m0, %37:v[3] attr0.w
  24. v1: %38:v[8] = v_interp_p1_f32 %20:v[0], %45:m0 attr1.x
  25. v1: %12:v[8] = v_interp_p2_f32 %21:v[1], %45:m0, %38:v[8] attr1.x
  26. v1: %39:v[0] = v_interp_p1_f32 %20:v[0], %45:m0 attr1.y
  27. v1: %14:v[0] = v_interp_p2_f32 %21:v[1], %45:m0, %39:v[0] attr1.y
  28. v1: %32:v[4], v1: %33:v[5], v1: %34:v[6], v1: %35:v[7] = p_split_vector %7:v[4-7]
  29. v1: %9:v[4] = v_mul_f32 %8:v[2], %32:v[4]
  30. v1: %11:v[5] = v_mul_f32 %10:v[3], %33:v[5]
  31. v1: %13:v[6] = v_mul_f32 %12:v[8], %34:v[6]
  32. v1: %15:v[0] = v_mul_f32 %14:v[0], %35:v[7]
  33. v1: %40:v[1] = v_cvt_pkrtz_f16_f32 %9:v[4], %11:v[5]
  34. v1: %41:v[0] = v_cvt_pkrtz_f16_f32 %13:v[6], %15:v[0]
  35. exp %40:v[1], %41:v[0], v1: undef, v1: undef compr mrt0
  36. p_logical_end
  37. s_endpgm
  38.  
  39. disasm:
  40. BB0:
  41. s_wqm_b64 exec, exec ; befe0a7e
  42. s_mov_b32 s0, s3 ; be800303
  43. s_movk_i32 s3, 0x8000 ; b0038000
  44. s_load_dwordx8 s[4:11], s[2:3], 0x0 ; f40c0101 fa000000
  45. s_load_dwordx4 s[12:15], s[2:3], 0x50 ; f4080301 fa000050
  46. s_mov_b32 m0, s0 ; befc0300
  47. v_interp_p1_f32_e32 v2, v0, attr0.y ; c8080100
  48. v_interp_p2_f32_e32 v2, v1, attr0.y ; c8090101
  49. v_interp_p1_f32_e32 v3, v0, attr0.x ; c80c0000
  50. v_interp_p2_f32_e32 v3, v1, attr0.x ; c80d0001
  51. v_mov_b32_e32 v4, v2 ; 7e080302
  52. s_waitcnt lgkmcnt(0) ; bf8cc07f
  53. image_sample v[4:7], v[3:4], s[4:11], s[12:15] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0800f08 00610403
  54. v_interp_p1_f32_e32 v2, v0, attr0.z ; c8080200
  55. v_interp_p2_f32_e32 v2, v1, attr0.z ; c8090201
  56. v_interp_p1_f32_e32 v3, v0, attr0.w ; c80c0300
  57. v_interp_p2_f32_e32 v3, v1, attr0.w ; c80d0301
  58. v_interp_p1_f32_e32 v8, v0, attr1.x ; c8200400
  59. v_interp_p2_f32_e32 v8, v1, attr1.x ; c8210401
  60. v_interp_p1_f32_e32 v0, v0, attr1.y ; c8000500
  61. v_interp_p2_f32_e32 v0, v1, attr1.y ; c8010501
  62. s_waitcnt vmcnt(0) ; bf8c3f70
  63. v_mul_f32_e32 v4, v2, v4 ; 10080902
  64. v_mul_f32_e32 v5, v3, v5 ; 100a0b03
  65. v_mul_f32_e32 v6, v8, v6 ; 100c0d08
  66. v_mul_f32_e32 v0, v0, v7 ; 10000f00
  67. v_cvt_pkrtz_f16_f32_e64 v1, v4, v5 ; d52f0001 00020b04
  68. v_cvt_pkrtz_f16_f32_e64 v0, v6, v0 ; d52f0000 00020106
  69. exp mrt0 v1, off, v0, off done compr vm ; f8001c05 80800001
  70. s_endpgm ; bf810000
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement