Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- [Timur@timur-zen ~]$ cd ~/Projects/Others/VK-GL-CTS/build/^C
- [Timur@timur-zen ~]$ cat output.txt
- Writing test log into TestResults.qpa
- dEQP Core git-372a1fbf47d460b75ec9e2e1de114f82bfd0ac95 (0x372a1fbf) starting..
- target implementation = 'Default'
- WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely
- WARNING: radv is not a conformant vulkan implementation, testing use only.
- WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely
- WARNING: radv is not a conformant vulkan implementation, testing use only.
- Test case 'dEQP-VK.subgroups.shuffle.compute.subgroupshuffle_uint'..
- shader: MESA_SHADER_COMPUTE
- local-size: 1, 1, 1
- shared-size: 1
- inputs: 0
- outputs: 0
- uniforms: 0
- shared: 0
- decl_var ssbo INTERP_MODE_NONE block @0 (429, 0, 2)
- decl_var ssbo INTERP_MODE_NONE block @1 (429, 0, 1)
- decl_var ssbo INTERP_MODE_NONE block @2 (429, 0, 0)
- decl_function main (0 params)
- impl main {
- block block_0:
- /* preds: */
- vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
- vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
- vec1 1 ssa_2 = load_const (true)
- vec3 32 ssa_3 = intrinsic load_num_work_groups () ()
- vec3 32 ssa_4 = intrinsic load_work_group_id () ()
- vec3 32 ssa_5 = intrinsic load_local_invocation_id () ()
- vec1 32 ssa_6 = iadd ssa_4.z, ssa_5.z
- vec1 32 ssa_7 = imul ssa_3.y, ssa_6
- vec1 32 ssa_8 = iadd ssa_4.y, ssa_5.y
- vec1 32 ssa_9 = iadd ssa_7, ssa_8
- vec1 32 ssa_10 = imul ssa_3.x, ssa_9
- vec1 32 ssa_11 = iadd ssa_4.x, ssa_5.x
- vec1 32 ssa_12 = iadd ssa_10, ssa_11
- vec1 64 ssa_13 = intrinsic ballot (ssa_2) ()
- vec1 32 ssa_14 = intrinsic load_subgroup_invocation () ()
- vec1 32 ssa_15 = intrinsic vulkan_resource_index (ssa_1) (0, 2, 7) /* desc-set=0 */ /* binding=2 */ /* desc_type=SSBO */
- vec1 32 ssa_16 = load_const (0x00000002 /* 0.000000 */)
- vec1 32 ssa_17 = ishl ssa_14, ssa_16
- vec1 32 ssa_18 = intrinsic load_ssbo (ssa_15, ssa_17) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_19 = load_const (0x00000040 /* 0.000000 */)
- vec1 32 ssa_20 = load_const (0x0000003f /* 0.000000 */)
- vec1 32 ssa_21 = iand ssa_18, ssa_20
- vec1 32 ssa_22 = intrinsic vulkan_resource_index (ssa_1) (0, 1, 7) /* desc-set=0 */ /* binding=1 */ /* desc_type=SSBO */
- vec1 32 ssa_23 = intrinsic load_ssbo (ssa_22, ssa_17) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_24 = intrinsic shuffle (ssa_23, ssa_21) ()
- vec1 1 ssa_25 = ult ssa_21, ssa_19
- vec1 64 ssa_26 = load_const (0x 1 /* 0.000000 */)
- vec1 64 ssa_27 = ushr ssa_13, ssa_21
- vec1 64 ssa_28 = iand ssa_27, ssa_26
- vec1 1 ssa_29 = i2b1 ssa_28
- vec1 1 ssa_30 = iand ssa_25, ssa_29
- /* succs: block_1 block_2 */
- if ssa_30 {
- block block_1:
- /* preds: block_0 */
- vec1 32 ssa_31 = ishl ssa_21, ssa_16
- vec1 32 ssa_32 = intrinsic load_ssbo (ssa_22, ssa_31) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 1 ssa_33 = ieq ssa_24, ssa_32
- vec1 32 ssa_34 = bcsel ssa_33, ssa_0, ssa_1
- /* succs: block_3 */
- } else {
- block block_2:
- /* preds: block_0 */
- /* succs: block_3 */
- }
- block block_3:
- /* preds: block_1 block_2 */
- vec1 32 ssa_35 = phi block_1: ssa_34, block_2: ssa_0
- vec1 32 ssa_36 = intrinsic vulkan_resource_index (ssa_1) (0, 0, 7) /* desc-set=0 */ /* binding=0 */ /* desc_type=SSBO */
- vec1 32 ssa_37 = ishl ssa_12, ssa_16
- intrinsic store_ssbo (ssa_35, ssa_36, ssa_37) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
- /* succs: block_4 */
- block block_4:
- }
- After RA:
- BB0
- /* logical preds: / linear preds: / kind: top-level, branch, */
- s2: %40:s[0-1], s1: %41:s[2], s1: %42:s[3], s1: %43:s[4], s1: %44:s[5], s1: %45:s[6], s1: %46:s[7], s1: %47:s[8], v1: %48:v[0], v1: %49:v[1], v1: %50:v[2], s2: %51:exec = p_startpgm
- p_logical_start
- s1: %64:s[0], s1: %63:scc = s_add_i32 32, %41:s[2]
- s2: %65:s[0-1] = p_create_vector %64:s[0], 0xffff8000
- s4: %66:s[12-15] = s_load_dwordx4 %65:s[0-1], 0 reorder
- v1: %62:v[3] = v_mbcnt_lo_u32_b32 -1, 0
- v1: %13:v[3] = v_mbcnt_hi_u32_b32 -1, %62:v[3]
- v1: %17:v[4] = v_lshlrev_b32 2, %13:v[3]
- v1: %18:v[5] = buffer_load_dword %17:v[4], %66:s[12-15], 0 offen buffer
- v1: %4:v[2] = v_add_u32 %47:s[8], %50:v[2]
- s1: %68:s[0], s1: %67:scc = s_add_i32 16, %41:s[2]
- s2: %69:s[0-1] = p_create_vector %68:s[0], 0xffff8000
- s4: %70:s[8-11] = s_load_dwordx4 %69:s[0-1], 0 reorder
- v1: %5:v[2] = v_mul_lo_u32 %43:s[4], %4:v[2]
- v1: %7:v[1] = v_add3_u32 %5:v[2], %46:s[7], %49:v[1]
- v1: %8:v[1] = v_mul_lo_u32 %42:s[3], %7:v[1]
- v1: %10:v[0] = v_add3_u32 %8:v[1], %45:s[6], %48:v[0]
- s1: %11:s[0] = s_mov_b32 1
- s1: %98:scc = p_parallelcopy %11:s[0]
- s2: %61:s[0-1] = s_cselect_b64 %0:exec, 0, %98:scc
- v1: %78:v[1] = v_and_b32 32, %13:v[3]
- v1: %20:v[2] = v_and_b32 63, %18:v[5]
- v1: %71:v[3] = v_lshlrev_b32 2, %20:v[2]
- v1: %79:v[5] = v_and_b32 32, %71:v[3]
- s2: %82:vcc = v_cmp_eq_u32 %78:v[1], %79:v[5]
- v2: %24:v[5-6] = v_lshrrev_b64 %20:v[2], %61:s[0-1]
- v1: %22:v[1] = buffer_load_dword %17:v[4], %70:s[8-11], 0 offen buffer
- v1: %25:v[5] = p_extract_vector %24:v[5-6], 0
- v1: %27:v[4] = v_and_b32 1, %25:v[5]
- v2: %28:v[4-5] = p_create_vector %27:v[4], 0
- s2: %29:s[0-1] = v_cmp_lg_u64 0, %28:v[4-5]
- s2: %31:s[4-5] = v_cmp_gt_u32 64, %20:v[2]
- s2: %32:s[0-1], s1: %84:scc = s_and_b64 %31:s[4-5], %29:s[0-1]
- v1: %75:v[4], s1: %74:s[3], s1: %73:s[4] = p_swap_half_waves %22:v[1]
- v1: %80:v[1] = ds_bpermute_b32 %71:v[3], %22:v[1]
- v1: %81:v[3] = ds_bpermute_b32 %71:v[3], %75:v[4]
- v1: %72:v[1] = v_cndmask_b32 %81:v[3], %80:v[1], %82:vcc
- p_logical_end
- s2: %93:s[0-1], s1: %92:scc, s2: %91:exec = s_and_saveexec_b64 %32:s[0-1], %51:exec
- p_cbranch_z %91:exec BB2, BB1
- BB1
- /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
- p_logical_start
- v1: %33:v[2] = v_lshlrev_b32 2, %20:v[2]
- v1: %34:v[2] = buffer_load_dword %33:v[2], %70:s[8-11], 0 offen buffer
- s2: %35:vcc = v_cmp_eq_i32 %72:v[1], %34:v[2]
- v1: %36:v[1] = v_cndmask_b32 0, 1, %35:vcc
- p_logical_end
- p_branch BB3
- BB2
- /* logical preds: / linear preds: BB0, / kind: uniform, */
- p_branch BB3
- BB3
- /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
- s2: %94:exec = p_linear_phi %91:exec, %91:exec
- s2: %96:exec, s1: %95:scc = s_andn2_b64 %93:s[0-1], %94:exec
- p_cbranch_z %96:exec BB5, BB4
- BB4
- /* logical preds: BB0, / linear preds: BB3, / kind: uniform, */
- p_logical_start
- p_logical_end
- p_branch BB6
- BB5
- /* logical preds: / linear preds: BB3, / kind: uniform, */
- p_branch BB6
- BB6
- /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
- v1: %37:v[1] = p_phi %36:v[1], 1
- s2: %97:exec = p_parallelcopy %93:s[0-1]
- p_logical_start
- s2: %89:s[2-3] = p_create_vector %41:s[2], 0xffff8000
- s4: %90:s[0-3] = s_load_dwordx4 %89:s[2-3], 0 reorder
- v1: %39:v[0] = v_lshlrev_b32 2, %10:v[0]
- buffer_store_dword %39:v[0], %90:s[0-3], 0, %37:v[1] offen disable_wqm buffer
- p_logical_end
- s_endpgm
- disasm:
- BB0:
- s_add_i32 s0, 32, s2 ; 810002a0
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[12:15], s[0:1], 0x0 ; f4080300 fa000000
- v_mbcnt_lo_u32_b32_e64 v3, -1, 0 ; d7650003 000100c1
- v_mbcnt_hi_u32_b32_e64 v3, -1, v3 ; d7660003 000206c1
- v_lshlrev_b32_e32 v4, 2, v3 ; 34080682
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v5, v4, s[12:15], 0 offen ; e0301000 80030504
- v_add_nc_u32_e32 v2, s8, v2 ; 4a040408
- s_add_i32 s0, 16, s2 ; 81000290
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[8:11], s[0:1], 0x0 ; f4080200 fa000000
- v_mul_lo_u32 v2, s4, v2 ; d5690002 00020404
- v_add3_u32 v1, v2, s7, v1 ; d76d0001 04040f02
- v_mul_lo_u32 v1, s3, v1 ; d5690001 00020203
- v_add3_u32 v0, v1, s6, v0 ; d76d0000 04000d01
- s_mov_b32 s0, 1 ; be800381
- s_cmp_lg_i32 s0, 0 ; bf018000
- s_cselect_b64 s[0:1], exec, 0 ; 8580807e
- v_and_b32_e32 v1, 32, v3 ; 360206a0
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_and_b32_e32 v2, 63, v5 ; 36040abf
- v_lshlrev_b32_e32 v3, 2, v2 ; 34060482
- v_and_b32_e32 v5, 32, v3 ; 360a06a0
- v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; 7d840b01
- v_lshrrev_b64 v[5:6], v2, s[0:1] ; d7000005 00000102
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v1, v4, s[8:11], 0 offen ; e0301000 80020104
- v_and_b32_e32 v4, 1, v5 ; 36080a81
- v_mov_b32_e32 v5, 0 ; 7e0a0280
- v_cmp_ne_u64_e64 s0, 0, v[4:5] ; d4e50000 00020880
- v_cmp_gt_u32_e64 s4, 64, v2 ; d4c40004 000204c0
- s_and_b64 s[0:1], s[4:5], s[0:1] ; 87800004
- s_and_saveexec_b32 s3, 0 ; be833c80
- s_subvector_loop_begin s4, 208 ; bd8400d0
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 192 ; be0400c0
- s_mov_b32 exec_lo, 3 ; befe0383
- s_subvector_loop_begin s4, 228 ; bd8400e4
- v_mov_b32_e32 v4, s8 ; 7e080208
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 212 ; be0400d4
- ds_bpermute_b32 v1, v3, v1 ; dacc0000 01000103
- ds_bpermute_b32 v3, v3, v4 ; dacc0000 03000403
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- v_cndmask_b32_e32 v1, v3, v1, vcc_lo ; 02020303
- s_and_saveexec_b64 s[0:1], s[0:1] ; be802400
- s_cbranch_execz BB3 ; bf880007
- BB1:
- v_lshlrev_b32_e32 v2, 2, v2 ; 34040482
- buffer_load_dword v2, v2, s[8:11], 0 offen ; e0301000 80020202
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_cmp_eq_i32_e32 vcc_lo, v1, v2 ; 7d040501
- v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; d5010001 01a90280
- BB3:
- s_andn2_b64 exec, s[0:1], exec ; 8afe7e00
- s_cbranch_execz BB6 ; bf880001
- BB4:
- v_mov_b32_e32 v1, 1 ; 7e020281
- BB6:
- s_mov_b64 exec, s[0:1] ; befe0400
- s_movk_i32 s3, 0x8000 ; b0038000
- s_load_dwordx4 s[0:3], s[2:3], 0x0 ; f4080001 fa000000
- v_lshlrev_b32_e32 v0, 2, v0 ; 34000082
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_store_dword v1, v0, s[0:3], 0 offen ; e0701000 80000100
- s_endpgm ; bf810000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- shader: MESA_SHADER_COMPUTE
- local-size: 32, 4, 1
- shared-size: 1
- inputs: 0
- outputs: 0
- uniforms: 0
- shared: 0
- decl_var ssbo INTERP_MODE_NONE block @0 (429, 0, 2)
- decl_var ssbo INTERP_MODE_NONE block @1 (429, 0, 1)
- decl_var ssbo INTERP_MODE_NONE block @2 (429, 0, 0)
- decl_function main (0 params)
- impl main {
- block block_0:
- /* preds: */
- vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
- vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
- vec1 1 ssa_2 = load_const (true)
- vec3 32 ssa_3 = intrinsic load_num_work_groups () ()
- vec1 32 ssa_4 = load_const (0x00000005 /* 0.000000 */)
- vec1 32 ssa_5 = ishl ssa_3.x, ssa_4
- vec1 32 ssa_6 = load_const (0x00000002 /* 0.000000 */)
- vec1 32 ssa_7 = ishl ssa_3.y, ssa_6
- vec3 32 ssa_8 = intrinsic load_work_group_id () ()
- vec3 32 ssa_9 = intrinsic load_local_invocation_id () ()
- vec1 32 ssa_10 = iadd ssa_8.z, ssa_9.z
- vec1 32 ssa_11 = imul ssa_7, ssa_10
- vec1 32 ssa_12 = ishl ssa_8.y, ssa_6
- vec1 32 ssa_13 = iadd ssa_12, ssa_9.y
- vec1 32 ssa_14 = iadd ssa_11, ssa_13
- vec1 32 ssa_15 = imul ssa_5, ssa_14
- vec1 32 ssa_16 = ishl ssa_8.x, ssa_4
- vec1 32 ssa_17 = iadd ssa_16, ssa_9.x
- vec1 32 ssa_18 = iadd ssa_15, ssa_17
- vec1 64 ssa_19 = intrinsic ballot (ssa_2) ()
- vec1 32 ssa_20 = intrinsic load_subgroup_invocation () ()
- vec1 32 ssa_21 = intrinsic vulkan_resource_index (ssa_1) (0, 2, 7) /* desc-set=0 */ /* binding=2 */ /* desc_type=SSBO */
- vec1 32 ssa_22 = ishl ssa_20, ssa_6
- vec1 32 ssa_23 = intrinsic load_ssbo (ssa_21, ssa_22) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_24 = load_const (0x00000040 /* 0.000000 */)
- vec1 32 ssa_25 = load_const (0x0000003f /* 0.000000 */)
- vec1 32 ssa_26 = iand ssa_23, ssa_25
- vec1 32 ssa_27 = intrinsic vulkan_resource_index (ssa_1) (0, 1, 7) /* desc-set=0 */ /* binding=1 */ /* desc_type=SSBO */
- vec1 32 ssa_28 = intrinsic load_ssbo (ssa_27, ssa_22) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_29 = intrinsic shuffle (ssa_28, ssa_26) ()
- vec1 1 ssa_30 = ult ssa_26, ssa_24
- vec1 64 ssa_31 = load_const (0x 1 /* 0.000000 */)
- vec1 64 ssa_32 = ushr ssa_19, ssa_26
- vec1 64 ssa_33 = iand ssa_32, ssa_31
- vec1 1 ssa_34 = i2b1 ssa_33
- vec1 1 ssa_35 = iand ssa_30, ssa_34
- /* succs: block_1 block_2 */
- if ssa_35 {
- block block_1:
- /* preds: block_0 */
- vec1 32 ssa_36 = ishl ssa_26, ssa_6
- vec1 32 ssa_37 = intrinsic load_ssbo (ssa_27, ssa_36) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 1 ssa_38 = ieq ssa_29, ssa_37
- vec1 32 ssa_39 = bcsel ssa_38, ssa_0, ssa_1
- /* succs: block_3 */
- } else {
- block block_2:
- /* preds: block_0 */
- /* succs: block_3 */
- }
- block block_3:
- /* preds: block_1 block_2 */
- vec1 32 ssa_40 = phi block_1: ssa_39, block_2: ssa_0
- vec1 32 ssa_41 = intrinsic vulkan_resource_index (ssa_1) (0, 0, 7) /* desc-set=0 */ /* binding=0 */ /* desc_type=SSBO */
- vec1 32 ssa_42 = ishl ssa_18, ssa_6
- intrinsic store_ssbo (ssa_40, ssa_41, ssa_42) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
- /* succs: block_4 */
- block block_4:
- }
- After RA:
- BB0
- /* logical preds: / linear preds: / kind: top-level, branch, */
- s2: %45:s[0-1], s1: %46:s[2], s1: %47:s[3], s1: %48:s[4], s1: %49:s[5], s1: %50:s[6], s1: %51:s[7], s1: %52:s[8], v1: %53:v[0], v1: %54:v[1], v1: %55:v[2], s2: %56:exec = p_startpgm
- p_logical_start
- s1: %73:s[0], s1: %72:scc = s_add_i32 32, %46:s[2]
- s2: %74:s[0-1] = p_create_vector %73:s[0], 0xffff8000
- s4: %75:s[12-15] = s_load_dwordx4 %74:s[0-1], 0 reorder
- v1: %71:v[3] = v_mbcnt_lo_u32_b32 -1, 0
- v1: %19:v[3] = v_mbcnt_hi_u32_b32 -1, %71:v[3]
- v1: %22:v[4] = v_lshlrev_b32 2, %19:v[3]
- v1: %23:v[5] = buffer_load_dword %22:v[4], %75:s[12-15], 0 offen buffer
- s1: %3:s[3], s1: %60:scc = s_lshl_b32 %47:s[3], 5
- s1: %77:s[0], s1: %76:scc = s_add_i32 16, %46:s[2]
- s2: %78:s[0-1] = p_create_vector %77:s[0], 0xffff8000
- s4: %79:s[12-15] = s_load_dwordx4 %78:s[0-1], 0 reorder
- s1: %5:s[0], s1: %61:scc = s_lshl_b32 %48:s[4], 2
- v1: %8:v[2] = v_add_u32 %52:s[8], %55:v[2]
- v1: %9:v[2] = v_mul_lo_u32 %5:s[0], %8:v[2]
- s1: %10:s[0], s1: %68:scc = s_lshl_b32 %51:s[7], 2
- v1: %12:v[1] = v_add3_u32 %9:v[2], %10:s[0], %54:v[1]
- v1: %13:v[1] = v_mul_lo_u32 %3:s[3], %12:v[1]
- s1: %14:s[0], s1: %69:scc = s_lshl_b32 %50:s[6], 5
- v1: %16:v[0] = v_add3_u32 %13:v[1], %14:s[0], %53:v[0]
- s1: %17:s[0] = s_mov_b32 1
- s1: %107:scc = p_parallelcopy %17:s[0]
- s2: %70:s[0-1] = s_cselect_b64 %0:exec, 0, %107:scc
- v1: %87:v[1] = v_and_b32 32, %19:v[3]
- v1: %25:v[2] = v_and_b32 63, %23:v[5]
- v1: %80:v[3] = v_lshlrev_b32 2, %25:v[2]
- v1: %88:v[5] = v_and_b32 32, %80:v[3]
- s2: %91:vcc = v_cmp_eq_u32 %87:v[1], %88:v[5]
- v2: %29:v[5-6] = v_lshrrev_b64 %25:v[2], %70:s[0-1]
- v1: %27:v[1] = buffer_load_dword %22:v[4], %79:s[12-15], 0 offen buffer
- v1: %30:v[5] = p_extract_vector %29:v[5-6], 0
- v1: %32:v[4] = v_and_b32 1, %30:v[5]
- v2: %33:v[4-5] = p_create_vector %32:v[4], 0
- s2: %34:s[0-1] = v_cmp_lg_u64 0, %33:v[4-5]
- s2: %36:s[4-5] = v_cmp_gt_u32 64, %25:v[2]
- s2: %37:s[0-1], s1: %93:scc = s_and_b64 %36:s[4-5], %34:s[0-1]
- v1: %84:v[4], s1: %83:s[3], s1: %82:s[4] = p_swap_half_waves %27:v[1]
- v1: %89:v[1] = ds_bpermute_b32 %80:v[3], %27:v[1]
- v1: %90:v[3] = ds_bpermute_b32 %80:v[3], %84:v[4]
- v1: %81:v[1] = v_cndmask_b32 %90:v[3], %89:v[1], %91:vcc
- p_logical_end
- s2: %102:s[0-1], s1: %101:scc, s2: %100:exec = s_and_saveexec_b64 %37:s[0-1], %56:exec
- p_cbranch_z %100:exec BB2, BB1
- BB1
- /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
- p_logical_start
- v1: %38:v[2] = v_lshlrev_b32 2, %25:v[2]
- v1: %39:v[2] = buffer_load_dword %38:v[2], %79:s[12-15], 0 offen buffer
- s2: %40:vcc = v_cmp_eq_i32 %81:v[1], %39:v[2]
- v1: %41:v[1] = v_cndmask_b32 0, 1, %40:vcc
- p_logical_end
- p_branch BB3
- BB2
- /* logical preds: / linear preds: BB0, / kind: uniform, */
- p_branch BB3
- BB3
- /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
- s2: %103:exec = p_linear_phi %100:exec, %100:exec
- s2: %105:exec, s1: %104:scc = s_andn2_b64 %102:s[0-1], %103:exec
- p_cbranch_z %105:exec BB5, BB4
- BB4
- /* logical preds: BB0, / linear preds: BB3, / kind: uniform, */
- p_logical_start
- p_logical_end
- p_branch BB6
- BB5
- /* logical preds: / linear preds: BB3, / kind: uniform, */
- p_branch BB6
- BB6
- /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
- v1: %42:v[1] = p_phi %41:v[1], 1
- s2: %106:exec = p_parallelcopy %102:s[0-1]
- p_logical_start
- s2: %98:s[2-3] = p_create_vector %46:s[2], 0xffff8000
- s4: %99:s[0-3] = s_load_dwordx4 %98:s[2-3], 0 reorder
- v1: %44:v[0] = v_lshlrev_b32 2, %16:v[0]
- buffer_store_dword %44:v[0], %99:s[0-3], 0, %42:v[1] offen disable_wqm buffer
- p_logical_end
- s_endpgm
- disasm:
- BB0:
- s_add_i32 s0, 32, s2 ; 810002a0
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[12:15], s[0:1], 0x0 ; f4080300 fa000000
- v_mbcnt_lo_u32_b32_e64 v3, -1, 0 ; d7650003 000100c1
- v_mbcnt_hi_u32_b32_e64 v3, -1, v3 ; d7660003 000206c1
- v_lshlrev_b32_e32 v4, 2, v3 ; 34080682
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v5, v4, s[12:15], 0 offen ; e0301000 80030504
- v_nop ; 7e000000
- s_lshl_b32 s3, s3, 5 ; 8f038503
- s_add_i32 s0, 16, s2 ; 81000290
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[12:15], s[0:1], 0x0 ; f4080300 fa000000
- s_lshl_b32 s0, s4, 2 ; 8f008204
- v_add_nc_u32_e32 v2, s8, v2 ; 4a040408
- v_mul_lo_u32 v2, s0, v2 ; d5690002 00020400
- s_lshl_b32 s0, s7, 2 ; 8f008207
- v_add3_u32 v1, v2, s0, v1 ; d76d0001 04040102
- v_mul_lo_u32 v1, s3, v1 ; d5690001 00020203
- s_lshl_b32 s0, s6, 5 ; 8f008506
- v_add3_u32 v0, v1, s0, v0 ; d76d0000 04000101
- s_mov_b32 s0, 1 ; be800381
- s_cmp_lg_i32 s0, 0 ; bf018000
- s_cselect_b64 s[0:1], exec, 0 ; 8580807e
- v_and_b32_e32 v1, 32, v3 ; 360206a0
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_and_b32_e32 v2, 63, v5 ; 36040abf
- v_lshlrev_b32_e32 v3, 2, v2 ; 34060482
- v_and_b32_e32 v5, 32, v3 ; 360a06a0
- v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; 7d840b01
- v_lshrrev_b64 v[5:6], v2, s[0:1] ; d7000005 00000102
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v1, v4, s[12:15], 0 offen ; e0301000 80030104
- v_and_b32_e32 v4, 1, v5 ; 36080a81
- v_mov_b32_e32 v5, 0 ; 7e0a0280
- v_cmp_ne_u64_e64 s0, 0, v[4:5] ; d4e50000 00020880
- v_cmp_gt_u32_e64 s4, 64, v2 ; d4c40004 000204c0
- s_and_b64 s[0:1], s[4:5], s[0:1] ; 87800004
- s_and_saveexec_b32 s3, 0 ; be833c80
- s_subvector_loop_begin s4, 228 ; bd8400e4
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 212 ; be0400d4
- s_mov_b32 exec_lo, 3 ; befe0383
- s_subvector_loop_begin s4, 248 ; bd8400f8
- v_mov_b32_e32 v4, s8 ; 7e080208
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 232 ; be0400e8
- ds_bpermute_b32 v1, v3, v1 ; dacc0000 01000103
- ds_bpermute_b32 v3, v3, v4 ; dacc0000 03000403
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- v_cndmask_b32_e32 v1, v3, v1, vcc_lo ; 02020303
- s_and_saveexec_b64 s[0:1], s[0:1] ; be802400
- s_cbranch_execz BB3 ; bf880007
- BB1:
- v_lshlrev_b32_e32 v2, 2, v2 ; 34040482
- buffer_load_dword v2, v2, s[12:15], 0 offen ; e0301000 80030202
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_cmp_eq_i32_e32 vcc_lo, v1, v2 ; 7d040501
- v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; d5010001 01a90280
- BB3:
- s_andn2_b64 exec, s[0:1], exec ; 8afe7e00
- s_cbranch_execz BB6 ; bf880001
- BB4:
- v_mov_b32_e32 v1, 1 ; 7e020281
- BB6:
- s_mov_b64 exec, s[0:1] ; befe0400
- s_movk_i32 s3, 0x8000 ; b0038000
- s_load_dwordx4 s[0:3], s[2:3], 0x0 ; f4080001 fa000000
- v_lshlrev_b32_e32 v0, 2, v0 ; 34000082
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_store_dword v1, v0, s[0:3], 0 offen ; e0701000 80000100
- s_endpgm ; bf810000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- shader: MESA_SHADER_COMPUTE
- local-size: 32, 1, 4
- shared-size: 1
- inputs: 0
- outputs: 0
- uniforms: 0
- shared: 0
- decl_var ssbo INTERP_MODE_NONE block @0 (429, 0, 2)
- decl_var ssbo INTERP_MODE_NONE block @1 (429, 0, 1)
- decl_var ssbo INTERP_MODE_NONE block @2 (429, 0, 0)
- decl_function main (0 params)
- impl main {
- block block_0:
- /* preds: */
- vec1 32 ssa_0 = load_const (0x00000001 /* 0.000000 */)
- vec1 32 ssa_1 = load_const (0x00000000 /* 0.000000 */)
- vec1 1 ssa_2 = load_const (true)
- vec3 32 ssa_3 = intrinsic load_num_work_groups () ()
- vec1 32 ssa_4 = load_const (0x00000005 /* 0.000000 */)
- vec1 32 ssa_5 = ishl ssa_3.x, ssa_4
- vec3 32 ssa_6 = intrinsic load_work_group_id () ()
- vec3 32 ssa_7 = intrinsic load_local_invocation_id () ()
- vec1 32 ssa_8 = load_const (0x00000002 /* 0.000000 */)
- vec1 32 ssa_9 = ishl ssa_6.z, ssa_8
- vec1 32 ssa_10 = iadd ssa_9, ssa_7.z
- vec1 32 ssa_11 = imul ssa_3.y, ssa_10
- vec1 32 ssa_12 = iadd ssa_6.y, ssa_7.y
- vec1 32 ssa_13 = iadd ssa_11, ssa_12
- vec1 32 ssa_14 = imul ssa_5, ssa_13
- vec1 32 ssa_15 = ishl ssa_6.x, ssa_4
- vec1 32 ssa_16 = iadd ssa_15, ssa_7.x
- vec1 32 ssa_17 = iadd ssa_14, ssa_16
- vec1 64 ssa_18 = intrinsic ballot (ssa_2) ()
- vec1 32 ssa_19 = intrinsic load_subgroup_invocation () ()
- vec1 32 ssa_20 = intrinsic vulkan_resource_index (ssa_1) (0, 2, 7) /* desc-set=0 */ /* binding=2 */ /* desc_type=SSBO */
- vec1 32 ssa_21 = ishl ssa_19, ssa_8
- vec1 32 ssa_22 = intrinsic load_ssbo (ssa_20, ssa_21) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_23 = load_const (0x00000040 /* 0.000000 */)
- vec1 32 ssa_24 = load_const (0x0000003f /* 0.000000 */)
- vec1 32 ssa_25 = iand ssa_22, ssa_24
- vec1 32 ssa_26 = intrinsic vulkan_resource_index (ssa_1) (0, 1, 7) /* desc-set=0 */ /* binding=1 */ /* desc_type=SSBO */
- vec1 32 ssa_27 = intrinsic load_ssbo (ssa_26, ssa_21) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 32 ssa_28 = intrinsic shuffle (ssa_27, ssa_25) ()
- vec1 1 ssa_29 = ult ssa_25, ssa_23
- vec1 64 ssa_30 = load_const (0x 1 /* 0.000000 */)
- vec1 64 ssa_31 = ushr ssa_18, ssa_25
- vec1 64 ssa_32 = iand ssa_31, ssa_30
- vec1 1 ssa_33 = i2b1 ssa_32
- vec1 1 ssa_34 = iand ssa_29, ssa_33
- /* succs: block_1 block_2 */
- if ssa_34 {
- block block_1:
- /* preds: block_0 */
- vec1 32 ssa_35 = ishl ssa_25, ssa_8
- vec1 32 ssa_36 = intrinsic load_ssbo (ssa_26, ssa_35) (16, 4, 0) /* access=16 */ /* align_mul=4 */ /* align_offset=0 */
- vec1 1 ssa_37 = ieq ssa_28, ssa_36
- vec1 32 ssa_38 = bcsel ssa_37, ssa_0, ssa_1
- /* succs: block_3 */
- } else {
- block block_2:
- /* preds: block_0 */
- /* succs: block_3 */
- }
- block block_3:
- /* preds: block_1 block_2 */
- vec1 32 ssa_39 = phi block_1: ssa_38, block_2: ssa_0
- vec1 32 ssa_40 = intrinsic vulkan_resource_index (ssa_1) (0, 0, 7) /* desc-set=0 */ /* binding=0 */ /* desc_type=SSBO */
- vec1 32 ssa_41 = ishl ssa_17, ssa_8
- intrinsic store_ssbo (ssa_39, ssa_40, ssa_41) (1, 0, 4, 0) /* wrmask=x */ /* access=0 */ /* align_mul=4 */ /* align_offset=0 */
- /* succs: block_4 */
- block block_4:
- }
- After RA:
- BB0
- /* logical preds: / linear preds: / kind: top-level, branch, */
- s2: %44:s[0-1], s1: %45:s[2], s1: %46:s[3], s1: %47:s[4], s1: %48:s[5], s1: %49:s[6], s1: %50:s[7], s1: %51:s[8], v1: %52:v[0], v1: %53:v[1], v1: %54:v[2], s2: %55:exec = p_startpgm
- p_logical_start
- s1: %71:s[0], s1: %70:scc = s_add_i32 32, %45:s[2]
- s2: %72:s[0-1] = p_create_vector %71:s[0], 0xffff8000
- s4: %73:s[12-15] = s_load_dwordx4 %72:s[0-1], 0 reorder
- v1: %69:v[3] = v_mbcnt_lo_u32_b32 -1, 0
- v1: %18:v[3] = v_mbcnt_hi_u32_b32 -1, %69:v[3]
- v1: %21:v[4] = v_lshlrev_b32 2, %18:v[3]
- v1: %22:v[5] = buffer_load_dword %21:v[4], %73:s[12-15], 0 offen buffer
- s1: %3:s[3], s1: %59:scc = s_lshl_b32 %46:s[3], 5
- s1: %75:s[0], s1: %74:scc = s_add_i32 16, %45:s[2]
- s2: %76:s[0-1] = p_create_vector %75:s[0], 0xffff8000
- s4: %77:s[12-15] = s_load_dwordx4 %76:s[0-1], 0 reorder
- s1: %7:s[5], s1: %66:scc = s_lshl_b32 %51:s[8], 2
- v1: %8:v[2] = v_add_u32 %7:s[5], %54:v[2]
- v1: %9:v[2] = v_mul_lo_u32 %47:s[4], %8:v[2]
- v1: %11:v[1] = v_add3_u32 %9:v[2], %50:s[7], %53:v[1]
- v1: %12:v[1] = v_mul_lo_u32 %3:s[3], %11:v[1]
- s1: %13:s[0], s1: %67:scc = s_lshl_b32 %49:s[6], 5
- v1: %15:v[0] = v_add3_u32 %12:v[1], %13:s[0], %52:v[0]
- s1: %16:s[0] = s_mov_b32 1
- s1: %105:scc = p_parallelcopy %16:s[0]
- s2: %68:s[0-1] = s_cselect_b64 %0:exec, 0, %105:scc
- v1: %85:v[1] = v_and_b32 32, %18:v[3]
- v1: %24:v[2] = v_and_b32 63, %22:v[5]
- v1: %78:v[3] = v_lshlrev_b32 2, %24:v[2]
- v1: %86:v[5] = v_and_b32 32, %78:v[3]
- s2: %89:vcc = v_cmp_eq_u32 %85:v[1], %86:v[5]
- v2: %28:v[5-6] = v_lshrrev_b64 %24:v[2], %68:s[0-1]
- v1: %26:v[1] = buffer_load_dword %21:v[4], %77:s[12-15], 0 offen buffer
- v1: %29:v[5] = p_extract_vector %28:v[5-6], 0
- v1: %31:v[4] = v_and_b32 1, %29:v[5]
- v2: %32:v[4-5] = p_create_vector %31:v[4], 0
- s2: %33:s[0-1] = v_cmp_lg_u64 0, %32:v[4-5]
- s2: %35:s[4-5] = v_cmp_gt_u32 64, %24:v[2]
- s2: %36:s[0-1], s1: %91:scc = s_and_b64 %35:s[4-5], %33:s[0-1]
- v1: %82:v[4], s1: %81:s[3], s1: %80:s[4] = p_swap_half_waves %26:v[1]
- v1: %87:v[1] = ds_bpermute_b32 %78:v[3], %26:v[1]
- v1: %88:v[3] = ds_bpermute_b32 %78:v[3], %82:v[4]
- v1: %79:v[1] = v_cndmask_b32 %88:v[3], %87:v[1], %89:vcc
- p_logical_end
- s2: %100:s[0-1], s1: %99:scc, s2: %98:exec = s_and_saveexec_b64 %36:s[0-1], %55:exec
- p_cbranch_z %98:exec BB2, BB1
- BB1
- /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
- p_logical_start
- v1: %37:v[2] = v_lshlrev_b32 2, %24:v[2]
- v1: %38:v[2] = buffer_load_dword %37:v[2], %77:s[12-15], 0 offen buffer
- s2: %39:vcc = v_cmp_eq_i32 %79:v[1], %38:v[2]
- v1: %40:v[1] = v_cndmask_b32 0, 1, %39:vcc
- p_logical_end
- p_branch BB3
- BB2
- /* logical preds: / linear preds: BB0, / kind: uniform, */
- p_branch BB3
- BB3
- /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
- s2: %101:exec = p_linear_phi %98:exec, %98:exec
- s2: %103:exec, s1: %102:scc = s_andn2_b64 %100:s[0-1], %101:exec
- p_cbranch_z %103:exec BB5, BB4
- BB4
- /* logical preds: BB0, / linear preds: BB3, / kind: uniform, */
- p_logical_start
- p_logical_end
- p_branch BB6
- BB5
- /* logical preds: / linear preds: BB3, / kind: uniform, */
- p_branch BB6
- BB6
- /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
- v1: %41:v[1] = p_phi %40:v[1], 1
- s2: %104:exec = p_parallelcopy %100:s[0-1]
- p_logical_start
- s2: %96:s[2-3] = p_create_vector %45:s[2], 0xffff8000
- s4: %97:s[0-3] = s_load_dwordx4 %96:s[2-3], 0 reorder
- v1: %43:v[0] = v_lshlrev_b32 2, %15:v[0]
- buffer_store_dword %43:v[0], %97:s[0-3], 0, %41:v[1] offen disable_wqm buffer
- p_logical_end
- s_endpgm
- disasm:
- BB0:
- s_add_i32 s0, 32, s2 ; 810002a0
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[12:15], s[0:1], 0x0 ; f4080300 fa000000
- v_mbcnt_lo_u32_b32_e64 v3, -1, 0 ; d7650003 000100c1
- v_mbcnt_hi_u32_b32_e64 v3, -1, v3 ; d7660003 000206c1
- v_lshlrev_b32_e32 v4, 2, v3 ; 34080682
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v5, v4, s[12:15], 0 offen ; e0301000 80030504
- v_nop ; 7e000000
- s_lshl_b32 s3, s3, 5 ; 8f038503
- s_add_i32 s0, 16, s2 ; 81000290
- s_movk_i32 s1, 0x8000 ; b0018000
- s_load_dwordx4 s[12:15], s[0:1], 0x0 ; f4080300 fa000000
- s_lshl_b32 s5, s8, 2 ; 8f058208
- v_add_nc_u32_e32 v2, s5, v2 ; 4a040405
- v_mul_lo_u32 v2, s4, v2 ; d5690002 00020404
- v_add3_u32 v1, v2, s7, v1 ; d76d0001 04040f02
- v_mul_lo_u32 v1, s3, v1 ; d5690001 00020203
- s_lshl_b32 s0, s6, 5 ; 8f008506
- v_add3_u32 v0, v1, s0, v0 ; d76d0000 04000101
- s_mov_b32 s0, 1 ; be800381
- s_cmp_lg_i32 s0, 0 ; bf018000
- s_cselect_b64 s[0:1], exec, 0 ; 8580807e
- v_and_b32_e32 v1, 32, v3 ; 360206a0
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_and_b32_e32 v2, 63, v5 ; 36040abf
- v_lshlrev_b32_e32 v3, 2, v2 ; 34060482
- v_and_b32_e32 v5, 32, v3 ; 360a06a0
- v_cmp_eq_u32_e32 vcc_lo, v1, v5 ; 7d840b01
- v_lshrrev_b64 v[5:6], v2, s[0:1] ; d7000005 00000102
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_load_dword v1, v4, s[12:15], 0 offen ; e0301000 80030104
- v_and_b32_e32 v4, 1, v5 ; 36080a81
- v_mov_b32_e32 v5, 0 ; 7e0a0280
- v_cmp_ne_u64_e64 s0, 0, v[4:5] ; d4e50000 00020880
- v_cmp_gt_u32_e64 s4, 64, v2 ; d4c40004 000204c0
- s_and_b64 s[0:1], s[4:5], s[0:1] ; 87800004
- s_and_saveexec_b32 s3, 0 ; be833c80
- s_subvector_loop_begin s4, 224 ; bd8400e0
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 208 ; be0400d0
- s_mov_b32 exec_lo, 3 ; befe0383
- s_subvector_loop_begin s4, 244 ; bd8400f4
- v_mov_b32_e32 v4, s8 ; 7e080208
- v_mov_b32_e32 v8, v1 ; 7e100301
- s_subvector_loop_end s4, 228 ; be0400e4
- ds_bpermute_b32 v1, v3, v1 ; dacc0000 01000103
- ds_bpermute_b32 v3, v3, v4 ; dacc0000 03000403
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- v_cndmask_b32_e32 v1, v3, v1, vcc_lo ; 02020303
- s_and_saveexec_b64 s[0:1], s[0:1] ; be802400
- s_cbranch_execz BB3 ; bf880007
- BB1:
- v_lshlrev_b32_e32 v2, 2, v2 ; 34040482
- buffer_load_dword v2, v2, s[12:15], 0 offen ; e0301000 80030202
- s_waitcnt vmcnt(0) ; bf8c3f70
- v_cmp_eq_i32_e32 vcc_lo, v1, v2 ; 7d040501
- v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; d5010001 01a90280
- BB3:
- s_andn2_b64 exec, s[0:1], exec ; 8afe7e00
- s_cbranch_execz BB6 ; bf880001
- BB4:
- v_mov_b32_e32 v1, 1 ; 7e020281
- BB6:
- s_mov_b64 exec, s[0:1] ; befe0400
- s_movk_i32 s3, 0x8000 ; b0038000
- s_load_dwordx4 s[0:3], s[2:3], 0x0 ; f4080001 fa000000
- v_lshlrev_b32_e32 v0, 2, v0 ; 34000082
- s_waitcnt lgkmcnt(0) ; bf8cc07f
- buffer_store_dword v1, v0, s[0:3], 0 offen ; e0701000 80000100
- s_endpgm ; bf810000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- s_code_end ; bf9f0000
- [Timur@timur-zen ~]$
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement