Advertisement
lukinma

LDS counter

Jul 11th, 2019
5,576
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* Disassembling 'barrier-Ellesmere.bin' */
  2. .amdcl2 /* binary format selector: AMD OpenCL 2.0 layout (CLRX directive) */
  3. .gpu Iceland /* NOTE(review): the file name above and -D__Ellesmere__ in .compile_options say Ellesmere, while this says Iceland -- confirm the intended target before reassembling */
  4. .64bit /* 64-bit binary; matches -m64 in .compile_options and the 64-bit address arithmetic in the kernel */
  5. .arch_minor 0 /* architecture minor number as recorded in the binary */
  6. .arch_stepping 4 /* architecture stepping number as recorded in the binary */
  7. .driver_version 203603 /* driver version number as recorded in the binary */
  8. .compile_options "-fno-bin-source -fno-bin-llvmir -fno-bin-amdil -fbin-exe -D__AMD__=1 -D__Ellesmere__=1 -D__Ellesmere=1 -D__IMAGE_SUPPORT__=1 -DFP_FAST_FMA=1 -cl-denorms-are-zero -m64 -Dcl_khr_fp64=1 -Dcl_amd_fp64=1 -Dcl_khr_global_int32_base_atomics=1 -Dcl_khr_global_int32_extended_atomics=1 -Dcl_khr_local_int32_base_atomics=1 -Dcl_khr_local_int32_extended_atomics=1 -Dcl_khr_int64_base_atomics=1 -Dcl_khr_int64_extended_atomics=1 -Dcl_khr_3d_image_writes=1 -Dcl_khr_byte_addressable_store=1 -Dcl_khr_fp16=1 -Dcl_khr_gl_sharing=1 -Dcl_khr_gl_depth_images=1 -Dcl_amd_device_attribute_query=1 -Dcl_amd_vec3=1 -Dcl_amd_printf=1 -Dcl_amd_media_ops=1 -Dcl_amd_media_ops2=1 -Dcl_amd_popcnt=1 -Dcl_khr_d3d10_sharing=1 -Dcl_khr_d3d11_sharing=1 -Dcl_khr_dx9_media_sharing=1 -Dcl_khr_image2d_from_buffer=1 -Dcl_khr_spir=1 -Dcl_khr_subgroups=1 -Dcl_khr_gl_event=1 -Dcl_khr_depth_images=1 -Dcl_khr_mipmap_image=1 -Dcl_khr_mipmap_image_writes=1 -Dcl_amd_liquid_flash=1 -Dcl_amd_planar_yuv=1"
  9. .acl_version "AMD-COMP-LIB-v0.8 (0.0.SC_BUILD_NUMBER)"
  10. .kernel localVarExample /* per work-item: write 0 to one LDS dword, ds_add 1 to it, read it back, store the value to res[index] */
  11.    .config
  12.        .dims x /* only the X dimension is used */
  13.        .cws 64, 1, 1 /* work-group size 64x1x1; the code below multiplies s6 by 64 to match */
  14.        .sgprsnum 13 /* scalar register count declared for the kernel */
  15.        .vgprsnum 4 /* vector register count; the code uses v0..v3 */
  16.        .localsize 256 /* LDS bytes for the kernel: 256 = 64 dwords */
  17.        .floatmode 0xc0 /* raw float-mode value taken from the binary */
  18.        .pgmrsrc1 0x00ac0040 /* raw PGM_RSRC1 register value taken from the binary */
  19.        .pgmrsrc2 0x0000008c /* raw PGM_RSRC2 register value taken from the binary */
  20.        .dx10clamp
  21.        .ieeemode
  22.        .useargs
  23.        .priority 0
  24.        .arg _.global_offset_0, "size_t", long /* hidden arg; read below from offset 0x0 of s[4:5] */
  25.        .arg _.global_offset_1, "size_t", long /* hidden arg; not read by the code below */
  26.        .arg _.global_offset_2, "size_t", long /* hidden arg; not read by the code below */
  27.        .arg _.printf_buffer, "size_t", void*, global, , rdonly /* hidden arg; not read by the code below */
  28.        .arg _.vqueue_pointer, "size_t", long /* hidden arg; not read by the code below */
  29.        .arg _.aqlwrap_pointer, "size_t", long /* hidden arg; not read by the code below */
  30.        .arg res, "int*", int*, global, /* the only user argument; with six 8-byte slots before it, it sits at offset 0x30, which the code loads into s[2:3] */
  31.    .text
  32.         s_mov_b32       m0, 0x10000 /* M0 = 0x10000 before the DS instructions (presumably the LDS address limit -- confirm in the GCN3 ISA manual) */
  33.         s_lshl_b32      s0, s6, 6 /* s0 = s6 * 64; s6 is presumably the work-group id in X -- TODO confirm */
  34.         v_add_u32       v0, vcc, s0, v0 /* v0 = s6*64 + v0; incoming v0 is presumably the local id in X -- TODO confirm */
  35.         s_load_dwordx2  s[0:1], s[4:5], 0x0 /* load 8 bytes at offset 0x0 from s[4:5] (presumably the kernel-argument base): the _.global_offset_0 slot; overwrites s0 */
  36.         s_waitcnt       lgkmcnt(0) /* wait until the scalar load has landed in s[0:1] */
  37.         v_add_u32       v1, vcc, s0, v0 /* v1 = low dword of the loaded offset + v0, i.e. the index used for both LDS and res */
  38.         v_lshlrev_b32   v1, 2, v1 /* v1 = index * 4: byte offset of a dword element. NOTE(review): this offset includes s6*64 and the loaded offset, so it can exceed .localsize 256 when either is non-zero -- check against the OpenCL source */
  39.         v_mov_b32       v2, 0 /* v2 = 0, the initial value written to LDS */
  40.         v_mov_b32       v3, 1 /* v3 = 1, the addend for ds_add_u32 */
  41.         s_load_dwordx2  s[2:3], s[4:5], 0x30 /* load 8 bytes at offset 0x30: the res pointer (7th 8-byte argument slot) */
  42.         ds_write_b32    v1, v2 /* LDS[v1] = 0 */
  43.         s_waitcnt       lgkmcnt(0) /* wait for the LDS write and the scalar load issued above */
  44.         ds_add_u32      v1, v3 /* LDS[v1] += 1 */
  45.         s_waitcnt       lgkmcnt(0) /* wait for the LDS add to complete before reading back */
  46.         ds_read_b32     v2, v1 /* v2 = LDS[v1], the value that will be stored to res */
  47.         v_add_u32       v1, s[4:5], v0, s0 /* v1 = v0 + s0: the same index again; the carry-out goes to s[4:5], which is not read after this point */
  48.         v_mov_b32       v0, 0 /* v0 = 0, so the pair v[0:1] now holds index << 32 */
  49.         v_ashrrev_i64   v[0:1], 30, v[0:1] /* arithmetic >> 30 of (index << 32): sign-extended 64-bit value index * 4 */
  50.         v_add_u32       v0, vcc, s2, v0 /* low dword of the address: res.lo + offset.lo, carry into vcc */
  51.         v_mov_b32       v3, s3 /* copy the high dword of res into a VGPR for the add-with-carry */
  52.         v_addc_u32      v1, vcc, v3, v1, vcc /* high dword of the address: res.hi + offset.hi + carry */
  53.         s_waitcnt       lgkmcnt(0) /* wait for ds_read_b32 so v2 is valid */
  54.         flat_store_dword v[0:1], v2 /* store v2 to the 64-bit address in v[0:1], i.e. res + index*4 */
  55.         s_endpgm /* end of the kernel program */
Advertisement
RAW Paste Data Copied
Advertisement