Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// HSAIL disassembly of an OpenCL memory-read micro-benchmark kernel.
//
// &__OpenCL_read_linear_uncached_kernel(const float *input, float *output):
//   Each 256-work-item group repeatedly stages 256 floats from global
//   memory into group (LDS) scratch with a hand-rolled async-copy loop,
//   then runs a long, fully unrolled dependent chain x = x + x*x on the
//   loaded value so the compiler cannot dead-code-eliminate the loads.
//   The 512-float scratch is split into two 256-float halves that are
//   toggled between iterations (double buffering); $s4 counts the outer
//   loop by 2 up to 32, i.e. 16 outer iterations covering 32 tiles.
//   Each work item finally writes its accumulated value to output.
//
// NOTE(review): formatting restored from a Pastebin scrape — every line
// carried a leading "- " list marker that made the source invalid HSAIL.
// The instruction stream itself is reproduced verbatim (the unrolled
// chains are part of what the benchmark measures — do not re-roll them).
module &input_bc:1:0:$full:$large:$default;

extension "amd:gcn";
extension "amd:mipmap";
extension "IMAGE";

prog kernel &__OpenCL_read_linear_uncached_kernel(
	kernarg_u64 %_.global_offset_0,
	kernarg_u64 %_.global_offset_1,
	kernarg_u64 %_.global_offset_2,
	kernarg_u64 %_.printf_buffer,
	kernarg_u64 %_.vqueue_pointer,
	kernarg_u64 %_.aqlwrap_pointer,
	kernarg_u64 %input,
	kernarg_u64 %output)
{
	requiredworkgroupsize 256, 1, 1;
	requirenopartialworkgroups;
	// AMD runtime interface metadata (argument layout, uniqueid, etc.)
	// — emitted by the compiler; kept verbatim.
	pragma "AMD RTI", "ARGSTART:__OpenCL_read_linear_uncached_kernel";
	pragma "AMD RTI", "version:3:1:104";
	pragma "AMD RTI", "device:generic";
	pragma "AMD RTI", "uniqueid:1024";
	pragma "AMD RTI", "cws:256:1:1";
	pragma "AMD RTI", "value:_.global_offset_0:i64:1:1:0";
	pragma "AMD RTI", "constarg:0:_.global_offset_0";
	pragma "AMD RTI", "value:_.global_offset_1:i64:1:1:16";
	pragma "AMD RTI", "constarg:1:_.global_offset_1";
	pragma "AMD RTI", "value:_.global_offset_2:i64:1:1:32";
	pragma "AMD RTI", "constarg:2:_.global_offset_2";
	pragma "AMD RTI", "pointer:_.printf_buffer:u8:1:1:48:uav:7:1:RO:0:0:0";
	pragma "AMD RTI", "value:_.vqueue_pointer:i64:1:1:64";
	pragma "AMD RTI", "value:_.aqlwrap_pointer:i64:1:1:80";
	pragma "AMD RTI", "pointer:input:float:1:1:96:uav:7:4:RO:0:0:0";
	pragma "AMD RTI", "pointer:output:float:1:1:112:uav:7:4:RW:0:0:0";
	pragma "AMD RTI", "function:1:0";
	pragma "AMD RTI", "memory:64bitABI";
	pragma "AMD RTI", "privateid:8";
	pragma "AMD RTI", "enqueue_kernel:0";
	pragma "AMD RTI", "kernel_index:0";
	pragma "AMD RTI", "reflection:0:size_t";
	pragma "AMD RTI", "reflection:1:size_t";
	pragma "AMD RTI", "reflection:2:size_t";
	pragma "AMD RTI", "reflection:3:size_t";
	pragma "AMD RTI", "reflection:4:size_t";
	pragma "AMD RTI", "reflection:5:size_t";
	pragma "AMD RTI", "reflection:6:float*";
	pragma "AMD RTI", "reflection:7:float*";
	pragma "AMD RTI", "ARGEND:__OpenCL_read_linear_uncached_kernel";
	// Group (LDS) staging buffer: two 256-float halves (double buffer).
	group_f32 %read_linear_uncached.scratch[512];

@__OpenCL_read_linear_uncached_kernel_entry:
	// BB#0: set-up.
	//   $s1 = wgsize.y * wgsize.x (via mul24), later * wgsize.z -> $s5
	//         = flat workgroup size (256 given requiredworkgroupsize).
	//   $d0 = flat work-item id (copy-loop start index).
	//   $d1 = absolute id.x + global_offset_0 -> output element index.
	currentworkgroupsize_u32 $s0, 1;
	currentworkgroupsize_u32 $s1, 0;
	mul24_s32 $s1, $s1, $s0;
	workitemabsid_u32 $s0, 0;
	cvt_u64_u32 $d0, $s0;
	workitemid_u32 $s0, 0;
	currentworkgroupsize_u32 $s2, 2;
	ld_kernarg_align(8)_width(all)_u64 $d1, [%_.global_offset_0];
	add_u64 $d1, $d0, $d1;
	mul24_s32 $s5, $s1, $s2;
	shl_u32 $s1, $s0, 2;                    // byte offset of this item's scratch slot
	workitemflatid_u32 $s2;
	lda_group_u32 $s3, [%read_linear_uncached.scratch];
	st_group_align(4)_u32 0, [%read_linear_uncached.scratch][$s1];
	add_u32 $s1, $s3, $s1;                  // absolute group address of own slot
	cvt_s64_s32 $d0, $s2;
	mov_b32 $s4, 0;                         // outer loop counter (steps by 2 to 32)
	cvt_s64_s32 $d2, $s5;                   // copy-loop stride = flat workgroup size
	cvt_u32_u64 $s5, $d1;
	ld_kernarg_align(8)_width(all)_u64 $d1, [%output];
	ld_kernarg_align(8)_width(all)_u64 $d3, [%input];
	mov_b32 $s6, $s5;                       // current input tile index

@LBB0_1:
	// Outer loop (16 iterations): each iteration stages and processes
	// two consecutive 256-float tiles.  $s8 toggles between 0 and 512
	// bytes — which scratch half is written this pass.
	shl_u32 $s7, $s4, 8;
	and_b32 $s8, $s7, 512;
	// Guard: only flat ids 0..255 participate in the staged copy
	// (always true for a 256x1x1 group; guard is compiler-generated).
	cmp_gt_b1_u32 $c0, $s2, 255;
	cbr_b1 $c0, @LBB0_4;
	// BB#2: copy-loop preamble — $d4 = tile base, $d5 = running index.
	cvt_s64_s32 $d4, $s6;
	mov_b64 $d5, $d0;
@LBB0_3:
	// %.preheader — async_work_group_copy body: global[tile + i] ->
	// scratch[half + i]; stride $d2 (= 256), so each item copies once.
	cvt_u32_u64 $s7, $d5;
	add_u32 $s7, $s7, $s8;
	shl_u32 $s7, $s7, 2;
	add_u64 $d6, $d5, $d4;
	shl_u64 $d6, $d6, 2;
	add_u64 $d6, $d3, $d6;
	ld_global_align(4)_f32 $s9, [$d6];
	st_group_align(4)_f32 $s9, [%read_linear_uncached.scratch][$s7];
	add_u64 $d5, $d5, $d2;
	cmp_lt_b1_u64 $c0, $d5, 256;
	cbr_b1 $c0, @LBB0_3;
@LBB0_4:
	// %_Z21async_work_group_copyPU3AS3fPKU3AS1fm9ocl_event.exit
	// Read from the OTHER half (|256 floats) at this item's lane,
	// then run the unrolled dependent chain x = x + x*x (100 iters).
	or_b32 $s7, $s8, 256;
	add_u32 $s7, $s7, $s0;
	shl_u32 $s9, $s7, 2;
	barrier;
	ld_group_align(4)_f32 $s7, [%read_linear_uncached.scratch][$s9];
	// iteration 1
	mul_ftz_f32 $s10, $s7, $s7;
	add_ftz_f32 $s10, $s7, $s10;
	add_u32 $s7, $s3, $s9;                  // absolute address for pass 2
	// iterations 2-11
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 12-21
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 22-31
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 32-41
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 42-51
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 52-61
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 62-71
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 72-81
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 82-91
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	// iterations 92-100
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	mul_ftz_f32 $s11, $s10, $s10;
	add_ftz_f32 $s10, $s10, $s11;
	st_group_align(4)_f32 $s10, [%read_linear_uncached.scratch][$s9];
	// Stage the NEXT tile (tile index $s6 + 1) into scratch half $s8,
	// guarded the same way as the first copy.
	cmp_ge_b1_u32 $c0, $s2, 256;
	cbr_b1 $c0, @LBB0_7;
	// BB#5:
	add_u32 $s9, $s6, 1;
	cvt_s64_s32 $d4, $s9;
	mov_b64 $d5, $d0;
@LBB0_6:
	// %.preheader.1 — second staged copy (unrolled outer iteration).
	cvt_u32_u64 $s9, $d5;
	add_u32 $s9, $s9, $s8;
	shl_u32 $s9, $s9, 2;
	add_u64 $d6, $d5, $d4;
	shl_u64 $d6, $d6, 2;
	add_u64 $d6, $d3, $d6;
	ld_global_align(4)_f32 $s10, [$d6];
	st_group_align(4)_f32 $s10, [%read_linear_uncached.scratch][$s9];
	add_u64 $d5, $d5, $d2;
	cmp_lt_b1_u64 $c0, $d5, 256;
	cbr_b1 $c0, @LBB0_6;
@LBB0_7:
	// %_Z21async_work_group_copyPU3AS3fPKU3AS1fm9ocl_event.exit.1
	// Second unrolled chain: same x = x + x*x, 100 iterations, this
	// time through the absolute address $s7 computed above.  The two
	// counter bumps ($s6 += 2 tiles, $s4 += 2) are interleaved into
	// the chain by the scheduler; order is preserved verbatim.
	barrier;
	ld_group_align(4)_f32 $s8, [$s7];
	// iterations 1-3
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	add_u32 $s6, $s6, 2;
	add_u32 $s4, $s4, 2;
	// iterations 4-13
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 14-23
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 24-33
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 34-43
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 44-53
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 54-63
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 64-73
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 74-83
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 84-93
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	// iterations 94-100
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	mul_ftz_f32 $s9, $s8, $s8;
	add_ftz_f32 $s8, $s8, $s9;
	st_group_align(4)_f32 $s8, [$s7];
	// Loop until 32 tiles processed ($s4 steps by 2 -> 16 passes).
	cmp_ne_b1_s32 $c0, $s4, 32;
	cbr_b1 $c0, @LBB0_1;
	// BB#8: write this work item's final value to output[abs_id].
	cvt_s64_s32 $d0, $s5;
	shl_u64 $d0, $d0, 2;
	add_u64 $d0, $d1, $d0;
	ld_group_align(4)_f32 $s0, [$s1];
	st_global_align(4)_f32 $s0, [$d0];
	ret;
};
Add Comment
Please, Sign In to add comment