Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ; Module header: identification, target description, and the aggregate types used by the kernel signature.
- ; ModuleID = '<stdin>'
- source_filename = "kernel.cpp.kernel.tmp.cpp"
- ; Data layout string and target triple (amdgcn architecture, amdhsa OS, hcc environment).
- target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
- target triple = "amdgcn--amdhsa-hcc"
- ; Launch-parameter record: two gl_dim3 members, three i32 members, and pointers to two opaque hc:: classes.
- ; NOTE(review): the meaning of the individual fields is not visible in this module - confirm against the HCC headers.
- %struct.grid_launch_parm = type { %struct.gl_dim3, %struct.gl_dim3, i32, i32, i32, %"class.hc::accelerator_view"*, %"class.hc::completion_future"* }
- ; Three i32 components.
- %struct.gl_dim3 = type { i32, i32, i32 }
- ; Opaque here: only ever referenced through pointers in this module.
- %"class.hc::accelerator_view" = type opaque
- %"class.hc::completion_future" = type opaque
- ; llvm.used entry listing the __hcLaunchKernel_ declaration (bitcast to i8*); the symbol is only declared, not defined, in this module.
- @llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.grid_launch_parm addrspace(5)*, float*, float*, i32)* @__hcLaunchKernel__Z15matrixTranspose16grid_launch_parmPfS0_i to i8*)], section "llvm.metadata"
- ; matrixTranspose body: each invocation copies a single float.
- ; With gid0/gid1 the per-dimension indices computed below, it performs
- ;     out[gid0 * width + gid1] = in[gid1 * width + gid0]
- ; The %lp argument is marked readnone and is never referenced in the body.
- ; NOTE(review): no bounds check against %width is present in this IR - confirm the launch geometry guarantees in-range indices.
- ; Function Attrs: alwaysinline nounwind
- define void @_Z15matrixTranspose16grid_launch_parmPfS0_i(%struct.grid_launch_parm addrspace(5)* byval nocapture readnone align 8 %lp, float* nocapture %out, float* nocapture readonly %in, i32 %width) #0 {
- ; Dimension 0 index: group_id(0) * group_size(0) + workitem_id(0), narrowed from i64 to i32.
- %size0 = tail call i64 @hc_get_group_size(i32 0) #2
- %group0 = tail call i64 @hc_get_group_id(i32 0) #2
- %base0 = mul nsw i64 %group0, %size0
- %item0 = tail call i64 @hc_get_workitem_id(i32 0) #2
- %gid0.wide = add nsw i64 %base0, %item0
- %gid0 = trunc i64 %gid0.wide to i32
- ; Dimension 1 index, built the same way from the (i32 1) helper calls.
- %size1 = tail call i64 @hc_get_group_size(i32 1) #2
- %group1 = tail call i64 @hc_get_group_id(i32 1) #2
- %base1 = mul nsw i64 %group1, %size1
- %item1 = tail call i64 @hc_get_workitem_id(i32 1) #2
- %gid1.wide = add nsw i64 %base1, %item1
- %gid1 = trunc i64 %gid1.wide to i32
- ; Source address: &in[gid1 * width + gid0] (index arithmetic in i32, then sign-extended for the GEP).
- %src.row = mul nsw i32 %gid1, %width
- %src.idx = add nsw i32 %src.row, %gid0
- %src.off = sext i32 %src.idx to i64
- %src.ptr = getelementptr inbounds float, float* %in, i64 %src.off
- ; Destination address: &out[gid0 * width + gid1].
- %dst.row = mul nsw i32 %gid0, %width
- %dst.idx = add nsw i32 %dst.row, %gid1
- %dst.off = sext i32 %dst.idx to i64
- %dst.ptr = getelementptr inbounds float, float* %out, i64 %dst.off
- ; Load the element, route it through the inline-asm v_mov_b32_e32 (one "v" input, one "=v" output), then store the asm result.
- %elem = load float, float* %src.ptr, align 4, !tbaa !2
- %moved = tail call float asm sideeffect "v_mov_b32_e32 $0, $1", "=v,v"(float %elem) #3, !srcloc !6
- store float %moved, float* %dst.ptr, align 4, !tbaa !2
- ret void
- }
- ; External helpers called by the kernel. Each takes an i32 (the kernel passes 0 or 1) and returns an i64;
- ; their bodies are not part of this module.
- ; NOTE(review): presumably the i32 selects a grid dimension - confirm against the HCC runtime.
- ; Function Attrs: nounwind readnone
- declare i64 @hc_get_group_size(i32) local_unnamed_addr #1
- ; Function Attrs: nounwind readnone
- declare i64 @hc_get_group_id(i32) local_unnamed_addr #1
- ; Function Attrs: nounwind readnone
- declare i64 @hc_get_workitem_id(i32) local_unnamed_addr #1
- ; Declaration only; this is the symbol listed in @llvm.used.
- declare void @__hcLaunchKernel__Z15matrixTranspose16grid_launch_parmPfS0_i(%struct.grid_launch_parm addrspace(5)*, float*, float*, i32)
- ; Attribute groups:
- ;   #0 - the matrixTranspose definition (carries "hc_grid_launch" and alwaysinline)
- ;   #1 - the three hc_get_* declarations
- ;   #2 - the hc_get_* call sites inside the kernel
- ;   #3 - the inline-asm call inside the kernel
- attributes #0 = { alwaysinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "hc_grid_launch" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fp64-fp16-denormals,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fp64-fp16-denormals,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
- attributes #2 = { nobuiltin nounwind readnone }
- attributes #3 = { nounwind }
- ; Named metadata: module flags (!0) and the producer identification string (!1).
- !llvm.module.flags = !{!0}
- !llvm.ident = !{!1}
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{!"HCC clang version 5.0.0 (based on HCC 1.0.17312-d1f4a8a-19aa706-56b5abe )"}
- ; TBAA access tag !2 is attached to the kernel's float load and store.
- ; Type chain: "float" (!3) -> "omnipotent char" (!4) -> root "Simple C++ TBAA" (!5).
- !2 = !{!3, !3, i64 0}
- !3 = !{!"float", !4, i64 0}
- !4 = !{!"omnipotent char", !5, i64 0}
- !5 = !{!"Simple C++ TBAA"}
- ; !srcloc operand attached to the inline-asm call in the kernel.
- !6 = !{i32 460}
Add Comment
Please, Sign In to add comment