Untitled

The current implementation generates this:

; @main as emitted by the current implementation. The define line is cut off
; in this paste; the first three visible parameters are byval pointers to
; arrays in addrspace(2).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrsp
main_body:
  ; %1 is the second parameter ([17 x <16 x i8>] addrspace(2)* byval);
  ; indices 0, 0 select element 0 of that array.
  %ptr = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
  ; Load that 16-byte element (align 16), tagged with TBAA node !0.
  %l = load <16 x i8>, <16 x i8> addrspace(2)* %ptr, align 16, !tbaa !0
  ; Feed the loaded <16 x i8> and the constant 0 to llvm.SI.load.const.
  ; NOTE(review): the i32 operand is presumably an offset -- confirm against
  ; the intrinsic definition.
  %c = call float @llvm.SI.load.const(<16 x i8> %l, i32 0)
  ; Combine (%c, 0.0) into one i32 via llvm.SI.packf16, then reinterpret the
  ; bits as float because llvm.SI.export takes float operands.
  %p = call i32 @llvm.SI.packf16(float %c, float 0.000000e+00)
  %v1 = bitcast i32 %p to float
  ; Same sequence for the pair (0.0, 0.0).
  %z = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
  %v2 = bitcast i32 %z to float
  ; The four float operands alternate %v1, %v2, %v1, %v2. The meaning of the
  ; five leading i32 operands is not visible in this paste.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %v1, float %v2, float %v1, float %v2)
  ret void
}

; llvm.SI.load.const: (<16 x i8>, i32) -> float, attribute group #1.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; llvm.SI.packf16: (float, float) -> i32, attribute group #1.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

; llvm.SI.export: five i32 operands followed by four floats, returns void.
; Declared without an attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; #0 is not referenced on any line visible in this paste; presumably it is
; attached to @main, whose define line is truncated -- confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
; #1 is the group used by the llvm.SI.load.const and llvm.SI.packf16
; declarations above.
attributes #1 = { nounwind readnone }

; !0 is the node referenced by the `!tbaa !0` tag on the load in @main.
!0 = !{!"const", null, i32 1}


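With my patch, it should generate this instead (the [17 x <16 x i8>] pointer is no longer a byval argument; it is rebuilt from the i32 argument %1 via zext + inttoptr):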

; @main as the patch is expected to emit it. The define line is cut off in
; this paste; the visible parameters are scalars and small integer vectors,
; the first six marked inreg, instead of byval pointers.
define void @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2
main_body:
  ; %1 is the second parameter (i32 inreg). Zero-extend it to 64 bits and
  ; convert the result to a pointer to [17 x <16 x i8>] in addrspace(2); the
  ; upper 32 bits of the resulting address are therefore zero.
  %22 = zext i32 %1 to i64
  %23 = inttoptr i64 %22 to [17 x <16 x i8>] addrspace(2)*
  ; Indices 0, 0 select element 0 of the array behind the rebuilt pointer.
  %ptr = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %23, i64 0, i64 0
  ; Load that 16-byte element (align 16), tagged with TBAA node !0.
  %l = load <16 x i8>, <16 x i8> addrspace(2)* %ptr, align 16, !tbaa !0
  ; Feed the loaded <16 x i8> and the constant 0 to llvm.SI.load.const.
  ; NOTE(review): the i32 operand is presumably an offset -- confirm against
  ; the intrinsic definition.
  %c = call float @llvm.SI.load.const(<16 x i8> %l, i32 0)
  ; Combine (%c, 0.0) into one i32 via llvm.SI.packf16, then reinterpret the
  ; bits as float because llvm.SI.export takes float operands.
  %p = call i32 @llvm.SI.packf16(float %c, float 0.000000e+00)
  %v1 = bitcast i32 %p to float
  ; Same sequence for the pair (0.0, 0.0).
  %z = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
  %v2 = bitcast i32 %z to float
  ; The four float operands alternate %v1, %v2, %v1, %v2. The meaning of the
  ; five leading i32 operands is not visible in this paste.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %v1, float %v2, float %v1, float %v2)
  ret void
}

; llvm.SI.load.const: (<16 x i8>, i32) -> float, attribute group #1.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; llvm.SI.packf16: (float, float) -> i32, attribute group #1.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

; llvm.SI.export: five i32 operands followed by four floats, returns void.
; Declared without an attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; #0 is not referenced on any line visible in this paste; presumably it is
; attached to @main, whose define line is truncated -- confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
; #1 is the group used by the llvm.SI.load.const and llvm.SI.packf16
; declarations above.
attributes #1 = { nounwind readnone }

; !0 is the node referenced by the `!tbaa !0` tag on the load in @main.
!0 = !{!"const", null, i32 1}