Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <class 'tvm.tensor.Tensor'>
- [15:11:58] C:\TVM\clone\tvm\src\runtime\opencl\opencl_device_api.cc:194: Multiple OpenCL platforms matched, use the first one ...
- [15:11:58] C:\TVM\clone\tvm\src\runtime\opencl\opencl_device_api.cc:197: Initialize OpenCL platform 'Intel(R) OpenCL '
- [15:11:58] C:\TVM\clone\tvm\src\runtime\opencl\opencl_device_api.cc:222: opencl(0)='Intel(R) HD Graphics 4600 ' cl_device_id=000002BE87E000E0
- ------tvm gen code------
- ; ModuleID = 'myadd'
- source_filename = "myadd"
- target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
- target triple = "x86_64-pc-windows-msvc"
- %0 = type { double } ; 8-byte argument slot; @myadd and @myadd_compute_ bitcast %0* to i64* / i8** to store integers and pointers in it
- %1 = type { i8*, %2, i32, %3, i64*, i64*, i64 } ; struct behind each argument handle; as @myadd uses the fields: 0 = pointer forwarded to the kernel, 1 = device_type/device_id, 2 = ndim, 3 = dtype, 4 = shape, 5 = strides, 6 = byte_offset (names taken from the assert messages below)
- %2 = type { i32, i32 } ; field 0 is checked against dev_type, field 1 against dev_id
- %3 = type { i8, i8, i16 } ; dtype triple; @myadd requires (2, 32, 1), which its assert message calls float32
- @__tvm_module_ctx = linkonce dllexport local_unnamed_addr global i8* null, align 8 ; passed as the first argument of __TVMBackendGetFuncFromEnv
- @__TVMFuncCall = linkonce dllexport local_unnamed_addr global i32 (i8*, %0*, i32*, i32, %0*, i32*)* null, align 8 ; function pointer used to invoke a looked-up function handle
- @__TVMBackendGetFuncFromEnv = linkonce dllexport local_unnamed_addr global i32 (i8*, i8*, i8**)* null, align 8 ; function pointer used to look a handle up by name
- @__TVMAPISetLastError = linkonce dllexport local_unnamed_addr global void (i8*)* null, align 8 ; function pointer that receives the message of a failed assert; NOTE(review): all four hooks are null here, presumably filled in by whoever loads the module -- confirm
- @.str = private constant [58 x i8] c"Assert fail: (num_args == 3), myadd: num_args should be 3\00", align 1 ; @.str through @.str.22 are the messages handed to __TVMAPISetLastError by the assert_fail* blocks of @myadd
- @.str.1 = private constant [87 x i8] c"Assert fail: (1 == int32(arg0.strides[0])), arg0.strides: expected to be compact array\00", align 1
- @.str.2 = private constant [87 x i8] c"Assert fail: (1 == int32(arg1.strides[0])), arg1.strides: expected to be compact array\00", align 1
- @.str.3 = private constant [87 x i8] c"Assert fail: (1 == int32(arg2.strides[0])), arg2.strides: expected to be compact array\00", align 1
- @.str.4 = private constant [110 x i8] c"Assert fail: (((arg0.code == 3) || (arg0.code == 7)) || (arg0.code == 4)), myadd: Expect arg[0] to be pointer\00", align 1
- @.str.5 = private constant [110 x i8] c"Assert fail: (((arg1.code == 3) || (arg1.code == 7)) || (arg1.code == 4)), myadd: Expect arg[1] to be pointer\00", align 1
- @.str.6 = private constant [110 x i8] c"Assert fail: (((arg2.code == 3) || (arg2.code == 7)) || (arg2.code == 4)), myadd: Expect arg[2] to be pointer\00", align 1
- @.str.7 = private constant [55 x i8] c"Assert fail: (dev_type == 4), device_type need to be 4\00", align 1
- @.str.8 = private constant [81 x i8] c"Assert fail: (1 == tvm_struct_get(arg0, 0, 4)), arg0.ndim is expected to equal 1\00", align 1
- @.str.9 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg0, 0, 5) == (uint8)2) && (tvm_struct_get(arg0, 0, 6) == (uint8)32)) && (tvm_struct_get(arg0, 0, 7) == (uint16)1)), arg0.dtype is expected to be float32\00", align 1
- @.str.10 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg0, 0, 8) == (uint64)0), Argument arg0.byte_offset has an unsatisfied constraint\00", align 1
- @.str.11 = private constant [81 x i8] c"Assert fail: (1 == tvm_struct_get(arg1, 0, 4)), arg1.ndim is expected to equal 1\00", align 1
- @.str.12 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg1, 0, 5) == (uint8)2) && (tvm_struct_get(arg1, 0, 6) == (uint8)32)) && (tvm_struct_get(arg1, 0, 7) == (uint16)1)), arg1.dtype is expected to be float32\00", align 1
- @.str.13 = private constant [95 x i8] c"Assert fail: (n == int32(arg1.shape[0])), Argument arg1.shape[0] has an unsatisfied constraint\00", align 1
- @.str.14 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg1, 0, 8) == (uint64)0), Argument arg1.byte_offset has an unsatisfied constraint\00", align 1
- @.str.15 = private constant [105 x i8] c"Assert fail: (4 == tvm_struct_get(arg1, 0, 10)), Argument arg1.device_type has an unsatisfied constraint\00", align 1
- @.str.16 = private constant [107 x i8] c"Assert fail: (dev_id == tvm_struct_get(arg1, 0, 9)), Argument arg1.device_id has an unsatisfied constraint\00", align 1
- @.str.17 = private constant [81 x i8] c"Assert fail: (1 == tvm_struct_get(arg2, 0, 4)), arg2.ndim is expected to equal 1\00", align 1
- @.str.18 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg2, 0, 5) == (uint8)2) && (tvm_struct_get(arg2, 0, 6) == (uint8)32)) && (tvm_struct_get(arg2, 0, 7) == (uint16)1)), arg2.dtype is expected to be float32\00", align 1
- @.str.19 = private constant [95 x i8] c"Assert fail: (n == int32(arg2.shape[0])), Argument arg2.shape[0] has an unsatisfied constraint\00", align 1
- @.str.20 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg2, 0, 8) == (uint64)0), Argument arg2.byte_offset has an unsatisfied constraint\00", align 1
- @.str.21 = private constant [105 x i8] c"Assert fail: (4 == tvm_struct_get(arg2, 0, 10)), Argument arg2.device_type has an unsatisfied constraint\00", align 1
- @.str.22 = private constant [107 x i8] c"Assert fail: (dev_id == tvm_struct_get(arg2, 0, 9)), Argument arg2.device_id has an unsatisfied constraint\00", align 1
- @.tvm_func.__tvm_set_device = linkonce local_unnamed_addr global i8* null, align 8 ; handle slot that @myadd reads before falling back to a lookup by name
- @.str.23 = private constant [17 x i8] c"__tvm_set_device\00", align 1 ; lookup name used by @myadd
- @.tvm_func.myadd__kernel0 = linkonce local_unnamed_addr global i8* null, align 8 ; handle slot that @myadd_compute_ reads before falling back to a lookup by name
- @.str.24 = private constant [15 x i8] c"myadd__kernel0\00", align 1 ; lookup name used by @myadd_compute_
- @__tvm_main__ = weak local_unnamed_addr constant [6 x i8] c"myadd\00", align 1 ; holds the name of the exported function below
- define dllexport i32 @myadd(i8* noalias nocapture readonly, i8* noalias nocapture readonly, i32) local_unnamed_addr { ; Exported entry point: validates three tensor arguments, calls "__tvm_set_device", then @myadd_compute_. Params: %0 = argument value array (8-byte slots), %1 = argument type-code array (i32 each), %2 = num_args. Returns -1 after a failed assert, otherwise the i32 status of the failing runtime call or of @myadd_compute_.
- entry:
- %3 = alloca [7 x i32], align 4 ; scratch type-code array for outgoing calls
- %4 = alloca [7 x %0], align 8 ; scratch value array for outgoing calls
- %.sub55 = getelementptr inbounds [7 x %0], [7 x %0]* %4, i64 0, i64 0 ; &values[0]
- %.sub = getelementptr inbounds [7 x i32], [7 x i32]* %3, i64 0, i64 0 ; &type_codes[0]
- %5 = icmp eq i32 %2, 3 ; num_args == 3
- br i1 %5, label %assert_end, label %assert_fail, !prof !1 ; !1 weights the passing edge 1048576:1
- assert_fail: ; preds = %entry -- wrong argument count
- %6 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %6(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @.str, i64 0, i64 0))
- ret i32 -1
- assert_end: ; preds = %entry -- unpack the three handles and their type codes
- %7 = bitcast i8* %0 to %1**
- %8 = load %1*, %1** %7, align 8 ; arg0 handle (value slot 0)
- %9 = bitcast i8* %1 to i32*
- %10 = load i32, i32* %9, align 4, !tbaa !5 ; arg0.code
- %11 = getelementptr inbounds i8, i8* %0, i64 8
- %12 = bitcast i8* %11 to %1**
- %13 = load %1*, %1** %12, align 8 ; arg1 handle (value slot 1, byte offset 8)
- %14 = getelementptr inbounds i8, i8* %1, i64 4
- %15 = bitcast i8* %14 to i32*
- %16 = load i32, i32* %15, align 4, !tbaa !19 ; arg1.code
- %17 = getelementptr inbounds i8, i8* %0, i64 16
- %18 = bitcast i8* %17 to %1**
- %19 = load %1*, %1** %18, align 8 ; arg2 handle (value slot 2, byte offset 16)
- %20 = getelementptr inbounds i8, i8* %1, i64 8
- %21 = bitcast i8* %20 to i32*
- %22 = load i32, i32* %21, align 4, !tbaa !21 ; arg2.code
- %23 = getelementptr inbounds %1, %1* %8, i64 0, i32 0
- %24 = load i8*, i8** %23, align 8 ; arg0 field 0, later forwarded to @myadd_compute_
- %25 = getelementptr inbounds %1, %1* %8, i64 0, i32 4
- %26 = load i64*, i64** %25, align 8 ; arg0 shape pointer
- %27 = load i64, i64* %26, align 8, !tbaa !24 ; arg0.shape[0]
- %28 = trunc i64 %27 to i32 ; n = int32(arg0.shape[0]); arg1 and arg2 are checked against it below
- %29 = getelementptr inbounds %1, %1* %8, i64 0, i32 5
- %30 = load i64*, i64** %29, align 8 ; arg0 strides pointer
- %31 = icmp eq i64* %30, null
- br i1 %31, label %if_end, label %if_then, !prof !38 ; a null strides pointer skips the stride check
- if_then: ; preds = %assert_end -- arg0.strides[0] must be 1
- %32 = load i64, i64* %30, align 8, !tbaa !39
- %33 = trunc i64 %32 to i32
- %34 = icmp eq i32 %33, 1
- br i1 %34, label %if_end, label %assert_fail1, !prof !1
- if_end: ; preds = %assert_end, %if_then -- read arg0 device info, then arg1 pointers
- %35 = getelementptr inbounds %1, %1* %8, i64 0, i32 1, i32 0
- %36 = load i32, i32* %35, align 4 ; dev_type (from arg0)
- %37 = getelementptr inbounds %1, %1* %8, i64 0, i32 1, i32 1
- %38 = load i32, i32* %37, align 4 ; dev_id (from arg0)
- %39 = getelementptr inbounds %1, %1* %13, i64 0, i32 0
- %40 = load i8*, i8** %39, align 8 ; arg1 field 0, later forwarded to @myadd_compute_
- %41 = getelementptr inbounds %1, %1* %13, i64 0, i32 4
- %42 = load i64*, i64** %41, align 8 ; arg1 shape pointer
- %43 = getelementptr inbounds %1, %1* %13, i64 0, i32 5
- %44 = load i64*, i64** %43, align 8 ; arg1 strides pointer
- %45 = icmp eq i64* %44, null
- br i1 %45, label %if_end4, label %if_then3, !prof !38
- assert_fail1: ; preds = %if_then
- %46 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %46(i8* getelementptr inbounds ([87 x i8], [87 x i8]* @.str.1, i64 0, i64 0))
- ret i32 -1
- if_then3: ; preds = %if_end -- arg1.strides[0] must be 1
- %47 = load i64, i64* %44, align 8, !tbaa !53
- %48 = trunc i64 %47 to i32
- %49 = icmp eq i32 %48, 1
- br i1 %49, label %if_end4, label %assert_fail5, !prof !1
- if_end4: ; preds = %if_end, %if_then3 -- read arg2 pointers
- %50 = getelementptr inbounds %1, %1* %19, i64 0, i32 0
- %51 = load i8*, i8** %50, align 8 ; arg2 field 0, later forwarded to @myadd_compute_
- %52 = getelementptr inbounds %1, %1* %19, i64 0, i32 4
- %53 = load i64*, i64** %52, align 8 ; arg2 shape pointer
- %54 = getelementptr inbounds %1, %1* %19, i64 0, i32 5
- %55 = load i64*, i64** %54, align 8 ; arg2 strides pointer
- %56 = icmp eq i64* %55, null
- br i1 %56, label %if_end8, label %if_then7, !prof !38
- assert_fail5: ; preds = %if_then3
- %57 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %57(i8* getelementptr inbounds ([87 x i8], [87 x i8]* @.str.2, i64 0, i64 0))
- ret i32 -1
- if_then7: ; preds = %if_end4 -- arg2.strides[0] must be 1
- %58 = load i64, i64* %55, align 8, !tbaa !67
- %59 = trunc i64 %58 to i32
- %60 = icmp eq i32 %59, 1
- br i1 %60, label %if_end8, label %assert_fail9, !prof !1
- if_end8: ; preds = %if_end4, %if_then7 -- arg0.code must be 3, 4 or 7
- switch i32 %10, label %assert_fail11 [
- i32 7, label %assert_end12
- i32 4, label %assert_end12
- i32 3, label %assert_end12
- ]
- assert_fail9: ; preds = %if_then7
- %61 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %61(i8* getelementptr inbounds ([87 x i8], [87 x i8]* @.str.3, i64 0, i64 0))
- ret i32 -1
- assert_fail11: ; preds = %if_end8
- %62 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %62(i8* getelementptr inbounds ([110 x i8], [110 x i8]* @.str.4, i64 0, i64 0))
- ret i32 -1
- assert_end12: ; preds = %if_end8, %if_end8, %if_end8 -- arg1.code must be 3, 4 or 7
- switch i32 %16, label %assert_fail13 [
- i32 7, label %assert_end14
- i32 4, label %assert_end14
- i32 3, label %assert_end14
- ]
- assert_fail13: ; preds = %assert_end12
- %63 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %63(i8* getelementptr inbounds ([110 x i8], [110 x i8]* @.str.5, i64 0, i64 0))
- ret i32 -1
- assert_end14: ; preds = %assert_end12, %assert_end12, %assert_end12 -- arg2.code must be 3, 4 or 7
- switch i32 %22, label %assert_fail15 [
- i32 7, label %assert_end16
- i32 4, label %assert_end16
- i32 3, label %assert_end16
- ]
- assert_fail15: ; preds = %assert_end14
- %64 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %64(i8* getelementptr inbounds ([110 x i8], [110 x i8]* @.str.6, i64 0, i64 0))
- ret i32 -1
- assert_end16: ; preds = %assert_end14, %assert_end14, %assert_end14 -- dev_type must be 4
- %65 = icmp eq i32 %36, 4
- br i1 %65, label %assert_end18, label %assert_fail17, !prof !1
- assert_fail17: ; preds = %assert_end16
- %66 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %66(i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.7, i64 0, i64 0))
- ret i32 -1
- assert_end18: ; preds = %assert_end16 -- arg0.ndim must be 1
- %67 = getelementptr inbounds %1, %1* %8, i64 0, i32 2
- %68 = load i32, i32* %67, align 4
- %69 = icmp eq i32 %68, 1
- br i1 %69, label %assert_end20, label %assert_fail19, !prof !1
- assert_fail19: ; preds = %assert_end18
- %70 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %70(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.8, i64 0, i64 0))
- ret i32 -1
- assert_end20: ; preds = %assert_end18 -- arg0 dtype triple must be (2, 32, 1); the message calls this float32
- %71 = getelementptr inbounds %1, %1* %8, i64 0, i32 3, i32 2
- %72 = load i16, i16* %71, align 2
- %73 = icmp eq i16 %72, 1 ; third dtype field == 1
- %74 = getelementptr inbounds %1, %1* %8, i64 0, i32 3, i32 1
- %75 = load i8, i8* %74, align 1
- %76 = icmp eq i8 %75, 32 ; second dtype field == 32
- %77 = getelementptr inbounds %1, %1* %8, i64 0, i32 3, i32 0
- %78 = load i8, i8* %77, align 1
- %79 = icmp eq i8 %78, 2 ; first dtype field == 2
- %80 = and i1 %76, %79
- %81 = and i1 %73, %80
- br i1 %81, label %assert_end22, label %assert_fail21, !prof !1
- assert_fail21: ; preds = %assert_end20
- %82 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %82(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.9, i64 0, i64 0))
- ret i32 -1
- assert_end22: ; preds = %assert_end20 -- arg0.byte_offset must be 0
- %83 = getelementptr inbounds %1, %1* %8, i64 0, i32 6
- %84 = load i64, i64* %83, align 8
- %85 = icmp eq i64 %84, 0
- br i1 %85, label %assert_end24, label %assert_fail23, !prof !1
- assert_fail23: ; preds = %assert_end22
- %86 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %86(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.10, i64 0, i64 0))
- ret i32 -1
- assert_end24: ; preds = %assert_end22 -- arg1.ndim must be 1
- %87 = getelementptr inbounds %1, %1* %13, i64 0, i32 2
- %88 = load i32, i32* %87, align 4
- %89 = icmp eq i32 %88, 1
- br i1 %89, label %assert_end26, label %assert_fail25, !prof !1
- assert_fail25: ; preds = %assert_end24
- %90 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %90(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.11, i64 0, i64 0))
- ret i32 -1
- assert_end26: ; preds = %assert_end24 -- arg1 dtype triple must be (2, 32, 1)
- %91 = getelementptr inbounds %1, %1* %13, i64 0, i32 3, i32 2
- %92 = load i16, i16* %91, align 2
- %93 = icmp eq i16 %92, 1
- %94 = getelementptr inbounds %1, %1* %13, i64 0, i32 3, i32 1
- %95 = load i8, i8* %94, align 1
- %96 = icmp eq i8 %95, 32
- %97 = getelementptr inbounds %1, %1* %13, i64 0, i32 3, i32 0
- %98 = load i8, i8* %97, align 1
- %99 = icmp eq i8 %98, 2
- %100 = and i1 %96, %99
- %101 = and i1 %93, %100
- br i1 %101, label %assert_end28, label %assert_fail27, !prof !1
- assert_fail27: ; preds = %assert_end26
- %102 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %102(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.12, i64 0, i64 0))
- ret i32 -1
- assert_end28: ; preds = %assert_end26 -- int32(arg1.shape[0]) must equal n
- %103 = load i64, i64* %42, align 8, !tbaa !81
- %104 = trunc i64 %103 to i32
- %105 = icmp eq i32 %28, %104
- br i1 %105, label %assert_end30, label %assert_fail29, !prof !1
- assert_fail29: ; preds = %assert_end28
- %106 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %106(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.13, i64 0, i64 0))
- ret i32 -1
- assert_end30: ; preds = %assert_end28 -- arg1.byte_offset must be 0
- %107 = getelementptr inbounds %1, %1* %13, i64 0, i32 6
- %108 = load i64, i64* %107, align 8
- %109 = icmp eq i64 %108, 0
- br i1 %109, label %assert_end32, label %assert_fail31, !prof !1
- assert_fail31: ; preds = %assert_end30
- %110 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %110(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.14, i64 0, i64 0))
- ret i32 -1
- assert_end32: ; preds = %assert_end30 -- arg1.device_type must be 4
- %111 = getelementptr inbounds %1, %1* %13, i64 0, i32 1, i32 0
- %112 = load i32, i32* %111, align 4
- %113 = icmp eq i32 %112, 4
- br i1 %113, label %assert_end34, label %assert_fail33, !prof !1
- assert_fail33: ; preds = %assert_end32
- %114 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %114(i8* getelementptr inbounds ([105 x i8], [105 x i8]* @.str.15, i64 0, i64 0))
- ret i32 -1
- assert_end34: ; preds = %assert_end32 -- arg1.device_id must equal dev_id
- %115 = getelementptr inbounds %1, %1* %13, i64 0, i32 1, i32 1
- %116 = load i32, i32* %115, align 4
- %117 = icmp eq i32 %38, %116
- br i1 %117, label %assert_end36, label %assert_fail35, !prof !1
- assert_fail35: ; preds = %assert_end34
- %118 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %118(i8* getelementptr inbounds ([107 x i8], [107 x i8]* @.str.16, i64 0, i64 0))
- ret i32 -1
- assert_end36: ; preds = %assert_end34 -- arg2.ndim must be 1
- %119 = getelementptr inbounds %1, %1* %19, i64 0, i32 2
- %120 = load i32, i32* %119, align 4
- %121 = icmp eq i32 %120, 1
- br i1 %121, label %assert_end38, label %assert_fail37, !prof !1
- assert_fail37: ; preds = %assert_end36
- %122 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %122(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.17, i64 0, i64 0))
- ret i32 -1
- assert_end38: ; preds = %assert_end36 -- arg2 dtype triple must be (2, 32, 1)
- %123 = getelementptr inbounds %1, %1* %19, i64 0, i32 3, i32 2
- %124 = load i16, i16* %123, align 2
- %125 = icmp eq i16 %124, 1
- %126 = getelementptr inbounds %1, %1* %19, i64 0, i32 3, i32 1
- %127 = load i8, i8* %126, align 1
- %128 = icmp eq i8 %127, 32
- %129 = getelementptr inbounds %1, %1* %19, i64 0, i32 3, i32 0
- %130 = load i8, i8* %129, align 1
- %131 = icmp eq i8 %130, 2
- %132 = and i1 %128, %131
- %133 = and i1 %125, %132
- br i1 %133, label %assert_end40, label %assert_fail39, !prof !1
- assert_fail39: ; preds = %assert_end38
- %134 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %134(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.18, i64 0, i64 0))
- ret i32 -1
- assert_end40: ; preds = %assert_end38 -- int32(arg2.shape[0]) must equal n
- %135 = load i64, i64* %53, align 8, !tbaa !95
- %136 = trunc i64 %135 to i32
- %137 = icmp eq i32 %28, %136
- br i1 %137, label %assert_end42, label %assert_fail41, !prof !1
- assert_fail41: ; preds = %assert_end40
- %138 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %138(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.19, i64 0, i64 0))
- ret i32 -1
- assert_end42: ; preds = %assert_end40 -- arg2.byte_offset must be 0
- %139 = getelementptr inbounds %1, %1* %19, i64 0, i32 6
- %140 = load i64, i64* %139, align 8
- %141 = icmp eq i64 %140, 0
- br i1 %141, label %assert_end44, label %assert_fail43, !prof !1
- assert_fail43: ; preds = %assert_end42
- %142 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %142(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.20, i64 0, i64 0))
- ret i32 -1
- assert_end44: ; preds = %assert_end42 -- arg2.device_type must be 4
- %143 = getelementptr inbounds %1, %1* %19, i64 0, i32 1, i32 0
- %144 = load i32, i32* %143, align 4
- %145 = icmp eq i32 %144, 4
- br i1 %145, label %assert_end46, label %assert_fail45, !prof !1
- assert_fail45: ; preds = %assert_end44
- %146 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %146(i8* getelementptr inbounds ([105 x i8], [105 x i8]* @.str.21, i64 0, i64 0))
- ret i32 -1
- assert_end46: ; preds = %assert_end44 -- arg2.device_id must equal dev_id
- %147 = getelementptr inbounds %1, %1* %19, i64 0, i32 1, i32 1
- %148 = load i32, i32* %147, align 4
- %149 = icmp eq i32 %38, %148
- br i1 %149, label %if_then49, label %assert_fail47, !prof !1
- assert_fail47: ; preds = %assert_end46
- %150 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2
- tail call void %150(i8* getelementptr inbounds ([107 x i8], [107 x i8]* @.str.22, i64 0, i64 0))
- ret i32 -1
- if_then49: ; preds = %assert_end46 -- all checks passed; pack (4, dev_id) as the two arguments of "__tvm_set_device"
- %151 = bitcast [7 x %0]* %4 to i64*
- store i64 4, i64* %151, align 8 ; values[0] = 4, the same constant dev_type was checked against
- store i32 0, i32* %.sub, align 4, !tbaa !109 ; type_codes[0] = 0
- %152 = sext i32 %38 to i64
- %153 = getelementptr inbounds [7 x %0], [7 x %0]* %4, i64 0, i64 1
- %154 = bitcast %0* %153 to i64*
- store i64 %152, i64* %154, align 8 ; values[1] = dev_id
- %155 = getelementptr inbounds [7 x i32], [7 x i32]* %3, i64 0, i64 1
- store i32 0, i32* %155, align 4, !tbaa !123 ; type_codes[1] = 0
- %156 = load i8*, i8** @.tvm_func.__tvm_set_device, align 8 ; handle slot; null means it must be looked up by name
- %157 = icmp eq i8* %156, null
- br i1 %157, label %handle_init, label %handle_init_end, !prof !38
- handle_init: ; preds = %if_then49 -- look up "__tvm_set_device" through __TVMBackendGetFuncFromEnv
- %158 = alloca i8*, align 8 ; out-slot that receives the handle
- %159 = load i8*, i8** @__tvm_module_ctx, align 8, !tbaa !2
- %160 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !tbaa !2
- %161 = call i32 %160(i8* %159, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.23, i64 0, i64 0), i8** nonnull %158)
- %162 = icmp eq i32 %161, 0 ; a nonzero status is returned to the caller via call_fail
- br i1 %162, label %call_end, label %call_fail, !prof !1
- handle_init_end: ; preds = %if_then49, %call_end -- invoke the handle with the 2 packed arguments
- %163 = phi i8* [ %156, %if_then49 ], [ %169, %call_end ]
- %164 = getelementptr inbounds [7 x %0], [7 x %0]* %4, i64 0, i64 2 ; slot 2 is passed as the fifth argument; presumably the return-value slot -- confirm against the __TVMFuncCall signature
- %165 = getelementptr inbounds [7 x i32], [7 x i32]* %3, i64 0, i64 2
- %166 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !2
- %167 = call i32 %166(i8* %163, %0* nonnull %.sub55, i32* nonnull %.sub, i32 2, %0* %164, i32* %165)
- %168 = icmp eq i32 %167, 0
- br i1 %168, label %call_end52, label %call_fail, !prof !1
- call_fail: ; preds = %call_end52, %handle_init_end, %handle_init -- shared exit; despite the name it also returns the status of a completed @myadd_compute_ call
- %merge = phi i32 [ %161, %handle_init ], [ %167, %handle_init_end ], [ %170, %call_end52 ]
- ret i32 %merge
- call_end: ; preds = %handle_init -- NOTE(review): the handle is not stored back into @.tvm_func.__tvm_set_device in this function
- %169 = load i8*, i8** %158, align 8
- br label %handle_init_end
- call_end52: ; preds = %handle_init_end -- reuse the scratch arrays; pass arg2, arg0, arg1 field-0 pointers and n
- %170 = call fastcc i32 @myadd_compute_(%0* nonnull %.sub55, i8* %51, i32* nonnull %.sub, i8* %24, i8* %40, i32 %28)
- br label %call_fail
- }
- ; Function Attrs: noinline
- define private fastcc i32 @myadd_compute_(%0* noalias, i8* noalias, i32* noalias, i8* noalias, i8* noalias, i32) unnamed_addr #0 { ; Packs six arguments and invokes the function named "myadd__kernel0" through __TVMFuncCall. Params: %0 = value array, %1/%3/%4 = three buffer pointers, %2 = type-code array, %5 = element count. Returns the i32 status of the lookup if it fails, otherwise of the call.
- entry:
- %6 = bitcast %0* %0 to i8**
- store i8* %1, i8** %6, align 8 ; values[0] = %1 (the caller passes arg2's pointer here)
- store i32 3, i32* %2, align 4, !tbaa !109 ; type_codes[0] = 3, one of the codes @myadd accepts as a pointer
- %7 = getelementptr inbounds %0, %0* %0, i64 1
- %8 = bitcast %0* %7 to i8**
- store i8* %3, i8** %8, align 8 ; values[1] = %3 (arg0's pointer)
- %9 = getelementptr inbounds i32, i32* %2, i64 1
- store i32 3, i32* %9, align 4, !tbaa !123 ; type_codes[1] = 3
- %10 = getelementptr inbounds %0, %0* %0, i64 2
- %11 = bitcast %0* %10 to i8**
- store i8* %4, i8** %11, align 8 ; values[2] = %4 (arg1's pointer)
- %12 = getelementptr inbounds i32, i32* %2, i64 2
- store i32 3, i32* %12, align 4, !tbaa !125 ; type_codes[2] = 3
- %13 = sext i32 %5 to i64
- %14 = getelementptr inbounds %0, %0* %0, i64 3
- %15 = bitcast %0* %14 to i64*
- store i64 %13, i64* %15, align 8 ; values[3] = n
- %16 = getelementptr inbounds i32, i32* %2, i64 3
- store i32 0, i32* %16, align 4, !tbaa !128 ; type_codes[3] = 0
- %17 = add nsw i32 %5, 63
- %18 = ashr i32 %17, 6 ; (n + 63) >> 6
- %19 = sext i32 %18 to i64
- %20 = getelementptr inbounds %0, %0* %0, i64 4
- %21 = bitcast %0* %20 to i64*
- store i64 %19, i64* %21, align 8 ; values[4] = (n + 63) >> 6; presumably the number of work-groups -- confirm against the runtime launch convention
- %22 = getelementptr inbounds i32, i32* %2, i64 4
- store i32 0, i32* %22, align 4, !tbaa !130 ; type_codes[4] = 0
- %23 = getelementptr inbounds %0, %0* %0, i64 5
- %24 = bitcast %0* %23 to i64*
- store i64 64, i64* %24, align 8 ; values[5] = 64; presumably the work-group size, matching the factor 64 in the OpenCL kernel -- confirm
- %25 = getelementptr inbounds i32, i32* %2, i64 5
- store i32 0, i32* %25, align 4, !tbaa !134 ; type_codes[5] = 0
- %26 = load i8*, i8** @.tvm_func.myadd__kernel0, align 8 ; handle slot; null means it must be looked up by name
- %27 = icmp eq i8* %26, null
- br i1 %27, label %handle_init, label %handle_init_end, !prof !38
- handle_init: ; preds = %entry -- look up "myadd__kernel0" through __TVMBackendGetFuncFromEnv
- %28 = alloca i8*, align 8 ; out-slot that receives the handle
- %29 = load i8*, i8** @__tvm_module_ctx, align 8, !tbaa !2
- %30 = load i32 (i8*, i8*, i8**)*, i32 (i8*, i8*, i8**)** @__TVMBackendGetFuncFromEnv, align 8, !tbaa !2
- %31 = call i32 %30(i8* %29, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.24, i64 0, i64 0), i8** nonnull %28)
- %32 = icmp eq i32 %31, 0 ; a nonzero status is returned via call_fail
- br i1 %32, label %call_end, label %call_fail, !prof !1
- handle_init_end: ; preds = %entry, %call_end -- invoke the handle with the 6 packed arguments
- %33 = phi i8* [ %26, %entry ], [ %38, %call_end ]
- %34 = getelementptr inbounds %0, %0* %0, i64 6 ; slot 6 is passed as the fifth argument; presumably the return-value slot -- confirm
- %35 = getelementptr inbounds i32, i32* %2, i64 6
- %36 = load i32 (i8*, %0*, i32*, i32, %0*, i32*)*, i32 (i8*, %0*, i32*, i32, %0*, i32*)** @__TVMFuncCall, align 8, !tbaa !2
- %37 = call i32 %36(i8* %33, %0* nonnull %0, i32* nonnull %2, i32 6, %0* %34, i32* %35)
- br label %call_fail
- call_fail: ; preds = %handle_init_end, %handle_init -- shared exit; returns the lookup status or the call status
- %merge = phi i32 [ %31, %handle_init ], [ %37, %handle_init_end ]
- ret i32 %merge
- call_end: ; preds = %handle_init -- NOTE(review): the handle is not stored back into @.tvm_func.myadd__kernel0 in this function
- %38 = load i8*, i8** %28, align 8
- br label %handle_init_end
- }
- attributes #0 = { noinline }
- !llvm.module.flags = !{!0}
- !0 = !{i32 2, !"tvm_target", !"llvm"}
- !1 = !{!"branch_weights", i32 1048576, i32 1}
- !2 = !{!3, !3, i64 0}
- !3 = !{!"ctx_ptr", !4, i64 0}
- !4 = !{!"tvm-tbaa"}
- !5 = !{!6, !6, i64 0}
- !6 = !{!"000002BE8884BE20.w1.b0", !7, i64 0}
- !7 = !{!"000002BE8884BE20.w2.b0", !8, i64 0}
- !8 = !{!"000002BE8884BE20.w4.b0", !9, i64 0}
- !9 = !{!"000002BE8884BE20.w8.b0", !10, i64 0}
- !10 = !{!"000002BE8884BE20.w16.b0", !11, i64 0}
- !11 = !{!"000002BE8884BE20.w32.b0", !12, i64 0}
- !12 = !{!"000002BE8884BE20.w64.b0", !13, i64 0}
- !13 = !{!"000002BE8884BE20.w128.b0", !14, i64 0}
- !14 = !{!"000002BE8884BE20.w256.b0", !15, i64 0}
- !15 = !{!"000002BE8884BE20.w512.b0", !16, i64 0}
- !16 = !{!"000002BE8884BE20.w1024.b0", !17, i64 0}
- !17 = !{!"int32", !18, i64 0}
- !18 = !{!"000002BE8884BE20", !4, i64 0}
- !19 = !{!20, !20, i64 0}
- !20 = !{!"000002BE8884BE20.w1.b1", !7, i64 0}
- !21 = !{!22, !22, i64 0}
- !22 = !{!"000002BE8884BE20.w1.b2", !23, i64 0}
- !23 = !{!"000002BE8884BE20.w2.b2", !8, i64 0}
- !24 = !{!25, !25, i64 0}
- !25 = !{!"000002BE8884B4C0.w1.b0", !26, i64 0}
- !26 = !{!"000002BE8884B4C0.w2.b0", !27, i64 0}
- !27 = !{!"000002BE8884B4C0.w4.b0", !28, i64 0}
- !28 = !{!"000002BE8884B4C0.w8.b0", !29, i64 0}
- !29 = !{!"000002BE8884B4C0.w16.b0", !30, i64 0}
- !30 = !{!"000002BE8884B4C0.w32.b0", !31, i64 0}
- !31 = !{!"000002BE8884B4C0.w64.b0", !32, i64 0}
- !32 = !{!"000002BE8884B4C0.w128.b0", !33, i64 0}
- !33 = !{!"000002BE8884B4C0.w256.b0", !34, i64 0}
- !34 = !{!"000002BE8884B4C0.w512.b0", !35, i64 0}
- !35 = !{!"000002BE8884B4C0.w1024.b0", !36, i64 0}
- !36 = !{!"int64", !37, i64 0}
- !37 = !{!"000002BE8884B4C0", !4, i64 0}
- !38 = !{!"branch_weights", i32 1, i32 1048576}
- !39 = !{!40, !40, i64 0}
- !40 = !{!"000002BE8884ACF0.w1.b0", !41, i64 0}
- !41 = !{!"000002BE8884ACF0.w2.b0", !42, i64 0}
- !42 = !{!"000002BE8884ACF0.w4.b0", !43, i64 0}
- !43 = !{!"000002BE8884ACF0.w8.b0", !44, i64 0}
- !44 = !{!"000002BE8884ACF0.w16.b0", !45, i64 0}
- !45 = !{!"000002BE8884ACF0.w32.b0", !46, i64 0}
- !46 = !{!"000002BE8884ACF0.w64.b0", !47, i64 0}
- !47 = !{!"000002BE8884ACF0.w128.b0", !48, i64 0}
- !48 = !{!"000002BE8884ACF0.w256.b0", !49, i64 0}
- !49 = !{!"000002BE8884ACF0.w512.b0", !50, i64 0}
- !50 = !{!"000002BE8884ACF0.w1024.b0", !51, i64 0}
- !51 = !{!"int64", !52, i64 0}
- !52 = !{!"000002BE8884ACF0", !4, i64 0}
- !53 = !{!54, !54, i64 0}
- !54 = !{!"000002BE8884B6A0.w1.b0", !55, i64 0}
- !55 = !{!"000002BE8884B6A0.w2.b0", !56, i64 0}
- !56 = !{!"000002BE8884B6A0.w4.b0", !57, i64 0}
- !57 = !{!"000002BE8884B6A0.w8.b0", !58, i64 0}
- !58 = !{!"000002BE8884B6A0.w16.b0", !59, i64 0}
- !59 = !{!"000002BE8884B6A0.w32.b0", !60, i64 0}
- !60 = !{!"000002BE8884B6A0.w64.b0", !61, i64 0}
- !61 = !{!"000002BE8884B6A0.w128.b0", !62, i64 0}
- !62 = !{!"000002BE8884B6A0.w256.b0", !63, i64 0}
- !63 = !{!"000002BE8884B6A0.w512.b0", !64, i64 0}
- !64 = !{!"000002BE8884B6A0.w1024.b0", !65, i64 0}
- !65 = !{!"int64", !66, i64 0}
- !66 = !{!"000002BE8884B6A0", !4, i64 0}
- !67 = !{!68, !68, i64 0}
- !68 = !{!"000002BE88857180.w1.b0", !69, i64 0}
- !69 = !{!"000002BE88857180.w2.b0", !70, i64 0}
- !70 = !{!"000002BE88857180.w4.b0", !71, i64 0}
- !71 = !{!"000002BE88857180.w8.b0", !72, i64 0}
- !72 = !{!"000002BE88857180.w16.b0", !73, i64 0}
- !73 = !{!"000002BE88857180.w32.b0", !74, i64 0}
- !74 = !{!"000002BE88857180.w64.b0", !75, i64 0}
- !75 = !{!"000002BE88857180.w128.b0", !76, i64 0}
- !76 = !{!"000002BE88857180.w256.b0", !77, i64 0}
- !77 = !{!"000002BE88857180.w512.b0", !78, i64 0}
- !78 = !{!"000002BE88857180.w1024.b0", !79, i64 0}
- !79 = !{!"int64", !80, i64 0}
- !80 = !{!"000002BE88857180", !4, i64 0}
- !81 = !{!82, !82, i64 0}
- !82 = !{!"000002BE8884B5B0.w1.b0", !83, i64 0}
- !83 = !{!"000002BE8884B5B0.w2.b0", !84, i64 0}
- !84 = !{!"000002BE8884B5B0.w4.b0", !85, i64 0}
- !85 = !{!"000002BE8884B5B0.w8.b0", !86, i64 0}
- !86 = !{!"000002BE8884B5B0.w16.b0", !87, i64 0}
- !87 = !{!"000002BE8884B5B0.w32.b0", !88, i64 0}
- !88 = !{!"000002BE8884B5B0.w64.b0", !89, i64 0}
- !89 = !{!"000002BE8884B5B0.w128.b0", !90, i64 0}
- !90 = !{!"000002BE8884B5B0.w256.b0", !91, i64 0}
- !91 = !{!"000002BE8884B5B0.w512.b0", !92, i64 0}
- !92 = !{!"000002BE8884B5B0.w1024.b0", !93, i64 0}
- !93 = !{!"int64", !94, i64 0}
- !94 = !{!"000002BE8884B5B0", !4, i64 0}
- !95 = !{!96, !96, i64 0}
- !96 = !{!"000002BE88857040.w1.b0", !97, i64 0}
- !97 = !{!"000002BE88857040.w2.b0", !98, i64 0}
- !98 = !{!"000002BE88857040.w4.b0", !99, i64 0}
- !99 = !{!"000002BE88857040.w8.b0", !100, i64 0}
- !100 = !{!"000002BE88857040.w16.b0", !101, i64 0}
- !101 = !{!"000002BE88857040.w32.b0", !102, i64 0}
- !102 = !{!"000002BE88857040.w64.b0", !103, i64 0}
- !103 = !{!"000002BE88857040.w128.b0", !104, i64 0}
- !104 = !{!"000002BE88857040.w256.b0", !105, i64 0}
- !105 = !{!"000002BE88857040.w512.b0", !106, i64 0}
- !106 = !{!"000002BE88857040.w1024.b0", !107, i64 0}
- !107 = !{!"int64", !108, i64 0}
- !108 = !{!"000002BE88857040", !4, i64 0}
- !109 = !{!110, !110, i64 0}
- !110 = !{!"000002BE8885AA70.w1.b0", !111, i64 0}
- !111 = !{!"000002BE8885AA70.w2.b0", !112, i64 0}
- !112 = !{!"000002BE8885AA70.w4.b0", !113, i64 0}
- !113 = !{!"000002BE8885AA70.w8.b0", !114, i64 0}
- !114 = !{!"000002BE8885AA70.w16.b0", !115, i64 0}
- !115 = !{!"000002BE8885AA70.w32.b0", !116, i64 0}
- !116 = !{!"000002BE8885AA70.w64.b0", !117, i64 0}
- !117 = !{!"000002BE8885AA70.w128.b0", !118, i64 0}
- !118 = !{!"000002BE8885AA70.w256.b0", !119, i64 0}
- !119 = !{!"000002BE8885AA70.w512.b0", !120, i64 0}
- !120 = !{!"000002BE8885AA70.w1024.b0", !121, i64 0}
- !121 = !{!"int32", !122, i64 0}
- !122 = !{!"000002BE8885AA70", !4, i64 0}
- !123 = !{!124, !124, i64 0}
- !124 = !{!"000002BE8885AA70.w1.b1", !111, i64 0}
- !125 = !{!126, !126, i64 0}
- !126 = !{!"000002BE8885AA70.w1.b2", !127, i64 0}
- !127 = !{!"000002BE8885AA70.w2.b2", !112, i64 0}
- !128 = !{!129, !129, i64 0}
- !129 = !{!"000002BE8885AA70.w1.b3", !127, i64 0}
- !130 = !{!131, !131, i64 0}
- !131 = !{!"000002BE8885AA70.w1.b4", !132, i64 0}
- !132 = !{!"000002BE8885AA70.w2.b4", !133, i64 0}
- !133 = !{!"000002BE8885AA70.w4.b4", !113, i64 0}
- !134 = !{!135, !135, i64 0}
- !135 = !{!"000002BE8885AA70.w1.b5", !132, i64 0}
- ------opencl code------
/*
 * Element-wise float addition: each work-item writes C[i] = A[i] + B[i]
 * for i = group_id * 64 + local_id along dimension 0.
 *
 * Work-groups whose id is below n / 64 store unconditionally. In every
 * other group a work-item stores only while group_id * 64 < n - local_id,
 * which keeps the final, partially filled group from touching elements
 * at or beyond n.
 *
 * NOTE(review): the factor 64 presumably equals the local work size chosen
 * by the host -- confirm against the launch code.
 */
__kernel void myadd__kernel0(__global float* restrict C, __global float* restrict A, __global float* restrict B, int n) {
  const int group = (int)get_group_id(0);
  const int lane = (int)get_local_id(0);
  const int base = group * 64;

  /* Skip the store only when the group is not a full one AND this
     work-item falls outside the remaining elements. */
  if (!(group < (n / 64)) && !(base < (n - lane))) {
    return;
  }

  const int idx = base + lane;
  C[idx] = (A[idx] + B[idx]);
}
Add Comment
Please, Sign In to add comment