Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <class 'tvm.tensor.Tensor'>
- [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:192: Multiple OpenCL platforms matched, use the first one ...
- [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:195: Initialize OpenCL platform 'Intel(R) OpenCL '
- [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:215: opencl(0)='Intel(R) HD Graphics 4600 ' cl_device_id=000001BB23F29120
- ------tvm gen code------
- Function: myadd
- Program dump: code-size=580
- ----------begin-----------------
- [0] TVM_STACK_ALLOCA_BY_8BYTE 4
- [1]
- [2] STORE_HEAP 3 stack_tcode
- [3]
- [4] TVM_STACK_ALLOCA_BY_8BYTE 7
- [5]
- [6] STORE_HEAP 4 stack_value
- [7]
- [8] LOAD_HEAP 2 num_args
- [9]
- [10] PUSH_I64 3
- [11]
- [12] EQ_I64
- [13] ASSERT 0
- [14]
- [15] LOAD_HEAP 0 args
- [16]
- [17] TVM_STRUCT_GET 0 12
- [18]
- [19]
- [20] STORE_HEAP 5 arg0
- [21]
- [22] LOAD_HEAP 1 arg_type_ids
- [23]
- [24] ARRAY_LOAD_INT32 0
- [25]
- [26] STORE_HEAP 6 arg0.code
- [27]
- [28] LOAD_HEAP 0 args
- [29]
- [30] TVM_STRUCT_GET 1 12
- [31]
- [32]
- [33] STORE_HEAP 7 arg1
- [34]
- [35] LOAD_HEAP 1 arg_type_ids
- [36]
- [37] ARRAY_LOAD_INT32 1
- [38]
- [39] STORE_HEAP 8 arg1.code
- [40]
- [41] LOAD_HEAP 0 args
- [42]
- [43] TVM_STRUCT_GET 2 12
- [44]
- [45]
- [46] STORE_HEAP 9 arg2
- [47]
- [48] LOAD_HEAP 1 arg_type_ids
- [49]
- [50] ARRAY_LOAD_INT32 2
- [51]
- [52] STORE_HEAP 10 arg2.code
- [53]
- [54] LOAD_HEAP 5 arg0
- [55]
- [56] TVM_STRUCT_GET 0 1
- [57]
- [58]
- [59] STORE_HEAP 11 A
- [60]
- [61] LOAD_HEAP 5 arg0
- [62]
- [63] TVM_STRUCT_GET 0 2
- [64]
- [65]
- [66] STORE_HEAP 12 arg0.shape
- [67]
- [68] LOAD_HEAP 12 arg0.shape
- [69]
- [70] ARRAY_LOAD_INT64 0
- [71]
- [72] STORE_HEAP 13 n
- [73]
- [74] LOAD_HEAP 5 arg0
- [75]
- [76] TVM_STRUCT_GET 0 3
- [77]
- [78]
- [79] STORE_HEAP 14 arg0.strides
- [80]
- [81] LOAD_HEAP 14 arg0.strides
- [82]
- [83] PUSH_I64 0
- [84]
- [85] EQ_HANDLE
- [86] ASSERT 1
- [87]
- [88] LOAD_HEAP 5 arg0
- [89]
- [90] TVM_STRUCT_GET 0 10
- [91]
- [92]
- [93] STORE_HEAP 15 dev_type
- [94]
- [95] LOAD_HEAP 5 arg0
- [96]
- [97] TVM_STRUCT_GET 0 9
- [98]
- [99]
- [100] STORE_HEAP 16 dev_id
- [101]
- [102] LOAD_HEAP 7 arg1
- [103]
- [104] TVM_STRUCT_GET 0 1
- [105]
- [106]
- [107] STORE_HEAP 17 B
- [108]
- [109] LOAD_HEAP 7 arg1
- [110]
- [111] TVM_STRUCT_GET 0 2
- [112]
- [113]
- [114] STORE_HEAP 18 arg1.shape
- [115]
- [116] LOAD_HEAP 7 arg1
- [117]
- [118] TVM_STRUCT_GET 0 3
- [119]
- [120]
- [121] STORE_HEAP 19 arg1.strides
- [122]
- [123] LOAD_HEAP 19 arg1.strides
- [124]
- [125] PUSH_I64 0
- [126]
- [127] EQ_HANDLE
- [128] ASSERT 2
- [129]
- [130] LOAD_HEAP 9 arg2
- [131]
- [132] TVM_STRUCT_GET 0 1
- [133]
- [134]
- [135] STORE_HEAP 20 C
- [136]
- [137] LOAD_HEAP 9 arg2
- [138]
- [139] TVM_STRUCT_GET 0 2
- [140]
- [141]
- [142] STORE_HEAP 21 arg2.shape
- [143]
- [144] LOAD_HEAP 9 arg2
- [145]
- [146] TVM_STRUCT_GET 0 3
- [147]
- [148]
- [149] STORE_HEAP 22 arg2.strides
- [150]
- [151] LOAD_HEAP 22 arg2.strides
- [152]
- [153] PUSH_I64 0
- [154]
- [155] EQ_HANDLE
- [156] ASSERT 3
- [157]
- [158] LOAD_HEAP 6 arg0.code
- [159]
- [160] PUSH_I64 3
- [161]
- [162] EQ_I64
- [163] RJUMP_IF_TRUE rel=7 to 170
- [164]
- [165] LOAD_HEAP 6 arg0.code
- [166]
- [167] PUSH_I64 7
- [168]
- [169] EQ_I64
- [170] RJUMP_IF_TRUE rel=7 to 177
- [171]
- [172] LOAD_HEAP 6 arg0.code
- [173]
- [174] PUSH_I64 4
- [175]
- [176] EQ_I64
- [177] ASSERT 4
- [178]
- [179] LOAD_HEAP 8 arg1.code
- [180]
- [181] PUSH_I64 3
- [182]
- [183] EQ_I64
- [184] RJUMP_IF_TRUE rel=7 to 191
- [185]
- [186] LOAD_HEAP 8 arg1.code
- [187]
- [188] PUSH_I64 7
- [189]
- [190] EQ_I64
- [191] RJUMP_IF_TRUE rel=7 to 198
- [192]
- [193] LOAD_HEAP 8 arg1.code
- [194]
- [195] PUSH_I64 4
- [196]
- [197] EQ_I64
- [198] ASSERT 5
- [199]
- [200] LOAD_HEAP 10 arg2.code
- [201]
- [202] PUSH_I64 3
- [203]
- [204] EQ_I64
- [205] RJUMP_IF_TRUE rel=7 to 212
- [206]
- [207] LOAD_HEAP 10 arg2.code
- [208]
- [209] PUSH_I64 7
- [210]
- [211] EQ_I64
- [212] RJUMP_IF_TRUE rel=7 to 219
- [213]
- [214] LOAD_HEAP 10 arg2.code
- [215]
- [216] PUSH_I64 4
- [217]
- [218] EQ_I64
- [219] ASSERT 6
- [220]
- [221] LOAD_HEAP 15 dev_type
- [222]
- [223] PUSH_I64 4
- [224]
- [225] EQ_I64
- [226] ASSERT 7
- [227]
- [228] PUSH_I64 1
- [229]
- [230] LOAD_HEAP 5 arg0
- [231]
- [232] TVM_STRUCT_GET 0 4
- [233]
- [234]
- [235] EQ_I64
- [236] ASSERT 8
- [237]
- [238] LOAD_HEAP 5 arg0
- [239]
- [240] TVM_STRUCT_GET 0 5
- [241]
- [242]
- [243] PUSH_I64 2
- [244]
- [245] EQ_I64
- [246] RJUMP_IF_FALSE rel=11 to 257
- [247]
- [248] POP
- [249] LOAD_HEAP 5 arg0
- [250]
- [251] TVM_STRUCT_GET 0 6
- [252]
- [253]
- [254] PUSH_I64 32
- [255]
- [256] EQ_I64
- [257] RJUMP_IF_FALSE rel=11 to 268
- [258]
- [259] POP
- [260] LOAD_HEAP 5 arg0
- [261]
- [262] TVM_STRUCT_GET 0 7
- [263]
- [264]
- [265] PUSH_I64 1
- [266]
- [267] EQ_I64
- [268] ASSERT 9
- [269]
- [270] LOAD_HEAP 5 arg0
- [271]
- [272] TVM_STRUCT_GET 0 8
- [273]
- [274]
- [275] PUSH_I64 0
- [276]
- [277] EQ_I64
- [278] ASSERT 10
- [279]
- [280] PUSH_I64 1
- [281]
- [282] LOAD_HEAP 7 arg1
- [283]
- [284] TVM_STRUCT_GET 0 4
- [285]
- [286]
- [287] EQ_I64
- [288] ASSERT 11
- [289]
- [290] LOAD_HEAP 7 arg1
- [291]
- [292] TVM_STRUCT_GET 0 5
- [293]
- [294]
- [295] PUSH_I64 2
- [296]
- [297] EQ_I64
- [298] RJUMP_IF_FALSE rel=11 to 309
- [299]
- [300] POP
- [301] LOAD_HEAP 7 arg1
- [302]
- [303] TVM_STRUCT_GET 0 6
- [304]
- [305]
- [306] PUSH_I64 32
- [307]
- [308] EQ_I64
- [309] RJUMP_IF_FALSE rel=11 to 320
- [310]
- [311] POP
- [312] LOAD_HEAP 7 arg1
- [313]
- [314] TVM_STRUCT_GET 0 7
- [315]
- [316]
- [317] PUSH_I64 1
- [318]
- [319] EQ_I64
- [320] ASSERT 12
- [321]
- [322] LOAD_HEAP 13 n
- [323]
- [324] LOAD_HEAP 18 arg1.shape
- [325]
- [326] ARRAY_LOAD_INT64 0
- [327]
- [328] EQ_I64
- [329] ASSERT 13
- [330]
- [331] LOAD_HEAP 7 arg1
- [332]
- [333] TVM_STRUCT_GET 0 8
- [334]
- [335]
- [336] PUSH_I64 0
- [337]
- [338] EQ_I64
- [339] ASSERT 14
- [340]
- [341] PUSH_I64 4
- [342]
- [343] LOAD_HEAP 7 arg1
- [344]
- [345] TVM_STRUCT_GET 0 10
- [346]
- [347]
- [348] EQ_I64
- [349] ASSERT 15
- [350]
- [351] LOAD_HEAP 16 dev_id
- [352]
- [353] LOAD_HEAP 7 arg1
- [354]
- [355] TVM_STRUCT_GET 0 9
- [356]
- [357]
- [358] EQ_I64
- [359] ASSERT 16
- [360]
- [361] PUSH_I64 1
- [362]
- [363] LOAD_HEAP 9 arg2
- [364]
- [365] TVM_STRUCT_GET 0 4
- [366]
- [367]
- [368] EQ_I64
- [369] ASSERT 17
- [370]
- [371] LOAD_HEAP 9 arg2
- [372]
- [373] TVM_STRUCT_GET 0 5
- [374]
- [375]
- [376] PUSH_I64 2
- [377]
- [378] EQ_I64
- [379] RJUMP_IF_FALSE rel=11 to 390
- [380]
- [381] POP
- [382] LOAD_HEAP 9 arg2
- [383]
- [384] TVM_STRUCT_GET 0 6
- [385]
- [386]
- [387] PUSH_I64 32
- [388]
- [389] EQ_I64
- [390] RJUMP_IF_FALSE rel=11 to 401
- [391]
- [392] POP
- [393] LOAD_HEAP 9 arg2
- [394]
- [395] TVM_STRUCT_GET 0 7
- [396]
- [397]
- [398] PUSH_I64 1
- [399]
- [400] EQ_I64
- [401] ASSERT 18
- [402]
- [403] LOAD_HEAP 13 n
- [404]
- [405] LOAD_HEAP 21 arg2.shape
- [406]
- [407] ARRAY_LOAD_INT64 0
- [408]
- [409] EQ_I64
- [410] ASSERT 19
- [411]
- [412] LOAD_HEAP 9 arg2
- [413]
- [414] TVM_STRUCT_GET 0 8
- [415]
- [416]
- [417] PUSH_I64 0
- [418]
- [419] EQ_I64
- [420] ASSERT 20
- [421]
- [422] PUSH_I64 4
- [423]
- [424] LOAD_HEAP 9 arg2
- [425]
- [426] TVM_STRUCT_GET 0 10
- [427]
- [428]
- [429] EQ_I64
- [430] ASSERT 21
- [431]
- [432] LOAD_HEAP 16 dev_id
- [433]
- [434] LOAD_HEAP 9 arg2
- [435]
- [436] TVM_STRUCT_GET 0 9
- [437]
- [438]
- [439] EQ_I64
- [440] ASSERT 22
- [441]
- [442] PUSH_I64 4
- [443]
- [444] PUSH_I64 1
- [445]
- [446] EQ_I64
- [447] NOT
- [448] RJUMP_IF_FALSE rel=38 to 486
- [449]
- [450] POP
- [451] LOAD_HEAP 4 stack_value
- [452]
- [453] PUSH_I64 4
- [454]
- [455] TVM_STRUCT_SET 0 12
- [456]
- [457]
- [458] LOAD_HEAP 3 stack_tcode
- [459]
- [460] PUSH_I64 0
- [461]
- [462] ARRAY_STORE_INT32 0
- [463]
- [464] LOAD_HEAP 4 stack_value
- [465]
- [466] LOAD_HEAP 16 dev_id
- [467]
- [468] TVM_STRUCT_SET 1 12
- [469]
- [470]
- [471] LOAD_HEAP 3 stack_tcode
- [472]
- [473] PUSH_I64 0
- [474]
- [475] ARRAY_STORE_INT32 1
- [476]
- [477] LOAD_HEAP 4 stack_value
- [478]
- [479] LOAD_HEAP 3 stack_tcode
- [480]
- [481] CALL_PACKED_FUNC fid=0 begin=0 end=2
- [482]
- [483]
- [484]
- [485] POP
- [486] POP
- [487] LOAD_HEAP 4 stack_value
- [488]
- [489] LOAD_HEAP 20 C
- [490]
- [491] TVM_STRUCT_SET 0 12
- [492]
- [493]
- [494] LOAD_HEAP 3 stack_tcode
- [495]
- [496] PUSH_I64 3
- [497]
- [498] ARRAY_STORE_INT32 0
- [499]
- [500] LOAD_HEAP 4 stack_value
- [501]
- [502] LOAD_HEAP 11 A
- [503]
- [504] TVM_STRUCT_SET 1 12
- [505]
- [506]
- [507] LOAD_HEAP 3 stack_tcode
- [508]
- [509] PUSH_I64 3
- [510]
- [511] ARRAY_STORE_INT32 1
- [512]
- [513] LOAD_HEAP 4 stack_value
- [514]
- [515] LOAD_HEAP 17 B
- [516]
- [517] TVM_STRUCT_SET 2 12
- [518]
- [519]
- [520] LOAD_HEAP 3 stack_tcode
- [521]
- [522] PUSH_I64 3
- [523]
- [524] ARRAY_STORE_INT32 2
- [525]
- [526] LOAD_HEAP 4 stack_value
- [527]
- [528] LOAD_HEAP 13 n
- [529]
- [530] TVM_STRUCT_SET 3 12
- [531]
- [532]
- [533] LOAD_HEAP 3 stack_tcode
- [534]
- [535] PUSH_I64 0
- [536]
- [537] ARRAY_STORE_INT32 3
- [538]
- [539] LOAD_HEAP 4 stack_value
- [540]
- [541] LOAD_HEAP 13 n
- [542]
- [543] PUSH_I64 63
- [544]
- [545] ADD_I64
- [546] PUSH_I64 64
- [547]
- [548] DIV_I64
- [549] TVM_STRUCT_SET 4 12
- [550]
- [551]
- [552] LOAD_HEAP 3 stack_tcode
- [553]
- [554] PUSH_I64 0
- [555]
- [556] ARRAY_STORE_INT32 4
- [557]
- [558] LOAD_HEAP 4 stack_value
- [559]
- [560] PUSH_I64 64
- [561]
- [562] TVM_STRUCT_SET 5 12
- [563]
- [564]
- [565] LOAD_HEAP 3 stack_tcode
- [566]
- [567] PUSH_I64 0
- [568]
- [569] ARRAY_STORE_INT32 5
- [570]
- [571] LOAD_HEAP 4 stack_value
- [572]
- [573] LOAD_HEAP 3 stack_tcode
- [574]
- [575] CALL_PACKED_FUNC fid=1 begin=0 end=6
- [576]
- [577]
- [578]
- [579] POP
- ----------end--------------------
- ------opencl code------
- __kernel void myadd__kernel0(__global float* restrict C, __global float* restrict A, __global float* restrict B, int n) {
- if (((int)get_group_id(0)) < ((n + -127) / 64)) {
- C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
- } else {
- if ((((int)get_group_id(0)) * 64) < (n - ((int)get_local_id(0)))) {
- C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
- }
- }
- }
- Press any key to continue . . .
Add Comment
Please, Sign In to add comment