Guest User

Untitled

a guest
May 11th, 2018
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.34 KB | None | 0 0
  1. <class 'tvm.tensor.Tensor'>
  2. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:192: Multiple OpenCL platforms matched, use the first one ...
  3. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:195: Initialize OpenCL platform 'Intel(R) OpenCL '
  4. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:215: opencl(0)='Intel(R) HD Graphics 4600 ' cl_device_id=000001BB23F29120
  5. ------tvm gen code------
  6. Function: myadd
  7. Program dump: code-size=580
  8. ----------begin-----------------
  9. [0] TVM_STACK_ALLOCA_BY_8BYTE 4
  10. [1]
  11. [2] STORE_HEAP 3 stack_tcode
  12. [3]
  13. [4] TVM_STACK_ALLOCA_BY_8BYTE 7
  14. [5]
  15. [6] STORE_HEAP 4 stack_value
  16. [7]
  17. [8] LOAD_HEAP 2 num_args
  18. [9]
  19. [10] PUSH_I64 3
  20. [11]
  21. [12] EQ_I64
  22. [13] ASSERT 0
  23. [14]
  24. [15] LOAD_HEAP 0 args
  25. [16]
  26. [17] TVM_STRUCT_GET 0 12
  27. [18]
  28. [19]
  29. [20] STORE_HEAP 5 arg0
  30. [21]
  31. [22] LOAD_HEAP 1 arg_type_ids
  32. [23]
  33. [24] ARRAY_LOAD_INT32 0
  34. [25]
  35. [26] STORE_HEAP 6 arg0.code
  36. [27]
  37. [28] LOAD_HEAP 0 args
  38. [29]
  39. [30] TVM_STRUCT_GET 1 12
  40. [31]
  41. [32]
  42. [33] STORE_HEAP 7 arg1
  43. [34]
  44. [35] LOAD_HEAP 1 arg_type_ids
  45. [36]
  46. [37] ARRAY_LOAD_INT32 1
  47. [38]
  48. [39] STORE_HEAP 8 arg1.code
  49. [40]
  50. [41] LOAD_HEAP 0 args
  51. [42]
  52. [43] TVM_STRUCT_GET 2 12
  53. [44]
  54. [45]
  55. [46] STORE_HEAP 9 arg2
  56. [47]
  57. [48] LOAD_HEAP 1 arg_type_ids
  58. [49]
  59. [50] ARRAY_LOAD_INT32 2
  60. [51]
  61. [52] STORE_HEAP 10 arg2.code
  62. [53]
  63. [54] LOAD_HEAP 5 arg0
  64. [55]
  65. [56] TVM_STRUCT_GET 0 1
  66. [57]
  67. [58]
  68. [59] STORE_HEAP 11 A
  69. [60]
  70. [61] LOAD_HEAP 5 arg0
  71. [62]
  72. [63] TVM_STRUCT_GET 0 2
  73. [64]
  74. [65]
  75. [66] STORE_HEAP 12 arg0.shape
  76. [67]
  77. [68] LOAD_HEAP 12 arg0.shape
  78. [69]
  79. [70] ARRAY_LOAD_INT64 0
  80. [71]
  81. [72] STORE_HEAP 13 n
  82. [73]
  83. [74] LOAD_HEAP 5 arg0
  84. [75]
  85. [76] TVM_STRUCT_GET 0 3
  86. [77]
  87. [78]
  88. [79] STORE_HEAP 14 arg0.strides
  89. [80]
  90. [81] LOAD_HEAP 14 arg0.strides
  91. [82]
  92. [83] PUSH_I64 0
  93. [84]
  94. [85] EQ_HANDLE
  95. [86] ASSERT 1
  96. [87]
  97. [88] LOAD_HEAP 5 arg0
  98. [89]
  99. [90] TVM_STRUCT_GET 0 10
  100. [91]
  101. [92]
  102. [93] STORE_HEAP 15 dev_type
  103. [94]
  104. [95] LOAD_HEAP 5 arg0
  105. [96]
  106. [97] TVM_STRUCT_GET 0 9
  107. [98]
  108. [99]
  109. [100] STORE_HEAP 16 dev_id
  110. [101]
  111. [102] LOAD_HEAP 7 arg1
  112. [103]
  113. [104] TVM_STRUCT_GET 0 1
  114. [105]
  115. [106]
  116. [107] STORE_HEAP 17 B
  117. [108]
  118. [109] LOAD_HEAP 7 arg1
  119. [110]
  120. [111] TVM_STRUCT_GET 0 2
  121. [112]
  122. [113]
  123. [114] STORE_HEAP 18 arg1.shape
  124. [115]
  125. [116] LOAD_HEAP 7 arg1
  126. [117]
  127. [118] TVM_STRUCT_GET 0 3
  128. [119]
  129. [120]
  130. [121] STORE_HEAP 19 arg1.strides
  131. [122]
  132. [123] LOAD_HEAP 19 arg1.strides
  133. [124]
  134. [125] PUSH_I64 0
  135. [126]
  136. [127] EQ_HANDLE
  137. [128] ASSERT 2
  138. [129]
  139. [130] LOAD_HEAP 9 arg2
  140. [131]
  141. [132] TVM_STRUCT_GET 0 1
  142. [133]
  143. [134]
  144. [135] STORE_HEAP 20 C
  145. [136]
  146. [137] LOAD_HEAP 9 arg2
  147. [138]
  148. [139] TVM_STRUCT_GET 0 2
  149. [140]
  150. [141]
  151. [142] STORE_HEAP 21 arg2.shape
  152. [143]
  153. [144] LOAD_HEAP 9 arg2
  154. [145]
  155. [146] TVM_STRUCT_GET 0 3
  156. [147]
  157. [148]
  158. [149] STORE_HEAP 22 arg2.strides
  159. [150]
  160. [151] LOAD_HEAP 22 arg2.strides
  161. [152]
  162. [153] PUSH_I64 0
  163. [154]
  164. [155] EQ_HANDLE
  165. [156] ASSERT 3
  166. [157]
  167. [158] LOAD_HEAP 6 arg0.code
  168. [159]
  169. [160] PUSH_I64 3
  170. [161]
  171. [162] EQ_I64
  172. [163] RJUMP_IF_TRUE rel=7 to 170
  173. [164]
  174. [165] LOAD_HEAP 6 arg0.code
  175. [166]
  176. [167] PUSH_I64 7
  177. [168]
  178. [169] EQ_I64
  179. [170] RJUMP_IF_TRUE rel=7 to 177
  180. [171]
  181. [172] LOAD_HEAP 6 arg0.code
  182. [173]
  183. [174] PUSH_I64 4
  184. [175]
  185. [176] EQ_I64
  186. [177] ASSERT 4
  187. [178]
  188. [179] LOAD_HEAP 8 arg1.code
  189. [180]
  190. [181] PUSH_I64 3
  191. [182]
  192. [183] EQ_I64
  193. [184] RJUMP_IF_TRUE rel=7 to 191
  194. [185]
  195. [186] LOAD_HEAP 8 arg1.code
  196. [187]
  197. [188] PUSH_I64 7
  198. [189]
  199. [190] EQ_I64
  200. [191] RJUMP_IF_TRUE rel=7 to 198
  201. [192]
  202. [193] LOAD_HEAP 8 arg1.code
  203. [194]
  204. [195] PUSH_I64 4
  205. [196]
  206. [197] EQ_I64
  207. [198] ASSERT 5
  208. [199]
  209. [200] LOAD_HEAP 10 arg2.code
  210. [201]
  211. [202] PUSH_I64 3
  212. [203]
  213. [204] EQ_I64
  214. [205] RJUMP_IF_TRUE rel=7 to 212
  215. [206]
  216. [207] LOAD_HEAP 10 arg2.code
  217. [208]
  218. [209] PUSH_I64 7
  219. [210]
  220. [211] EQ_I64
  221. [212] RJUMP_IF_TRUE rel=7 to 219
  222. [213]
  223. [214] LOAD_HEAP 10 arg2.code
  224. [215]
  225. [216] PUSH_I64 4
  226. [217]
  227. [218] EQ_I64
  228. [219] ASSERT 6
  229. [220]
  230. [221] LOAD_HEAP 15 dev_type
  231. [222]
  232. [223] PUSH_I64 4
  233. [224]
  234. [225] EQ_I64
  235. [226] ASSERT 7
  236. [227]
  237. [228] PUSH_I64 1
  238. [229]
  239. [230] LOAD_HEAP 5 arg0
  240. [231]
  241. [232] TVM_STRUCT_GET 0 4
  242. [233]
  243. [234]
  244. [235] EQ_I64
  245. [236] ASSERT 8
  246. [237]
  247. [238] LOAD_HEAP 5 arg0
  248. [239]
  249. [240] TVM_STRUCT_GET 0 5
  250. [241]
  251. [242]
  252. [243] PUSH_I64 2
  253. [244]
  254. [245] EQ_I64
  255. [246] RJUMP_IF_FALSE rel=11 to 257
  256. [247]
  257. [248] POP
  258. [249] LOAD_HEAP 5 arg0
  259. [250]
  260. [251] TVM_STRUCT_GET 0 6
  261. [252]
  262. [253]
  263. [254] PUSH_I64 32
  264. [255]
  265. [256] EQ_I64
  266. [257] RJUMP_IF_FALSE rel=11 to 268
  267. [258]
  268. [259] POP
  269. [260] LOAD_HEAP 5 arg0
  270. [261]
  271. [262] TVM_STRUCT_GET 0 7
  272. [263]
  273. [264]
  274. [265] PUSH_I64 1
  275. [266]
  276. [267] EQ_I64
  277. [268] ASSERT 9
  278. [269]
  279. [270] LOAD_HEAP 5 arg0
  280. [271]
  281. [272] TVM_STRUCT_GET 0 8
  282. [273]
  283. [274]
  284. [275] PUSH_I64 0
  285. [276]
  286. [277] EQ_I64
  287. [278] ASSERT 10
  288. [279]
  289. [280] PUSH_I64 1
  290. [281]
  291. [282] LOAD_HEAP 7 arg1
  292. [283]
  293. [284] TVM_STRUCT_GET 0 4
  294. [285]
  295. [286]
  296. [287] EQ_I64
  297. [288] ASSERT 11
  298. [289]
  299. [290] LOAD_HEAP 7 arg1
  300. [291]
  301. [292] TVM_STRUCT_GET 0 5
  302. [293]
  303. [294]
  304. [295] PUSH_I64 2
  305. [296]
  306. [297] EQ_I64
  307. [298] RJUMP_IF_FALSE rel=11 to 309
  308. [299]
  309. [300] POP
  310. [301] LOAD_HEAP 7 arg1
  311. [302]
  312. [303] TVM_STRUCT_GET 0 6
  313. [304]
  314. [305]
  315. [306] PUSH_I64 32
  316. [307]
  317. [308] EQ_I64
  318. [309] RJUMP_IF_FALSE rel=11 to 320
  319. [310]
  320. [311] POP
  321. [312] LOAD_HEAP 7 arg1
  322. [313]
  323. [314] TVM_STRUCT_GET 0 7
  324. [315]
  325. [316]
  326. [317] PUSH_I64 1
  327. [318]
  328. [319] EQ_I64
  329. [320] ASSERT 12
  330. [321]
  331. [322] LOAD_HEAP 13 n
  332. [323]
  333. [324] LOAD_HEAP 18 arg1.shape
  334. [325]
  335. [326] ARRAY_LOAD_INT64 0
  336. [327]
  337. [328] EQ_I64
  338. [329] ASSERT 13
  339. [330]
  340. [331] LOAD_HEAP 7 arg1
  341. [332]
  342. [333] TVM_STRUCT_GET 0 8
  343. [334]
  344. [335]
  345. [336] PUSH_I64 0
  346. [337]
  347. [338] EQ_I64
  348. [339] ASSERT 14
  349. [340]
  350. [341] PUSH_I64 4
  351. [342]
  352. [343] LOAD_HEAP 7 arg1
  353. [344]
  354. [345] TVM_STRUCT_GET 0 10
  355. [346]
  356. [347]
  357. [348] EQ_I64
  358. [349] ASSERT 15
  359. [350]
  360. [351] LOAD_HEAP 16 dev_id
  361. [352]
  362. [353] LOAD_HEAP 7 arg1
  363. [354]
  364. [355] TVM_STRUCT_GET 0 9
  365. [356]
  366. [357]
  367. [358] EQ_I64
  368. [359] ASSERT 16
  369. [360]
  370. [361] PUSH_I64 1
  371. [362]
  372. [363] LOAD_HEAP 9 arg2
  373. [364]
  374. [365] TVM_STRUCT_GET 0 4
  375. [366]
  376. [367]
  377. [368] EQ_I64
  378. [369] ASSERT 17
  379. [370]
  380. [371] LOAD_HEAP 9 arg2
  381. [372]
  382. [373] TVM_STRUCT_GET 0 5
  383. [374]
  384. [375]
  385. [376] PUSH_I64 2
  386. [377]
  387. [378] EQ_I64
  388. [379] RJUMP_IF_FALSE rel=11 to 390
  389. [380]
  390. [381] POP
  391. [382] LOAD_HEAP 9 arg2
  392. [383]
  393. [384] TVM_STRUCT_GET 0 6
  394. [385]
  395. [386]
  396. [387] PUSH_I64 32
  397. [388]
  398. [389] EQ_I64
  399. [390] RJUMP_IF_FALSE rel=11 to 401
  400. [391]
  401. [392] POP
  402. [393] LOAD_HEAP 9 arg2
  403. [394]
  404. [395] TVM_STRUCT_GET 0 7
  405. [396]
  406. [397]
  407. [398] PUSH_I64 1
  408. [399]
  409. [400] EQ_I64
  410. [401] ASSERT 18
  411. [402]
  412. [403] LOAD_HEAP 13 n
  413. [404]
  414. [405] LOAD_HEAP 21 arg2.shape
  415. [406]
  416. [407] ARRAY_LOAD_INT64 0
  417. [408]
  418. [409] EQ_I64
  419. [410] ASSERT 19
  420. [411]
  421. [412] LOAD_HEAP 9 arg2
  422. [413]
  423. [414] TVM_STRUCT_GET 0 8
  424. [415]
  425. [416]
  426. [417] PUSH_I64 0
  427. [418]
  428. [419] EQ_I64
  429. [420] ASSERT 20
  430. [421]
  431. [422] PUSH_I64 4
  432. [423]
  433. [424] LOAD_HEAP 9 arg2
  434. [425]
  435. [426] TVM_STRUCT_GET 0 10
  436. [427]
  437. [428]
  438. [429] EQ_I64
  439. [430] ASSERT 21
  440. [431]
  441. [432] LOAD_HEAP 16 dev_id
  442. [433]
  443. [434] LOAD_HEAP 9 arg2
  444. [435]
  445. [436] TVM_STRUCT_GET 0 9
  446. [437]
  447. [438]
  448. [439] EQ_I64
  449. [440] ASSERT 22
  450. [441]
  451. [442] PUSH_I64 4
  452. [443]
  453. [444] PUSH_I64 1
  454. [445]
  455. [446] EQ_I64
  456. [447] NOT
  457. [448] RJUMP_IF_FALSE rel=38 to 486
  458. [449]
  459. [450] POP
  460. [451] LOAD_HEAP 4 stack_value
  461. [452]
  462. [453] PUSH_I64 4
  463. [454]
  464. [455] TVM_STRUCT_SET 0 12
  465. [456]
  466. [457]
  467. [458] LOAD_HEAP 3 stack_tcode
  468. [459]
  469. [460] PUSH_I64 0
  470. [461]
  471. [462] ARRAY_STORE_INT32 0
  472. [463]
  473. [464] LOAD_HEAP 4 stack_value
  474. [465]
  475. [466] LOAD_HEAP 16 dev_id
  476. [467]
  477. [468] TVM_STRUCT_SET 1 12
  478. [469]
  479. [470]
  480. [471] LOAD_HEAP 3 stack_tcode
  481. [472]
  482. [473] PUSH_I64 0
  483. [474]
  484. [475] ARRAY_STORE_INT32 1
  485. [476]
  486. [477] LOAD_HEAP 4 stack_value
  487. [478]
  488. [479] LOAD_HEAP 3 stack_tcode
  489. [480]
  490. [481] CALL_PACKED_FUNC fid=0 begin=0 end=2
  491. [482]
  492. [483]
  493. [484]
  494. [485] POP
  495. [486] POP
  496. [487] LOAD_HEAP 4 stack_value
  497. [488]
  498. [489] LOAD_HEAP 20 C
  499. [490]
  500. [491] TVM_STRUCT_SET 0 12
  501. [492]
  502. [493]
  503. [494] LOAD_HEAP 3 stack_tcode
  504. [495]
  505. [496] PUSH_I64 3
  506. [497]
  507. [498] ARRAY_STORE_INT32 0
  508. [499]
  509. [500] LOAD_HEAP 4 stack_value
  510. [501]
  511. [502] LOAD_HEAP 11 A
  512. [503]
  513. [504] TVM_STRUCT_SET 1 12
  514. [505]
  515. [506]
  516. [507] LOAD_HEAP 3 stack_tcode
  517. [508]
  518. [509] PUSH_I64 3
  519. [510]
  520. [511] ARRAY_STORE_INT32 1
  521. [512]
  522. [513] LOAD_HEAP 4 stack_value
  523. [514]
  524. [515] LOAD_HEAP 17 B
  525. [516]
  526. [517] TVM_STRUCT_SET 2 12
  527. [518]
  528. [519]
  529. [520] LOAD_HEAP 3 stack_tcode
  530. [521]
  531. [522] PUSH_I64 3
  532. [523]
  533. [524] ARRAY_STORE_INT32 2
  534. [525]
  535. [526] LOAD_HEAP 4 stack_value
  536. [527]
  537. [528] LOAD_HEAP 13 n
  538. [529]
  539. [530] TVM_STRUCT_SET 3 12
  540. [531]
  541. [532]
  542. [533] LOAD_HEAP 3 stack_tcode
  543. [534]
  544. [535] PUSH_I64 0
  545. [536]
  546. [537] ARRAY_STORE_INT32 3
  547. [538]
  548. [539] LOAD_HEAP 4 stack_value
  549. [540]
  550. [541] LOAD_HEAP 13 n
  551. [542]
  552. [543] PUSH_I64 63
  553. [544]
  554. [545] ADD_I64
  555. [546] PUSH_I64 64
  556. [547]
  557. [548] DIV_I64
  558. [549] TVM_STRUCT_SET 4 12
  559. [550]
  560. [551]
  561. [552] LOAD_HEAP 3 stack_tcode
  562. [553]
  563. [554] PUSH_I64 0
  564. [555]
  565. [556] ARRAY_STORE_INT32 4
  566. [557]
  567. [558] LOAD_HEAP 4 stack_value
  568. [559]
  569. [560] PUSH_I64 64
  570. [561]
  571. [562] TVM_STRUCT_SET 5 12
  572. [563]
  573. [564]
  574. [565] LOAD_HEAP 3 stack_tcode
  575. [566]
  576. [567] PUSH_I64 0
  577. [568]
  578. [569] ARRAY_STORE_INT32 5
  579. [570]
  580. [571] LOAD_HEAP 4 stack_value
  581. [572]
  582. [573] LOAD_HEAP 3 stack_tcode
  583. [574]
  584. [575] CALL_PACKED_FUNC fid=1 begin=0 end=6
  585. [576]
  586. [577]
  587. [578]
  588. [579] POP
  589. ----------end--------------------
  590.  
  591. ------opencl code------
  592. __kernel void myadd__kernel0(__global float* restrict C, __global float* restrict A, __global float* restrict B, int n) {
  593. if (((int)get_group_id(0)) < ((n + -127) / 64)) {
  594. C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
  595. } else {
  596. if ((((int)get_group_id(0)) * 64) < (n - ((int)get_local_id(0)))) {
  597. C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
  598. }
  599. }
  600. }
  601.  
  602.  
  603. Press any key to continue . . .
Add Comment
Please, Sign In to add comment