SHARE
TWEET

Untitled

a guest May 11th, 2018 17 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <class 'tvm.tensor.Tensor'>
  2. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:192: Multiple OpenCL platforms matched, use the first one ...
  3. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:195: Initialize OpenCL platform 'Intel(R) OpenCL '
  4. [16:03:59] C:\TVM\copy\tvm\src\runtime\opencl\opencl_device_api.cc:215: opencl(0)='Intel(R) HD Graphics 4600 ' cl_device_id=000001BB23F29120
  5. ------tvm gen code------
  6. Function: myadd
  7. Program dump: code-size=580
  8. ----------begin-----------------
  9. [0]     TVM_STACK_ALLOCA_BY_8BYTE 4
  10. [1]
  11. [2]     STORE_HEAP 3 stack_tcode
  12. [3]
  13. [4]     TVM_STACK_ALLOCA_BY_8BYTE 7
  14. [5]
  15. [6]     STORE_HEAP 4 stack_value
  16. [7]
  17. [8]     LOAD_HEAP 2 num_args
  18. [9]
  19. [10]    PUSH_I64 3
  20. [11]
  21. [12]    EQ_I64
  22. [13]    ASSERT 0
  23. [14]
  24. [15]    LOAD_HEAP 0 args
  25. [16]
  26. [17]    TVM_STRUCT_GET 0 12
  27. [18]
  28. [19]
  29. [20]    STORE_HEAP 5 arg0
  30. [21]
  31. [22]    LOAD_HEAP 1 arg_type_ids
  32. [23]
  33. [24]    ARRAY_LOAD_INT32 0
  34. [25]
  35. [26]    STORE_HEAP 6 arg0.code
  36. [27]
  37. [28]    LOAD_HEAP 0 args
  38. [29]
  39. [30]    TVM_STRUCT_GET 1 12
  40. [31]
  41. [32]
  42. [33]    STORE_HEAP 7 arg1
  43. [34]
  44. [35]    LOAD_HEAP 1 arg_type_ids
  45. [36]
  46. [37]    ARRAY_LOAD_INT32 1
  47. [38]
  48. [39]    STORE_HEAP 8 arg1.code
  49. [40]
  50. [41]    LOAD_HEAP 0 args
  51. [42]
  52. [43]    TVM_STRUCT_GET 2 12
  53. [44]
  54. [45]
  55. [46]    STORE_HEAP 9 arg2
  56. [47]
  57. [48]    LOAD_HEAP 1 arg_type_ids
  58. [49]
  59. [50]    ARRAY_LOAD_INT32 2
  60. [51]
  61. [52]    STORE_HEAP 10 arg2.code
  62. [53]
  63. [54]    LOAD_HEAP 5 arg0
  64. [55]
  65. [56]    TVM_STRUCT_GET 0 1
  66. [57]
  67. [58]
  68. [59]    STORE_HEAP 11 A
  69. [60]
  70. [61]    LOAD_HEAP 5 arg0
  71. [62]
  72. [63]    TVM_STRUCT_GET 0 2
  73. [64]
  74. [65]
  75. [66]    STORE_HEAP 12 arg0.shape
  76. [67]
  77. [68]    LOAD_HEAP 12 arg0.shape
  78. [69]
  79. [70]    ARRAY_LOAD_INT64 0
  80. [71]
  81. [72]    STORE_HEAP 13 n
  82. [73]
  83. [74]    LOAD_HEAP 5 arg0
  84. [75]
  85. [76]    TVM_STRUCT_GET 0 3
  86. [77]
  87. [78]
  88. [79]    STORE_HEAP 14 arg0.strides
  89. [80]
  90. [81]    LOAD_HEAP 14 arg0.strides
  91. [82]
  92. [83]    PUSH_I64 0
  93. [84]
  94. [85]    EQ_HANDLE
  95. [86]    ASSERT 1
  96. [87]
  97. [88]    LOAD_HEAP 5 arg0
  98. [89]
  99. [90]    TVM_STRUCT_GET 0 10
  100. [91]
  101. [92]
  102. [93]    STORE_HEAP 15 dev_type
  103. [94]
  104. [95]    LOAD_HEAP 5 arg0
  105. [96]
  106. [97]    TVM_STRUCT_GET 0 9
  107. [98]
  108. [99]
  109. [100]   STORE_HEAP 16 dev_id
  110. [101]
  111. [102]   LOAD_HEAP 7 arg1
  112. [103]
  113. [104]   TVM_STRUCT_GET 0 1
  114. [105]
  115. [106]
  116. [107]   STORE_HEAP 17 B
  117. [108]
  118. [109]   LOAD_HEAP 7 arg1
  119. [110]
  120. [111]   TVM_STRUCT_GET 0 2
  121. [112]
  122. [113]
  123. [114]   STORE_HEAP 18 arg1.shape
  124. [115]
  125. [116]   LOAD_HEAP 7 arg1
  126. [117]
  127. [118]   TVM_STRUCT_GET 0 3
  128. [119]
  129. [120]
  130. [121]   STORE_HEAP 19 arg1.strides
  131. [122]
  132. [123]   LOAD_HEAP 19 arg1.strides
  133. [124]
  134. [125]   PUSH_I64 0
  135. [126]
  136. [127]   EQ_HANDLE
  137. [128]   ASSERT 2
  138. [129]
  139. [130]   LOAD_HEAP 9 arg2
  140. [131]
  141. [132]   TVM_STRUCT_GET 0 1
  142. [133]
  143. [134]
  144. [135]   STORE_HEAP 20 C
  145. [136]
  146. [137]   LOAD_HEAP 9 arg2
  147. [138]
  148. [139]   TVM_STRUCT_GET 0 2
  149. [140]
  150. [141]
  151. [142]   STORE_HEAP 21 arg2.shape
  152. [143]
  153. [144]   LOAD_HEAP 9 arg2
  154. [145]
  155. [146]   TVM_STRUCT_GET 0 3
  156. [147]
  157. [148]
  158. [149]   STORE_HEAP 22 arg2.strides
  159. [150]
  160. [151]   LOAD_HEAP 22 arg2.strides
  161. [152]
  162. [153]   PUSH_I64 0
  163. [154]
  164. [155]   EQ_HANDLE
  165. [156]   ASSERT 3
  166. [157]
  167. [158]   LOAD_HEAP 6 arg0.code
  168. [159]
  169. [160]   PUSH_I64 3
  170. [161]
  171. [162]   EQ_I64
  172. [163]   RJUMP_IF_TRUE rel=7 to 170
  173. [164]
  174. [165]   LOAD_HEAP 6 arg0.code
  175. [166]
  176. [167]   PUSH_I64 7
  177. [168]
  178. [169]   EQ_I64
  179. [170]   RJUMP_IF_TRUE rel=7 to 177
  180. [171]
  181. [172]   LOAD_HEAP 6 arg0.code
  182. [173]
  183. [174]   PUSH_I64 4
  184. [175]
  185. [176]   EQ_I64
  186. [177]   ASSERT 4
  187. [178]
  188. [179]   LOAD_HEAP 8 arg1.code
  189. [180]
  190. [181]   PUSH_I64 3
  191. [182]
  192. [183]   EQ_I64
  193. [184]   RJUMP_IF_TRUE rel=7 to 191
  194. [185]
  195. [186]   LOAD_HEAP 8 arg1.code
  196. [187]
  197. [188]   PUSH_I64 7
  198. [189]
  199. [190]   EQ_I64
  200. [191]   RJUMP_IF_TRUE rel=7 to 198
  201. [192]
  202. [193]   LOAD_HEAP 8 arg1.code
  203. [194]
  204. [195]   PUSH_I64 4
  205. [196]
  206. [197]   EQ_I64
  207. [198]   ASSERT 5
  208. [199]
  209. [200]   LOAD_HEAP 10 arg2.code
  210. [201]
  211. [202]   PUSH_I64 3
  212. [203]
  213. [204]   EQ_I64
  214. [205]   RJUMP_IF_TRUE rel=7 to 212
  215. [206]
  216. [207]   LOAD_HEAP 10 arg2.code
  217. [208]
  218. [209]   PUSH_I64 7
  219. [210]
  220. [211]   EQ_I64
  221. [212]   RJUMP_IF_TRUE rel=7 to 219
  222. [213]
  223. [214]   LOAD_HEAP 10 arg2.code
  224. [215]
  225. [216]   PUSH_I64 4
  226. [217]
  227. [218]   EQ_I64
  228. [219]   ASSERT 6
  229. [220]
  230. [221]   LOAD_HEAP 15 dev_type
  231. [222]
  232. [223]   PUSH_I64 4
  233. [224]
  234. [225]   EQ_I64
  235. [226]   ASSERT 7
  236. [227]
  237. [228]   PUSH_I64 1
  238. [229]
  239. [230]   LOAD_HEAP 5 arg0
  240. [231]
  241. [232]   TVM_STRUCT_GET 0 4
  242. [233]
  243. [234]
  244. [235]   EQ_I64
  245. [236]   ASSERT 8
  246. [237]
  247. [238]   LOAD_HEAP 5 arg0
  248. [239]
  249. [240]   TVM_STRUCT_GET 0 5
  250. [241]
  251. [242]
  252. [243]   PUSH_I64 2
  253. [244]
  254. [245]   EQ_I64
  255. [246]   RJUMP_IF_FALSE rel=11 to 257
  256. [247]
  257. [248]   POP
  258. [249]   LOAD_HEAP 5 arg0
  259. [250]
  260. [251]   TVM_STRUCT_GET 0 6
  261. [252]
  262. [253]
  263. [254]   PUSH_I64 32
  264. [255]
  265. [256]   EQ_I64
  266. [257]   RJUMP_IF_FALSE rel=11 to 268
  267. [258]
  268. [259]   POP
  269. [260]   LOAD_HEAP 5 arg0
  270. [261]
  271. [262]   TVM_STRUCT_GET 0 7
  272. [263]
  273. [264]
  274. [265]   PUSH_I64 1
  275. [266]
  276. [267]   EQ_I64
  277. [268]   ASSERT 9
  278. [269]
  279. [270]   LOAD_HEAP 5 arg0
  280. [271]
  281. [272]   TVM_STRUCT_GET 0 8
  282. [273]
  283. [274]
  284. [275]   PUSH_I64 0
  285. [276]
  286. [277]   EQ_I64
  287. [278]   ASSERT 10
  288. [279]
  289. [280]   PUSH_I64 1
  290. [281]
  291. [282]   LOAD_HEAP 7 arg1
  292. [283]
  293. [284]   TVM_STRUCT_GET 0 4
  294. [285]
  295. [286]
  296. [287]   EQ_I64
  297. [288]   ASSERT 11
  298. [289]
  299. [290]   LOAD_HEAP 7 arg1
  300. [291]
  301. [292]   TVM_STRUCT_GET 0 5
  302. [293]
  303. [294]
  304. [295]   PUSH_I64 2
  305. [296]
  306. [297]   EQ_I64
  307. [298]   RJUMP_IF_FALSE rel=11 to 309
  308. [299]
  309. [300]   POP
  310. [301]   LOAD_HEAP 7 arg1
  311. [302]
  312. [303]   TVM_STRUCT_GET 0 6
  313. [304]
  314. [305]
  315. [306]   PUSH_I64 32
  316. [307]
  317. [308]   EQ_I64
  318. [309]   RJUMP_IF_FALSE rel=11 to 320
  319. [310]
  320. [311]   POP
  321. [312]   LOAD_HEAP 7 arg1
  322. [313]
  323. [314]   TVM_STRUCT_GET 0 7
  324. [315]
  325. [316]
  326. [317]   PUSH_I64 1
  327. [318]
  328. [319]   EQ_I64
  329. [320]   ASSERT 12
  330. [321]
  331. [322]   LOAD_HEAP 13 n
  332. [323]
  333. [324]   LOAD_HEAP 18 arg1.shape
  334. [325]
  335. [326]   ARRAY_LOAD_INT64 0
  336. [327]
  337. [328]   EQ_I64
  338. [329]   ASSERT 13
  339. [330]
  340. [331]   LOAD_HEAP 7 arg1
  341. [332]
  342. [333]   TVM_STRUCT_GET 0 8
  343. [334]
  344. [335]
  345. [336]   PUSH_I64 0
  346. [337]
  347. [338]   EQ_I64
  348. [339]   ASSERT 14
  349. [340]
  350. [341]   PUSH_I64 4
  351. [342]
  352. [343]   LOAD_HEAP 7 arg1
  353. [344]
  354. [345]   TVM_STRUCT_GET 0 10
  355. [346]
  356. [347]
  357. [348]   EQ_I64
  358. [349]   ASSERT 15
  359. [350]
  360. [351]   LOAD_HEAP 16 dev_id
  361. [352]
  362. [353]   LOAD_HEAP 7 arg1
  363. [354]
  364. [355]   TVM_STRUCT_GET 0 9
  365. [356]
  366. [357]
  367. [358]   EQ_I64
  368. [359]   ASSERT 16
  369. [360]
  370. [361]   PUSH_I64 1
  371. [362]
  372. [363]   LOAD_HEAP 9 arg2
  373. [364]
  374. [365]   TVM_STRUCT_GET 0 4
  375. [366]
  376. [367]
  377. [368]   EQ_I64
  378. [369]   ASSERT 17
  379. [370]
  380. [371]   LOAD_HEAP 9 arg2
  381. [372]
  382. [373]   TVM_STRUCT_GET 0 5
  383. [374]
  384. [375]
  385. [376]   PUSH_I64 2
  386. [377]
  387. [378]   EQ_I64
  388. [379]   RJUMP_IF_FALSE rel=11 to 390
  389. [380]
  390. [381]   POP
  391. [382]   LOAD_HEAP 9 arg2
  392. [383]
  393. [384]   TVM_STRUCT_GET 0 6
  394. [385]
  395. [386]
  396. [387]   PUSH_I64 32
  397. [388]
  398. [389]   EQ_I64
  399. [390]   RJUMP_IF_FALSE rel=11 to 401
  400. [391]
  401. [392]   POP
  402. [393]   LOAD_HEAP 9 arg2
  403. [394]
  404. [395]   TVM_STRUCT_GET 0 7
  405. [396]
  406. [397]
  407. [398]   PUSH_I64 1
  408. [399]
  409. [400]   EQ_I64
  410. [401]   ASSERT 18
  411. [402]
  412. [403]   LOAD_HEAP 13 n
  413. [404]
  414. [405]   LOAD_HEAP 21 arg2.shape
  415. [406]
  416. [407]   ARRAY_LOAD_INT64 0
  417. [408]
  418. [409]   EQ_I64
  419. [410]   ASSERT 19
  420. [411]
  421. [412]   LOAD_HEAP 9 arg2
  422. [413]
  423. [414]   TVM_STRUCT_GET 0 8
  424. [415]
  425. [416]
  426. [417]   PUSH_I64 0
  427. [418]
  428. [419]   EQ_I64
  429. [420]   ASSERT 20
  430. [421]
  431. [422]   PUSH_I64 4
  432. [423]
  433. [424]   LOAD_HEAP 9 arg2
  434. [425]
  435. [426]   TVM_STRUCT_GET 0 10
  436. [427]
  437. [428]
  438. [429]   EQ_I64
  439. [430]   ASSERT 21
  440. [431]
  441. [432]   LOAD_HEAP 16 dev_id
  442. [433]
  443. [434]   LOAD_HEAP 9 arg2
  444. [435]
  445. [436]   TVM_STRUCT_GET 0 9
  446. [437]
  447. [438]
  448. [439]   EQ_I64
  449. [440]   ASSERT 22
  450. [441]
  451. [442]   PUSH_I64 4
  452. [443]
  453. [444]   PUSH_I64 1
  454. [445]
  455. [446]   EQ_I64
  456. [447]   NOT
  457. [448]   RJUMP_IF_FALSE rel=38 to 486
  458. [449]
  459. [450]   POP
  460. [451]   LOAD_HEAP 4 stack_value
  461. [452]
  462. [453]   PUSH_I64 4
  463. [454]
  464. [455]   TVM_STRUCT_SET 0 12
  465. [456]
  466. [457]
  467. [458]   LOAD_HEAP 3 stack_tcode
  468. [459]
  469. [460]   PUSH_I64 0
  470. [461]
  471. [462]   ARRAY_STORE_INT32 0
  472. [463]
  473. [464]   LOAD_HEAP 4 stack_value
  474. [465]
  475. [466]   LOAD_HEAP 16 dev_id
  476. [467]
  477. [468]   TVM_STRUCT_SET 1 12
  478. [469]
  479. [470]
  480. [471]   LOAD_HEAP 3 stack_tcode
  481. [472]
  482. [473]   PUSH_I64 0
  483. [474]
  484. [475]   ARRAY_STORE_INT32 1
  485. [476]
  486. [477]   LOAD_HEAP 4 stack_value
  487. [478]
  488. [479]   LOAD_HEAP 3 stack_tcode
  489. [480]
  490. [481]   CALL_PACKED_FUNC  fid=0 begin=0 end=2
  491. [482]
  492. [483]
  493. [484]
  494. [485]   POP
  495. [486]   POP
  496. [487]   LOAD_HEAP 4 stack_value
  497. [488]
  498. [489]   LOAD_HEAP 20 C
  499. [490]
  500. [491]   TVM_STRUCT_SET 0 12
  501. [492]
  502. [493]
  503. [494]   LOAD_HEAP 3 stack_tcode
  504. [495]
  505. [496]   PUSH_I64 3
  506. [497]
  507. [498]   ARRAY_STORE_INT32 0
  508. [499]
  509. [500]   LOAD_HEAP 4 stack_value
  510. [501]
  511. [502]   LOAD_HEAP 11 A
  512. [503]
  513. [504]   TVM_STRUCT_SET 1 12
  514. [505]
  515. [506]
  516. [507]   LOAD_HEAP 3 stack_tcode
  517. [508]
  518. [509]   PUSH_I64 3
  519. [510]
  520. [511]   ARRAY_STORE_INT32 1
  521. [512]
  522. [513]   LOAD_HEAP 4 stack_value
  523. [514]
  524. [515]   LOAD_HEAP 17 B
  525. [516]
  526. [517]   TVM_STRUCT_SET 2 12
  527. [518]
  528. [519]
  529. [520]   LOAD_HEAP 3 stack_tcode
  530. [521]
  531. [522]   PUSH_I64 3
  532. [523]
  533. [524]   ARRAY_STORE_INT32 2
  534. [525]
  535. [526]   LOAD_HEAP 4 stack_value
  536. [527]
  537. [528]   LOAD_HEAP 13 n
  538. [529]
  539. [530]   TVM_STRUCT_SET 3 12
  540. [531]
  541. [532]
  542. [533]   LOAD_HEAP 3 stack_tcode
  543. [534]
  544. [535]   PUSH_I64 0
  545. [536]
  546. [537]   ARRAY_STORE_INT32 3
  547. [538]
  548. [539]   LOAD_HEAP 4 stack_value
  549. [540]
  550. [541]   LOAD_HEAP 13 n
  551. [542]
  552. [543]   PUSH_I64 63
  553. [544]
  554. [545]   ADD_I64
  555. [546]   PUSH_I64 64
  556. [547]
  557. [548]   DIV_I64
  558. [549]   TVM_STRUCT_SET 4 12
  559. [550]
  560. [551]
  561. [552]   LOAD_HEAP 3 stack_tcode
  562. [553]
  563. [554]   PUSH_I64 0
  564. [555]
  565. [556]   ARRAY_STORE_INT32 4
  566. [557]
  567. [558]   LOAD_HEAP 4 stack_value
  568. [559]
  569. [560]   PUSH_I64 64
  570. [561]
  571. [562]   TVM_STRUCT_SET 5 12
  572. [563]
  573. [564]
  574. [565]   LOAD_HEAP 3 stack_tcode
  575. [566]
  576. [567]   PUSH_I64 0
  577. [568]
  578. [569]   ARRAY_STORE_INT32 5
  579. [570]
  580. [571]   LOAD_HEAP 4 stack_value
  581. [572]
  582. [573]   LOAD_HEAP 3 stack_tcode
  583. [574]
  584. [575]   CALL_PACKED_FUNC  fid=1 begin=0 end=6
  585. [576]
  586. [577]
  587. [578]
  588. [579]   POP
  589. ----------end--------------------
  590.  
  591. ------opencl code------
  592. __kernel void myadd__kernel0(__global float* restrict C, __global float* restrict A, __global float* restrict B, int n) {
  593.   if (((int)get_group_id(0)) < ((n + -127) / 64)) {
  594.     C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
  595.   } else {
  596.     if ((((int)get_group_id(0)) * 64) < (n - ((int)get_local_id(0)))) {
  597.       C[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] = (A[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))] + B[((((int)get_group_id(0)) * 64) + ((int)get_local_id(0)))]);
  598.     }
  599.   }
  600. }
  601.  
  602.  
  603. Press any key to continue . . .
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top