Guest User

Untitled

a guest
Aug 5th, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.36 KB | None | 0 0
  1. module &input_bc:1:0:$full:$large:$default; // HSAIL module "input_bc": version 1.0, full profile, large machine model, default float rounding mode
  2. extension "amd:gcn"; // extension directives: name the extensions this module declares
  3. extension "amd:mipmap";
  4. extension "IMAGE";
  5.  
  6. prog kernel &__OpenCL_read_linear_uncached_kernel( // Kernel: repeatedly copies up to 256 floats from %input into group scratch, runs long x = x + x*x chains on one scratch element per work-item, and finally stores one float per work-item to %output
  7. kernarg_u64 %_.global_offset_0, // added to the dim-0 absolute work-item id in the entry block
  8. kernarg_u64 %_.global_offset_1, // never loaded by the instructions of this kernel
  9. kernarg_u64 %_.global_offset_2, // never loaded by the instructions of this kernel
  10. kernarg_u64 %_.printf_buffer, // never loaded by the instructions of this kernel
  11. kernarg_u64 %_.vqueue_pointer, // never loaded by the instructions of this kernel
  12. kernarg_u64 %_.aqlwrap_pointer, // never loaded by the instructions of this kernel
  13. kernarg_u64 %input, // base address of the global f32 data that the copy loops read
  14. kernarg_u64 %output) // base address the final f32 of each work-item is stored to
  15. {
  16. requiredworkgroupsize 256, 1, 1; // work-groups must be 256 x 1 x 1
  17. requirenopartialworkgroups; // no partial work-groups are dispatched
  18. pragma "AMD RTI", "ARGSTART:__OpenCL_read_linear_uncached_kernel"; // start of the "AMD RTI" metadata strings (argument/kernel descriptions); presumably consumed by the AMD runtime, none of the instructions below refer to them
  19. pragma "AMD RTI", "version:3:1:104";
  20. pragma "AMD RTI", "device:generic";
  21. pragma "AMD RTI", "uniqueid:1024";
  22. pragma "AMD RTI", "cws:256:1:1";
  23. pragma "AMD RTI", "value:_.global_offset_0:i64:1:1:0";
  24. pragma "AMD RTI", "constarg:0:_.global_offset_0";
  25. pragma "AMD RTI", "value:_.global_offset_1:i64:1:1:16";
  26. pragma "AMD RTI", "constarg:1:_.global_offset_1";
  27. pragma "AMD RTI", "value:_.global_offset_2:i64:1:1:32";
  28. pragma "AMD RTI", "constarg:2:_.global_offset_2";
  29. pragma "AMD RTI", "pointer:_.printf_buffer:u8:1:1:48:uav:7:1:RO:0:0:0";
  30. pragma "AMD RTI", "value:_.vqueue_pointer:i64:1:1:64";
  31. pragma "AMD RTI", "value:_.aqlwrap_pointer:i64:1:1:80";
  32. pragma "AMD RTI", "pointer:input:float:1:1:96:uav:7:4:RO:0:0:0";
  33. pragma "AMD RTI", "pointer:output:float:1:1:112:uav:7:4:RW:0:0:0";
  34. pragma "AMD RTI", "function:1:0";
  35. pragma "AMD RTI", "memory:64bitABI";
  36. pragma "AMD RTI", "privateid:8";
  37. pragma "AMD RTI", "enqueue_kernel:0";
  38. pragma "AMD RTI", "kernel_index:0";
  39. pragma "AMD RTI", "reflection:0:size_t";
  40. pragma "AMD RTI", "reflection:1:size_t";
  41. pragma "AMD RTI", "reflection:2:size_t";
  42. pragma "AMD RTI", "reflection:3:size_t";
  43. pragma "AMD RTI", "reflection:4:size_t";
  44. pragma "AMD RTI", "reflection:5:size_t";
  45. pragma "AMD RTI", "reflection:6:float*";
  46. pragma "AMD RTI", "reflection:7:float*";
  47. pragma "AMD RTI", "ARGEND:__OpenCL_read_linear_uncached_kernel"; // end of the RTI metadata strings
  48. group_f32 %read_linear_uncached.scratch[512]; // scratch array of 512 f32 elements in the group segment
  49.  // Entry block: derive ids and strides, zero this work-item's scratch slot, load the input/output base addresses.
  50. @__OpenCL_read_linear_uncached_kernel_entry:
  51. // BB#0:
  52. currentworkgroupsize_u32 $s0, 1; // $s0 = current work-group size, dim 1
  53. currentworkgroupsize_u32 $s1, 0; // $s1 = current work-group size, dim 0
  54. mul24_s32 $s1, $s1, $s0; // $s1 = size0 * size1 (24-bit multiply)
  55. workitemabsid_u32 $s0, 0; // $s0 = absolute work-item id, dim 0
  56. cvt_u64_u32 $d0, $s0; // zero-extend it to 64 bits
  57. workitemid_u32 $s0, 0; // $s0 = local work-item id, dim 0; stays live through the main loop
  58. currentworkgroupsize_u32 $s2, 2; // $s2 = current work-group size, dim 2
  59. ld_kernarg_align(8)_width(all)_u64 $d1, [%_.global_offset_0]; // $d1 = global offset, dim 0
  60. add_u64 $d1, $d0, $d1; // $d1 = absolute id + global offset
  61. mul24_s32 $s5, $s1, $s2; // $s5 = size0 * size1 * size2 = work-items per work-group
  62. shl_u32 $s1, $s0, 2; // $s1 = local id * 4 (byte offset of one f32)
  63. workitemflatid_u32 $s2; // $s2 = flattened work-item id within the work-group
  64. lda_group_u32 $s3, [%read_linear_uncached.scratch]; // $s3 = group-segment address of scratch
  65. st_group_align(4)_u32 0, [%read_linear_uncached.scratch][$s1]; // scratch[local id] = 0
  66. add_u32 $s1, $s3, $s1; // $s1 = address of scratch[local id]; read back just before ret
  67. cvt_s64_s32 $d0, $s2; // $d0 = flat id as 64-bit (start index of each copy loop)
  68. mov_b32 $s4, 0; // $s4 = outer loop counter, starts at 0
  69. cvt_s64_s32 $d2, $s5; // $d2 = work-items per work-group as 64-bit (copy-loop stride)
  70. cvt_u32_u64 $s5, $d1; // $s5 = low 32 bits of (absolute id + global offset)
  71. ld_kernarg_align(8)_width(all)_u64 $d1, [%output]; // $d1 = output base address
  72. ld_kernarg_align(8)_width(all)_u64 $d3, [%input]; // $d3 = input base address
  73. mov_b32 $s6, $s5; // $s6 = source element index base for the first copy; advanced by 2 per outer iteration
  74.  
  75. @LBB0_1: // outer loop head; $s4 takes the values 0, 2, ..., 30 (it is incremented by 2 and compared with 32 at the bottom of the loop)
  76. shl_u32 $s7, $s4, 8; // $s7 = $s4 * 256
  77. and_b32 $s8, $s7, 512; // $s8 = 0 or 512, alternating per pass: element offset into scratch for both copies of this pass. NOTE(review): scratch is declared with 512 elements, so offset 512 lies past the declaration - confirm the intended indexing
  78. cmp_gt_b1_u32 $c0, $s2, 255; // flat work-item id > 255 ?
  79. cbr_b1 $c0, @LBB0_4; // if so, this work-item skips the first copy loop
  80. // BB#2: set up the first copy loop
  81. cvt_s64_s32 $d4, $s6; // $d4 = source element index base ($s6), sign-extended
  82. mov_b64 $d5, $d0; // $d5 = copy index i, starts at the flat id held in $d0
  83.  
  84. @LBB0_3: // first copy loop: scratch[$s8 + i] = input[$d4 + i], i starting at $d5 and stepping by $d2 while i < 256
  85. // %.preheader
  86. cvt_u32_u64 $s7, $d5; // low 32 bits of the copy index i
  87. add_u32 $s7, $s7, $s8; // + element offset chosen at the loop head
  88. shl_u32 $s7, $s7, 2; // * 4 -> byte offset into scratch
  89. add_u64 $d6, $d5, $d4; // source element index = i + base
  90. shl_u64 $d6, $d6, 2; // * 4 -> byte offset
  91. add_u64 $d6, $d3, $d6; // + input base ($d3) -> global address
  92. ld_global_align(4)_f32 $s9, [$d6]; // load one f32 from global memory
  93. st_group_align(4)_f32 $s9, [%read_linear_uncached.scratch][$s7]; // store it into group scratch
  94. add_u64 $d5, $d5, $d2; // i += $d2 (work-items per work-group, computed in the entry block)
  95. cmp_lt_b1_u64 $c0, $d5, 256; // unsigned compare: i < 256 ?
  96. cbr_b1 $c0, @LBB0_3; // keep copying while true
  97.  
  98. @LBB0_4: // first compute phase of the pass (the compiler label below suggests this follows an inlined async_work_group_copy - presumably)
  99. // %_Z21async_work_group_copyPU3AS3fPKU3AS1fm9ocl_event.exit
  100. or_b32 $s7, $s8, 256; // element offset $s8 + 256 ($s8 is 0 or 512, so bit 8 is clear and OR acts as an add)
  101. add_u32 $s7, $s7, $s0; // + local id (dim 0). NOTE(review): when $s8 = 512 this index is >= 768 while scratch is declared [512] - confirm
  102. shl_u32 $s9, $s7, 2; // $s9 = byte offset of this work-item's element
  103. barrier; // work-group barrier before scratch is read
  104. ld_group_align(4)_f32 $s7, [%read_linear_uncached.scratch][$s9]; // x = that scratch element
  105. mul_ftz_f32 $s10, $s7, $s7; // x*x
  106. add_ftz_f32 $s10, $s7, $s10; // $s10 = x + x*x (step 1 of 100)
  107. add_u32 $s7, $s3, $s9; // $s7 = group address of the same element; reused by the second compute phase
  108. mul_ftz_f32 $s11, $s10, $s10; // steps 2..100: 99 further mul/add pairs, each doing $s10 = $s10 + $s10*$s10
  109. add_ftz_f32 $s10, $s10, $s11;
  110. mul_ftz_f32 $s11, $s10, $s10;
  111. add_ftz_f32 $s10, $s10, $s11;
  112. mul_ftz_f32 $s11, $s10, $s10;
  113. add_ftz_f32 $s10, $s10, $s11;
  114. mul_ftz_f32 $s11, $s10, $s10;
  115. add_ftz_f32 $s10, $s10, $s11;
  116. mul_ftz_f32 $s11, $s10, $s10;
  117. add_ftz_f32 $s10, $s10, $s11;
  118. mul_ftz_f32 $s11, $s10, $s10;
  119. add_ftz_f32 $s10, $s10, $s11;
  120. mul_ftz_f32 $s11, $s10, $s10;
  121. add_ftz_f32 $s10, $s10, $s11;
  122. mul_ftz_f32 $s11, $s10, $s10;
  123. add_ftz_f32 $s10, $s10, $s11;
  124. mul_ftz_f32 $s11, $s10, $s10;
  125. add_ftz_f32 $s10, $s10, $s11;
  126. mul_ftz_f32 $s11, $s10, $s10;
  127. add_ftz_f32 $s10, $s10, $s11;
  128. mul_ftz_f32 $s11, $s10, $s10;
  129. add_ftz_f32 $s10, $s10, $s11;
  130. mul_ftz_f32 $s11, $s10, $s10;
  131. add_ftz_f32 $s10, $s10, $s11;
  132. mul_ftz_f32 $s11, $s10, $s10;
  133. add_ftz_f32 $s10, $s10, $s11;
  134. mul_ftz_f32 $s11, $s10, $s10;
  135. add_ftz_f32 $s10, $s10, $s11;
  136. mul_ftz_f32 $s11, $s10, $s10;
  137. add_ftz_f32 $s10, $s10, $s11;
  138. mul_ftz_f32 $s11, $s10, $s10;
  139. add_ftz_f32 $s10, $s10, $s11;
  140. mul_ftz_f32 $s11, $s10, $s10;
  141. add_ftz_f32 $s10, $s10, $s11;
  142. mul_ftz_f32 $s11, $s10, $s10;
  143. add_ftz_f32 $s10, $s10, $s11;
  144. mul_ftz_f32 $s11, $s10, $s10;
  145. add_ftz_f32 $s10, $s10, $s11;
  146. mul_ftz_f32 $s11, $s10, $s10;
  147. add_ftz_f32 $s10, $s10, $s11;
  148. mul_ftz_f32 $s11, $s10, $s10;
  149. add_ftz_f32 $s10, $s10, $s11;
  150. mul_ftz_f32 $s11, $s10, $s10;
  151. add_ftz_f32 $s10, $s10, $s11;
  152. mul_ftz_f32 $s11, $s10, $s10;
  153. add_ftz_f32 $s10, $s10, $s11;
  154. mul_ftz_f32 $s11, $s10, $s10;
  155. add_ftz_f32 $s10, $s10, $s11;
  156. mul_ftz_f32 $s11, $s10, $s10;
  157. add_ftz_f32 $s10, $s10, $s11;
  158. mul_ftz_f32 $s11, $s10, $s10;
  159. add_ftz_f32 $s10, $s10, $s11;
  160. mul_ftz_f32 $s11, $s10, $s10;
  161. add_ftz_f32 $s10, $s10, $s11;
  162. mul_ftz_f32 $s11, $s10, $s10;
  163. add_ftz_f32 $s10, $s10, $s11;
  164. mul_ftz_f32 $s11, $s10, $s10;
  165. add_ftz_f32 $s10, $s10, $s11;
  166. mul_ftz_f32 $s11, $s10, $s10;
  167. add_ftz_f32 $s10, $s10, $s11;
  168. mul_ftz_f32 $s11, $s10, $s10;
  169. add_ftz_f32 $s10, $s10, $s11;
  170. mul_ftz_f32 $s11, $s10, $s10;
  171. add_ftz_f32 $s10, $s10, $s11;
  172. mul_ftz_f32 $s11, $s10, $s10;
  173. add_ftz_f32 $s10, $s10, $s11;
  174. mul_ftz_f32 $s11, $s10, $s10;
  175. add_ftz_f32 $s10, $s10, $s11;
  176. mul_ftz_f32 $s11, $s10, $s10;
  177. add_ftz_f32 $s10, $s10, $s11;
  178. mul_ftz_f32 $s11, $s10, $s10;
  179. add_ftz_f32 $s10, $s10, $s11;
  180. mul_ftz_f32 $s11, $s10, $s10;
  181. add_ftz_f32 $s10, $s10, $s11;
  182. mul_ftz_f32 $s11, $s10, $s10;
  183. add_ftz_f32 $s10, $s10, $s11;
  184. mul_ftz_f32 $s11, $s10, $s10;
  185. add_ftz_f32 $s10, $s10, $s11;
  186. mul_ftz_f32 $s11, $s10, $s10;
  187. add_ftz_f32 $s10, $s10, $s11;
  188. mul_ftz_f32 $s11, $s10, $s10;
  189. add_ftz_f32 $s10, $s10, $s11;
  190. mul_ftz_f32 $s11, $s10, $s10;
  191. add_ftz_f32 $s10, $s10, $s11;
  192. mul_ftz_f32 $s11, $s10, $s10;
  193. add_ftz_f32 $s10, $s10, $s11;
  194. mul_ftz_f32 $s11, $s10, $s10;
  195. add_ftz_f32 $s10, $s10, $s11;
  196. mul_ftz_f32 $s11, $s10, $s10;
  197. add_ftz_f32 $s10, $s10, $s11;
  198. mul_ftz_f32 $s11, $s10, $s10;
  199. add_ftz_f32 $s10, $s10, $s11;
  200. mul_ftz_f32 $s11, $s10, $s10;
  201. add_ftz_f32 $s10, $s10, $s11;
  202. mul_ftz_f32 $s11, $s10, $s10;
  203. add_ftz_f32 $s10, $s10, $s11;
  204. mul_ftz_f32 $s11, $s10, $s10;
  205. add_ftz_f32 $s10, $s10, $s11;
  206. mul_ftz_f32 $s11, $s10, $s10;
  207. add_ftz_f32 $s10, $s10, $s11;
  208. mul_ftz_f32 $s11, $s10, $s10;
  209. add_ftz_f32 $s10, $s10, $s11;
  210. mul_ftz_f32 $s11, $s10, $s10;
  211. add_ftz_f32 $s10, $s10, $s11;
  212. mul_ftz_f32 $s11, $s10, $s10;
  213. add_ftz_f32 $s10, $s10, $s11;
  214. mul_ftz_f32 $s11, $s10, $s10;
  215. add_ftz_f32 $s10, $s10, $s11;
  216. mul_ftz_f32 $s11, $s10, $s10;
  217. add_ftz_f32 $s10, $s10, $s11;
  218. mul_ftz_f32 $s11, $s10, $s10;
  219. add_ftz_f32 $s10, $s10, $s11;
  220. mul_ftz_f32 $s11, $s10, $s10;
  221. add_ftz_f32 $s10, $s10, $s11;
  222. mul_ftz_f32 $s11, $s10, $s10;
  223. add_ftz_f32 $s10, $s10, $s11;
  224. mul_ftz_f32 $s11, $s10, $s10;
  225. add_ftz_f32 $s10, $s10, $s11;
  226. mul_ftz_f32 $s11, $s10, $s10;
  227. add_ftz_f32 $s10, $s10, $s11;
  228. mul_ftz_f32 $s11, $s10, $s10;
  229. add_ftz_f32 $s10, $s10, $s11;
  230. mul_ftz_f32 $s11, $s10, $s10;
  231. add_ftz_f32 $s10, $s10, $s11;
  232. mul_ftz_f32 $s11, $s10, $s10;
  233. add_ftz_f32 $s10, $s10, $s11;
  234. mul_ftz_f32 $s11, $s10, $s10;
  235. add_ftz_f32 $s10, $s10, $s11;
  236. mul_ftz_f32 $s11, $s10, $s10;
  237. add_ftz_f32 $s10, $s10, $s11;
  238. mul_ftz_f32 $s11, $s10, $s10;
  239. add_ftz_f32 $s10, $s10, $s11;
  240. mul_ftz_f32 $s11, $s10, $s10;
  241. add_ftz_f32 $s10, $s10, $s11;
  242. mul_ftz_f32 $s11, $s10, $s10;
  243. add_ftz_f32 $s10, $s10, $s11;
  244. mul_ftz_f32 $s11, $s10, $s10;
  245. add_ftz_f32 $s10, $s10, $s11;
  246. mul_ftz_f32 $s11, $s10, $s10;
  247. add_ftz_f32 $s10, $s10, $s11;
  248. mul_ftz_f32 $s11, $s10, $s10;
  249. add_ftz_f32 $s10, $s10, $s11;
  250. mul_ftz_f32 $s11, $s10, $s10;
  251. add_ftz_f32 $s10, $s10, $s11;
  252. mul_ftz_f32 $s11, $s10, $s10;
  253. add_ftz_f32 $s10, $s10, $s11;
  254. mul_ftz_f32 $s11, $s10, $s10;
  255. add_ftz_f32 $s10, $s10, $s11;
  256. mul_ftz_f32 $s11, $s10, $s10;
  257. add_ftz_f32 $s10, $s10, $s11;
  258. mul_ftz_f32 $s11, $s10, $s10;
  259. add_ftz_f32 $s10, $s10, $s11;
  260. mul_ftz_f32 $s11, $s10, $s10;
  261. add_ftz_f32 $s10, $s10, $s11;
  262. mul_ftz_f32 $s11, $s10, $s10;
  263. add_ftz_f32 $s10, $s10, $s11;
  264. mul_ftz_f32 $s11, $s10, $s10;
  265. add_ftz_f32 $s10, $s10, $s11;
  266. mul_ftz_f32 $s11, $s10, $s10;
  267. add_ftz_f32 $s10, $s10, $s11;
  268. mul_ftz_f32 $s11, $s10, $s10;
  269. add_ftz_f32 $s10, $s10, $s11;
  270. mul_ftz_f32 $s11, $s10, $s10;
  271. add_ftz_f32 $s10, $s10, $s11;
  272. mul_ftz_f32 $s11, $s10, $s10;
  273. add_ftz_f32 $s10, $s10, $s11;
  274. mul_ftz_f32 $s11, $s10, $s10;
  275. add_ftz_f32 $s10, $s10, $s11;
  276. mul_ftz_f32 $s11, $s10, $s10;
  277. add_ftz_f32 $s10, $s10, $s11;
  278. mul_ftz_f32 $s11, $s10, $s10;
  279. add_ftz_f32 $s10, $s10, $s11;
  280. mul_ftz_f32 $s11, $s10, $s10;
  281. add_ftz_f32 $s10, $s10, $s11;
  282. mul_ftz_f32 $s11, $s10, $s10;
  283. add_ftz_f32 $s10, $s10, $s11;
  284. mul_ftz_f32 $s11, $s10, $s10;
  285. add_ftz_f32 $s10, $s10, $s11;
  286. mul_ftz_f32 $s11, $s10, $s10;
  287. add_ftz_f32 $s10, $s10, $s11;
  288. mul_ftz_f32 $s11, $s10, $s10;
  289. add_ftz_f32 $s10, $s10, $s11;
  290. mul_ftz_f32 $s11, $s10, $s10;
  291. add_ftz_f32 $s10, $s10, $s11;
  292. mul_ftz_f32 $s11, $s10, $s10;
  293. add_ftz_f32 $s10, $s10, $s11;
  294. mul_ftz_f32 $s11, $s10, $s10;
  295. add_ftz_f32 $s10, $s10, $s11;
  296. mul_ftz_f32 $s11, $s10, $s10;
  297. add_ftz_f32 $s10, $s10, $s11;
  298. mul_ftz_f32 $s11, $s10, $s10;
  299. add_ftz_f32 $s10, $s10, $s11;
  300. mul_ftz_f32 $s11, $s10, $s10;
  301. add_ftz_f32 $s10, $s10, $s11;
  302. mul_ftz_f32 $s11, $s10, $s10;
  303. add_ftz_f32 $s10, $s10, $s11;
  304. mul_ftz_f32 $s11, $s10, $s10;
  305. add_ftz_f32 $s10, $s10, $s11;
  306. st_group_align(4)_f32 $s10, [%read_linear_uncached.scratch][$s9]; // write the chain result back to the same scratch element
  307. cmp_ge_b1_u32 $c0, $s2, 256; // flat work-item id >= 256 ?
  308. cbr_b1 $c0, @LBB0_7; // if so, this work-item skips the second copy loop
  309. // BB#5: set up the second copy loop
  310. add_u32 $s9, $s6, 1; // source element index base for the second copy = $s6 + 1
  311. cvt_s64_s32 $d4, $s9; // sign-extend it into $d4
  312. mov_b64 $d5, $d0; // copy index i restarts at the flat id held in $d0
  313.  
  314. @LBB0_6: // second copy loop: scratch[$s8 + i] = input[$d4 + i], i starting at $d5 and stepping by $d2 while i < 256
  315. // %.preheader.1
  316. cvt_u32_u64 $s9, $d5; // low 32 bits of the copy index i
  317. add_u32 $s9, $s9, $s8; // + the same element offset ($s8) used by the first copy of this pass
  318. shl_u32 $s9, $s9, 2; // * 4 -> byte offset into scratch
  319. add_u64 $d6, $d5, $d4; // source element index = i + base
  320. shl_u64 $d6, $d6, 2; // * 4 -> byte offset
  321. add_u64 $d6, $d3, $d6; // + input base ($d3) -> global address
  322. ld_global_align(4)_f32 $s10, [$d6]; // load one f32 from global memory
  323. st_group_align(4)_f32 $s10, [%read_linear_uncached.scratch][$s9]; // store it into group scratch
  324. add_u64 $d5, $d5, $d2; // i += $d2 (work-items per work-group, computed in the entry block)
  325. cmp_lt_b1_u64 $c0, $d5, 256; // unsigned compare: i < 256 ?
  326. cbr_b1 $c0, @LBB0_6; // keep copying while true
  327.  
  328. @LBB0_7: // second compute phase of the pass, then the loop back-edge and the epilogue
  329. // %_Z21async_work_group_copyPU3AS3fPKU3AS1fm9ocl_event.exit.1
  330. barrier; // work-group barrier before scratch is read again
  331. ld_group_align(4)_f32 $s8, [$s7]; // x = element at the group address saved in $s7 (same element as the first compute phase); $s8 now serves as the accumulator and is recomputed at the loop head
  332. mul_ftz_f32 $s9, $s8, $s8; // x*x
  333. add_ftz_f32 $s8, $s8, $s9; // $s8 = x + x*x (step 1 of 100; the remaining mul/add pairs repeat this on $s8)
  334. mul_ftz_f32 $s9, $s8, $s8;
  335. add_ftz_f32 $s8, $s8, $s9;
  336. mul_ftz_f32 $s9, $s8, $s8;
  337. add_ftz_f32 $s8, $s8, $s9;
  338. add_u32 $s6, $s6, 2; // source element index base += 2 (two copies were issued this pass)
  339. add_u32 $s4, $s4, 2; // outer loop counter += 2
  340. mul_ftz_f32 $s9, $s8, $s8;
  341. add_ftz_f32 $s8, $s8, $s9;
  342. mul_ftz_f32 $s9, $s8, $s8;
  343. add_ftz_f32 $s8, $s8, $s9;
  344. mul_ftz_f32 $s9, $s8, $s8;
  345. add_ftz_f32 $s8, $s8, $s9;
  346. mul_ftz_f32 $s9, $s8, $s8;
  347. add_ftz_f32 $s8, $s8, $s9;
  348. mul_ftz_f32 $s9, $s8, $s8;
  349. add_ftz_f32 $s8, $s8, $s9;
  350. mul_ftz_f32 $s9, $s8, $s8;
  351. add_ftz_f32 $s8, $s8, $s9;
  352. mul_ftz_f32 $s9, $s8, $s8;
  353. add_ftz_f32 $s8, $s8, $s9;
  354. mul_ftz_f32 $s9, $s8, $s8;
  355. add_ftz_f32 $s8, $s8, $s9;
  356. mul_ftz_f32 $s9, $s8, $s8;
  357. add_ftz_f32 $s8, $s8, $s9;
  358. mul_ftz_f32 $s9, $s8, $s8;
  359. add_ftz_f32 $s8, $s8, $s9;
  360. mul_ftz_f32 $s9, $s8, $s8;
  361. add_ftz_f32 $s8, $s8, $s9;
  362. mul_ftz_f32 $s9, $s8, $s8;
  363. add_ftz_f32 $s8, $s8, $s9;
  364. mul_ftz_f32 $s9, $s8, $s8;
  365. add_ftz_f32 $s8, $s8, $s9;
  366. mul_ftz_f32 $s9, $s8, $s8;
  367. add_ftz_f32 $s8, $s8, $s9;
  368. mul_ftz_f32 $s9, $s8, $s8;
  369. add_ftz_f32 $s8, $s8, $s9;
  370. mul_ftz_f32 $s9, $s8, $s8;
  371. add_ftz_f32 $s8, $s8, $s9;
  372. mul_ftz_f32 $s9, $s8, $s8;
  373. add_ftz_f32 $s8, $s8, $s9;
  374. mul_ftz_f32 $s9, $s8, $s8;
  375. add_ftz_f32 $s8, $s8, $s9;
  376. mul_ftz_f32 $s9, $s8, $s8;
  377. add_ftz_f32 $s8, $s8, $s9;
  378. mul_ftz_f32 $s9, $s8, $s8;
  379. add_ftz_f32 $s8, $s8, $s9;
  380. mul_ftz_f32 $s9, $s8, $s8;
  381. add_ftz_f32 $s8, $s8, $s9;
  382. mul_ftz_f32 $s9, $s8, $s8;
  383. add_ftz_f32 $s8, $s8, $s9;
  384. mul_ftz_f32 $s9, $s8, $s8;
  385. add_ftz_f32 $s8, $s8, $s9;
  386. mul_ftz_f32 $s9, $s8, $s8;
  387. add_ftz_f32 $s8, $s8, $s9;
  388. mul_ftz_f32 $s9, $s8, $s8;
  389. add_ftz_f32 $s8, $s8, $s9;
  390. mul_ftz_f32 $s9, $s8, $s8;
  391. add_ftz_f32 $s8, $s8, $s9;
  392. mul_ftz_f32 $s9, $s8, $s8;
  393. add_ftz_f32 $s8, $s8, $s9;
  394. mul_ftz_f32 $s9, $s8, $s8;
  395. add_ftz_f32 $s8, $s8, $s9;
  396. mul_ftz_f32 $s9, $s8, $s8;
  397. add_ftz_f32 $s8, $s8, $s9;
  398. mul_ftz_f32 $s9, $s8, $s8;
  399. add_ftz_f32 $s8, $s8, $s9;
  400. mul_ftz_f32 $s9, $s8, $s8;
  401. add_ftz_f32 $s8, $s8, $s9;
  402. mul_ftz_f32 $s9, $s8, $s8;
  403. add_ftz_f32 $s8, $s8, $s9;
  404. mul_ftz_f32 $s9, $s8, $s8;
  405. add_ftz_f32 $s8, $s8, $s9;
  406. mul_ftz_f32 $s9, $s8, $s8;
  407. add_ftz_f32 $s8, $s8, $s9;
  408. mul_ftz_f32 $s9, $s8, $s8;
  409. add_ftz_f32 $s8, $s8, $s9;
  410. mul_ftz_f32 $s9, $s8, $s8;
  411. add_ftz_f32 $s8, $s8, $s9;
  412. mul_ftz_f32 $s9, $s8, $s8;
  413. add_ftz_f32 $s8, $s8, $s9;
  414. mul_ftz_f32 $s9, $s8, $s8;
  415. add_ftz_f32 $s8, $s8, $s9;
  416. mul_ftz_f32 $s9, $s8, $s8;
  417. add_ftz_f32 $s8, $s8, $s9;
  418. mul_ftz_f32 $s9, $s8, $s8;
  419. add_ftz_f32 $s8, $s8, $s9;
  420. mul_ftz_f32 $s9, $s8, $s8;
  421. add_ftz_f32 $s8, $s8, $s9;
  422. mul_ftz_f32 $s9, $s8, $s8;
  423. add_ftz_f32 $s8, $s8, $s9;
  424. mul_ftz_f32 $s9, $s8, $s8;
  425. add_ftz_f32 $s8, $s8, $s9;
  426. mul_ftz_f32 $s9, $s8, $s8;
  427. add_ftz_f32 $s8, $s8, $s9;
  428. mul_ftz_f32 $s9, $s8, $s8;
  429. add_ftz_f32 $s8, $s8, $s9;
  430. mul_ftz_f32 $s9, $s8, $s8;
  431. add_ftz_f32 $s8, $s8, $s9;
  432. mul_ftz_f32 $s9, $s8, $s8;
  433. add_ftz_f32 $s8, $s8, $s9;
  434. mul_ftz_f32 $s9, $s8, $s8;
  435. add_ftz_f32 $s8, $s8, $s9;
  436. mul_ftz_f32 $s9, $s8, $s8;
  437. add_ftz_f32 $s8, $s8, $s9;
  438. mul_ftz_f32 $s9, $s8, $s8;
  439. add_ftz_f32 $s8, $s8, $s9;
  440. mul_ftz_f32 $s9, $s8, $s8;
  441. add_ftz_f32 $s8, $s8, $s9;
  442. mul_ftz_f32 $s9, $s8, $s8;
  443. add_ftz_f32 $s8, $s8, $s9;
  444. mul_ftz_f32 $s9, $s8, $s8;
  445. add_ftz_f32 $s8, $s8, $s9;
  446. mul_ftz_f32 $s9, $s8, $s8;
  447. add_ftz_f32 $s8, $s8, $s9;
  448. mul_ftz_f32 $s9, $s8, $s8;
  449. add_ftz_f32 $s8, $s8, $s9;
  450. mul_ftz_f32 $s9, $s8, $s8;
  451. add_ftz_f32 $s8, $s8, $s9;
  452. mul_ftz_f32 $s9, $s8, $s8;
  453. add_ftz_f32 $s8, $s8, $s9;
  454. mul_ftz_f32 $s9, $s8, $s8;
  455. add_ftz_f32 $s8, $s8, $s9;
  456. mul_ftz_f32 $s9, $s8, $s8;
  457. add_ftz_f32 $s8, $s8, $s9;
  458. mul_ftz_f32 $s9, $s8, $s8;
  459. add_ftz_f32 $s8, $s8, $s9;
  460. mul_ftz_f32 $s9, $s8, $s8;
  461. add_ftz_f32 $s8, $s8, $s9;
  462. mul_ftz_f32 $s9, $s8, $s8;
  463. add_ftz_f32 $s8, $s8, $s9;
  464. mul_ftz_f32 $s9, $s8, $s8;
  465. add_ftz_f32 $s8, $s8, $s9;
  466. mul_ftz_f32 $s9, $s8, $s8;
  467. add_ftz_f32 $s8, $s8, $s9;
  468. mul_ftz_f32 $s9, $s8, $s8;
  469. add_ftz_f32 $s8, $s8, $s9;
  470. mul_ftz_f32 $s9, $s8, $s8;
  471. add_ftz_f32 $s8, $s8, $s9;
  472. mul_ftz_f32 $s9, $s8, $s8;
  473. add_ftz_f32 $s8, $s8, $s9;
  474. mul_ftz_f32 $s9, $s8, $s8;
  475. add_ftz_f32 $s8, $s8, $s9;
  476. mul_ftz_f32 $s9, $s8, $s8;
  477. add_ftz_f32 $s8, $s8, $s9;
  478. mul_ftz_f32 $s9, $s8, $s8;
  479. add_ftz_f32 $s8, $s8, $s9;
  480. mul_ftz_f32 $s9, $s8, $s8;
  481. add_ftz_f32 $s8, $s8, $s9;
  482. mul_ftz_f32 $s9, $s8, $s8;
  483. add_ftz_f32 $s8, $s8, $s9;
  484. mul_ftz_f32 $s9, $s8, $s8;
  485. add_ftz_f32 $s8, $s8, $s9;
  486. mul_ftz_f32 $s9, $s8, $s8;
  487. add_ftz_f32 $s8, $s8, $s9;
  488. mul_ftz_f32 $s9, $s8, $s8;
  489. add_ftz_f32 $s8, $s8, $s9;
  490. mul_ftz_f32 $s9, $s8, $s8;
  491. add_ftz_f32 $s8, $s8, $s9;
  492. mul_ftz_f32 $s9, $s8, $s8;
  493. add_ftz_f32 $s8, $s8, $s9;
  494. mul_ftz_f32 $s9, $s8, $s8;
  495. add_ftz_f32 $s8, $s8, $s9;
  496. mul_ftz_f32 $s9, $s8, $s8;
  497. add_ftz_f32 $s8, $s8, $s9;
  498. mul_ftz_f32 $s9, $s8, $s8;
  499. add_ftz_f32 $s8, $s8, $s9;
  500. mul_ftz_f32 $s9, $s8, $s8;
  501. add_ftz_f32 $s8, $s8, $s9;
  502. mul_ftz_f32 $s9, $s8, $s8;
  503. add_ftz_f32 $s8, $s8, $s9;
  504. mul_ftz_f32 $s9, $s8, $s8;
  505. add_ftz_f32 $s8, $s8, $s9;
  506. mul_ftz_f32 $s9, $s8, $s8;
  507. add_ftz_f32 $s8, $s8, $s9;
  508. mul_ftz_f32 $s9, $s8, $s8;
  509. add_ftz_f32 $s8, $s8, $s9;
  510. mul_ftz_f32 $s9, $s8, $s8;
  511. add_ftz_f32 $s8, $s8, $s9;
  512. mul_ftz_f32 $s9, $s8, $s8;
  513. add_ftz_f32 $s8, $s8, $s9;
  514. mul_ftz_f32 $s9, $s8, $s8;
  515. add_ftz_f32 $s8, $s8, $s9;
  516. mul_ftz_f32 $s9, $s8, $s8;
  517. add_ftz_f32 $s8, $s8, $s9;
  518. mul_ftz_f32 $s9, $s8, $s8;
  519. add_ftz_f32 $s8, $s8, $s9;
  520. mul_ftz_f32 $s9, $s8, $s8;
  521. add_ftz_f32 $s8, $s8, $s9;
  522. mul_ftz_f32 $s9, $s8, $s8;
  523. add_ftz_f32 $s8, $s8, $s9;
  524. mul_ftz_f32 $s9, $s8, $s8;
  525. add_ftz_f32 $s8, $s8, $s9;
  526. mul_ftz_f32 $s9, $s8, $s8;
  527. add_ftz_f32 $s8, $s8, $s9;
  528. mul_ftz_f32 $s9, $s8, $s8;
  529. add_ftz_f32 $s8, $s8, $s9;
  530. mul_ftz_f32 $s9, $s8, $s8;
  531. add_ftz_f32 $s8, $s8, $s9;
  532. mul_ftz_f32 $s9, $s8, $s8;
  533. add_ftz_f32 $s8, $s8, $s9;
  534. st_group_align(4)_f32 $s8, [$s7]; // write the chain result back to the same scratch element
  535. cmp_ne_b1_s32 $c0, $s4, 32; // loop counter != 32 ?
  536. cbr_b1 $c0, @LBB0_1; // repeat the pass until $s4 reaches 32 (16 passes, $s4 stepping by 2 from 0)
  537. // BB#8: epilogue - store one f32 per work-item to the output buffer
  538. cvt_s64_s32 $d0, $s5; // $s5 = low 32 bits of (absolute id + global offset), sign-extended
  539. shl_u64 $d0, $d0, 2; // * 4 -> byte offset
  540. add_u64 $d0, $d1, $d0; // + output base ($d1) -> global address
  541. ld_group_align(4)_f32 $s0, [$s1]; // read scratch[local id] via the address computed in the entry block
  542. st_global_align(4)_f32 $s0, [$d0]; // store it to the output element
  543. ret;
  544. };
Add Comment
Please, Sign In to add comment