Guest User

Untitled

a guest
Aug 5th, 2016
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.56 KB | None | 0 0
  1. module &input_bc:1:0:$full:$large:$default;
  2. extension "amd:gcn";
  3. extension "amd:mipmap";
  4. extension "IMAGE";
  5.  
  // ---------------------------------------------------------------------------
  // Kernel: &__OpenCL_read_linear_uncached_kernel  (HSAIL, large machine model)
  //
  // Arguments used by the code below:
  //   %_.global_offset_0  added to workitemabsid(0) to form the global index
  //   %input              base address of the f32 data that is read
  //   %output             base address of the f32 data that is written
  // The remaining kernargs are declared but never loaded in this body.
  //
  // Behaviour, as written:
  //   gid = low 32 bits of (workitemabsid(0) + global_offset_0)
  //   lid = workitemid(0)            (0..255, see requiredworkgroupsize)
  //   scratch[lid] = 0
  //   for i in 0..15:                ($s2 = 512*i, loop ends when $s2 == 8192)
  //       bank = (512*i) & 512       (0 on even i, 512 on odd i)
  //       x = scratch[(bank | 256) + lid]
  //       scratch[bank + lid] = input[gid + 2*i + 1]
  //       repeat 200 times: x = x + x*x
  //       scratch[(bank | 256) + lid] = x
  //   output[gid] = scratch[lid]
  // scratch[lid] is last written on iteration 14 (bank == 0), so the value
  // stored to output[gid] is input[gid + 29].
  //
  // Every group slot a work-item touches is indexed with its own lid
  // (lid, 256+lid, 512+lid, 768+lid), and the kernel contains no barrier.
  //
  // FIX: the group array used to be declared with 512 elements, but the
  // indices above reach 1023 whenever bank == 512 (every odd iteration).
  // It is now declared with 1024 elements so that all generated addresses
  // fall inside the variable. No instruction was changed.
  // NOTE(review): this text looks compiler-generated; the same bound should
  // presumably be corrected in the originating OpenCL source - confirm.
  // NOTE(review): slots 256+lid and 768+lid are read before this kernel ever
  // writes them (only scratch[lid] is zeroed), so the arithmetic chain starts
  // from whatever the group memory held - confirm this is intended.
  //
  // Register roles:
  //   $s0  group address of scratch[lid]        $d0  %output base, later &output[gid]
  //   $s1  lid                                  $d1  %input base
  //   $s2  loop counter, step 512               $d2  &input[gid + 2*i]
  //   $s3  lid*4 during setup, then gid         $c0  loop condition
  //   $s4  running input index (gid + 2*i)
  //   $s5  bank, then chain accumulator
  //   $s6  byte offset of the chain slot
  //   $s7/$s8  chain temporaries, $s9  value loaded from %input
  // ---------------------------------------------------------------------------
  6. prog kernel &__OpenCL_read_linear_uncached_kernel(
  7. kernarg_u64 %_.global_offset_0,
  8. kernarg_u64 %_.global_offset_1,
  9. kernarg_u64 %_.global_offset_2,
  10. kernarg_u64 %_.printf_buffer,
  11. kernarg_u64 %_.vqueue_pointer,
  12. kernarg_u64 %_.aqlwrap_pointer,
  13. kernarg_u64 %input,
  14. kernarg_u64 %output)
  15. {
  // Work-groups are exactly 256x1x1, so workitemid(0) is always in 0..255.
  16. requiredworkgroupsize 256, 1, 1;
  17. requirenopartialworkgroups;
  // "AMD RTI" pragmas: metadata strings, presumably consumed by the AMD
  // OpenCL runtime (argument layout, reflection types) - semantics are not
  // visible from this file. They are reproduced unchanged.
  18. pragma "AMD RTI", "ARGSTART:__OpenCL_read_linear_uncached_kernel";
  19. pragma "AMD RTI", "version:3:1:104";
  20. pragma "AMD RTI", "device:generic";
  21. pragma "AMD RTI", "uniqueid:1024";
  22. pragma "AMD RTI", "cws:256:1:1";
  23. pragma "AMD RTI", "value:_.global_offset_0:i64:1:1:0";
  24. pragma "AMD RTI", "constarg:0:_.global_offset_0";
  25. pragma "AMD RTI", "value:_.global_offset_1:i64:1:1:16";
  26. pragma "AMD RTI", "constarg:1:_.global_offset_1";
  27. pragma "AMD RTI", "value:_.global_offset_2:i64:1:1:32";
  28. pragma "AMD RTI", "constarg:2:_.global_offset_2";
  29. pragma "AMD RTI", "pointer:_.printf_buffer:u8:1:1:48:uav:7:1:RO:0:0:0";
  30. pragma "AMD RTI", "value:_.vqueue_pointer:i64:1:1:64";
  31. pragma "AMD RTI", "value:_.aqlwrap_pointer:i64:1:1:80";
  32. pragma "AMD RTI", "pointer:input:float:1:1:96:uav:7:4:RO:0:0:0";
  33. pragma "AMD RTI", "pointer:output:float:1:1:112:uav:7:4:RW:0:0:0";
  34. pragma "AMD RTI", "function:1:0";
  35. pragma "AMD RTI", "memory:64bitABI";
  36. pragma "AMD RTI", "privateid:8";
  37. pragma "AMD RTI", "enqueue_kernel:0";
  38. pragma "AMD RTI", "kernel_index:0";
  39. pragma "AMD RTI", "reflection:0:size_t";
  40. pragma "AMD RTI", "reflection:1:size_t";
  41. pragma "AMD RTI", "reflection:2:size_t";
  42. pragma "AMD RTI", "reflection:3:size_t";
  43. pragma "AMD RTI", "reflection:4:size_t";
  44. pragma "AMD RTI", "reflection:5:size_t";
  45. pragma "AMD RTI", "reflection:6:float*";
  46. pragma "AMD RTI", "reflection:7:float*";
  47. pragma "AMD RTI", "ARGEND:__OpenCL_read_linear_uncached_kernel";
  // 1024 elements (was 512): the loop addresses elements 0..1023, see header.
  48. group_f32 %read_linear_uncached.scratch[1024];
  49.  
  50. @__OpenCL_read_linear_uncached_kernel_entry:
  51. // BB#0:
  // $d0 = workitemabsid(0) + global_offset_0 (64-bit global index).
  52. workitemabsid_u32 $s0, 0;
  53. cvt_u64_u32 $d0, $s0;
  54. ld_kernarg_align(8)_width(all)_u64 $d1, [%_.global_offset_0];
  55. add_u64 $d0, $d0, $d1;
  // $s1 = lid, $s3 = lid*4 (byte offset of scratch[lid]).
  56. workitemid_u32 $s1, 0;
  57. shl_u32 $s3, $s1, 2;
  // $s0 = group address of scratch[lid]; kept for the final read-back.
  58. lda_group_u32 $s0, [%read_linear_uncached.scratch];
  59. add_u32 $s0, $s0, $s3;
  60. mov_b32 $s2, 0;                                              // loop counter = 0
  61. st_group_align(4)_u32 0, [%read_linear_uncached.scratch][$s3]; // scratch[lid] = 0
  62. cvt_u32_u64 $s3, $d0;                                        // $s3 = gid (truncated to 32 bits)
  63. ld_kernarg_align(8)_width(all)_u64 $d0, [%output];
  64. ld_kernarg_align(8)_width(all)_u64 $d1, [%input];
  65. mov_b32 $s4, $s3;                                            // running input index starts at gid
  66.  
  67. @LBB0_1:
  // $s5 = bank (0 or 512); $s6 = byte offset of chain slot (bank|256)+lid.
  68. and_b32 $s5, $s2, 512;
  69. or_b32 $s6, $s5, 256;
  70. add_u32 $s6, $s6, $s1;
  71. shl_u32 $s6, $s6, 2;
  72. ld_group_align(4)_f32 $s7, [%read_linear_uncached.scratch][$s6]; // x = chain slot
  73. mul_ftz_f32 $s8, $s7, $s7;
  74. add_ftz_f32 $s7, $s7, $s8;                                   // chain step 1: x = x + x*x
  // $d2 = input + 4 * sign_extend($s4); the load below adds 4 more bytes,
  // i.e. it reads element $s4 + 1.
  75. cvt_s64_s32 $d2, $s4;
  76. shl_u64 $d2, $d2, 2;
  77. add_u64 $d2, $d1, $d2;
  78. add_u32 $s4, $s4, 2;                                         // next iteration: index += 2
  79. add_u32 $s2, $s2, 512;                                       // loop counter += 512
  80. mul_ftz_f32 $s8, $s7, $s7;
  81. ld_global_align(4)_const_f32 $s9, [$d2+4];
  // Store the loaded value to scratch[bank + lid].
  82. add_u32 $s5, $s5, $s1;
  83. shl_u32 $s5, $s5, 2;
  84. st_group_align(4)_f32 $s9, [%read_linear_uncached.scratch][$s5];
  85. add_ftz_f32 $s5, $s7, $s8;                                   // chain step 2; $s5 is now the accumulator
  // Chain steps 3..200: 198 further dependent repetitions of x = x + x*x.
  86. mul_ftz_f32 $s7, $s5, $s5;
  87. add_ftz_f32 $s5, $s5, $s7;
  88. mul_ftz_f32 $s7, $s5, $s5;
  89. add_ftz_f32 $s5, $s5, $s7;
  90. mul_ftz_f32 $s7, $s5, $s5;
  91. add_ftz_f32 $s5, $s5, $s7;
  92. mul_ftz_f32 $s7, $s5, $s5;
  93. add_ftz_f32 $s5, $s5, $s7;
  94. mul_ftz_f32 $s7, $s5, $s5;
  95. add_ftz_f32 $s5, $s5, $s7;
  96. mul_ftz_f32 $s7, $s5, $s5;
  97. add_ftz_f32 $s5, $s5, $s7;
  98. mul_ftz_f32 $s7, $s5, $s5;
  99. add_ftz_f32 $s5, $s5, $s7;
  100. mul_ftz_f32 $s7, $s5, $s5;
  101. add_ftz_f32 $s5, $s5, $s7;
  102. mul_ftz_f32 $s7, $s5, $s5;
  103. add_ftz_f32 $s5, $s5, $s7;
  104. mul_ftz_f32 $s7, $s5, $s5;
  105. add_ftz_f32 $s5, $s5, $s7;
  106. mul_ftz_f32 $s7, $s5, $s5;
  107. add_ftz_f32 $s5, $s5, $s7;
  108. mul_ftz_f32 $s7, $s5, $s5;
  109. add_ftz_f32 $s5, $s5, $s7;
  110. mul_ftz_f32 $s7, $s5, $s5;
  111. add_ftz_f32 $s5, $s5, $s7;
  112. mul_ftz_f32 $s7, $s5, $s5;
  113. add_ftz_f32 $s5, $s5, $s7;
  114. mul_ftz_f32 $s7, $s5, $s5;
  115. add_ftz_f32 $s5, $s5, $s7;
  116. mul_ftz_f32 $s7, $s5, $s5;
  117. add_ftz_f32 $s5, $s5, $s7;
  118. mul_ftz_f32 $s7, $s5, $s5;
  119. add_ftz_f32 $s5, $s5, $s7;
  120. mul_ftz_f32 $s7, $s5, $s5;
  121. add_ftz_f32 $s5, $s5, $s7;
  122. mul_ftz_f32 $s7, $s5, $s5;
  123. add_ftz_f32 $s5, $s5, $s7;
  124. mul_ftz_f32 $s7, $s5, $s5;
  125. add_ftz_f32 $s5, $s5, $s7;
  126. mul_ftz_f32 $s7, $s5, $s5;
  127. add_ftz_f32 $s5, $s5, $s7;
  128. mul_ftz_f32 $s7, $s5, $s5;
  129. add_ftz_f32 $s5, $s5, $s7;
  130. mul_ftz_f32 $s7, $s5, $s5;
  131. add_ftz_f32 $s5, $s5, $s7;
  132. mul_ftz_f32 $s7, $s5, $s5;
  133. add_ftz_f32 $s5, $s5, $s7;
  134. mul_ftz_f32 $s7, $s5, $s5;
  135. add_ftz_f32 $s5, $s5, $s7;
  136. mul_ftz_f32 $s7, $s5, $s5;
  137. add_ftz_f32 $s5, $s5, $s7;
  138. mul_ftz_f32 $s7, $s5, $s5;
  139. add_ftz_f32 $s5, $s5, $s7;
  140. mul_ftz_f32 $s7, $s5, $s5;
  141. add_ftz_f32 $s5, $s5, $s7;
  142. mul_ftz_f32 $s7, $s5, $s5;
  143. add_ftz_f32 $s5, $s5, $s7;
  144. mul_ftz_f32 $s7, $s5, $s5;
  145. add_ftz_f32 $s5, $s5, $s7;
  146. mul_ftz_f32 $s7, $s5, $s5;
  147. add_ftz_f32 $s5, $s5, $s7;
  148. mul_ftz_f32 $s7, $s5, $s5;
  149. add_ftz_f32 $s5, $s5, $s7;
  150. mul_ftz_f32 $s7, $s5, $s5;
  151. add_ftz_f32 $s5, $s5, $s7;
  152. mul_ftz_f32 $s7, $s5, $s5;
  153. add_ftz_f32 $s5, $s5, $s7;
  154. mul_ftz_f32 $s7, $s5, $s5;
  155. add_ftz_f32 $s5, $s5, $s7;
  156. mul_ftz_f32 $s7, $s5, $s5;
  157. add_ftz_f32 $s5, $s5, $s7;
  158. mul_ftz_f32 $s7, $s5, $s5;
  159. add_ftz_f32 $s5, $s5, $s7;
  160. mul_ftz_f32 $s7, $s5, $s5;
  161. add_ftz_f32 $s5, $s5, $s7;
  162. mul_ftz_f32 $s7, $s5, $s5;
  163. add_ftz_f32 $s5, $s5, $s7;
  164. mul_ftz_f32 $s7, $s5, $s5;
  165. add_ftz_f32 $s5, $s5, $s7;
  166. mul_ftz_f32 $s7, $s5, $s5;
  167. add_ftz_f32 $s5, $s5, $s7;
  168. mul_ftz_f32 $s7, $s5, $s5;
  169. add_ftz_f32 $s5, $s5, $s7;
  170. mul_ftz_f32 $s7, $s5, $s5;
  171. add_ftz_f32 $s5, $s5, $s7;
  172. mul_ftz_f32 $s7, $s5, $s5;
  173. add_ftz_f32 $s5, $s5, $s7;
  174. mul_ftz_f32 $s7, $s5, $s5;
  175. add_ftz_f32 $s5, $s5, $s7;
  176. mul_ftz_f32 $s7, $s5, $s5;
  177. add_ftz_f32 $s5, $s5, $s7;
  178. mul_ftz_f32 $s7, $s5, $s5;
  179. add_ftz_f32 $s5, $s5, $s7;
  180. mul_ftz_f32 $s7, $s5, $s5;
  181. add_ftz_f32 $s5, $s5, $s7;
  182. mul_ftz_f32 $s7, $s5, $s5;
  183. add_ftz_f32 $s5, $s5, $s7;
  184. mul_ftz_f32 $s7, $s5, $s5;
  185. add_ftz_f32 $s5, $s5, $s7;
  186. mul_ftz_f32 $s7, $s5, $s5;
  187. add_ftz_f32 $s5, $s5, $s7;
  188. mul_ftz_f32 $s7, $s5, $s5;
  189. add_ftz_f32 $s5, $s5, $s7;
  190. mul_ftz_f32 $s7, $s5, $s5;
  191. add_ftz_f32 $s5, $s5, $s7;
  192. mul_ftz_f32 $s7, $s5, $s5;
  193. add_ftz_f32 $s5, $s5, $s7;
  194. mul_ftz_f32 $s7, $s5, $s5;
  195. add_ftz_f32 $s5, $s5, $s7;
  196. mul_ftz_f32 $s7, $s5, $s5;
  197. add_ftz_f32 $s5, $s5, $s7;
  198. mul_ftz_f32 $s7, $s5, $s5;
  199. add_ftz_f32 $s5, $s5, $s7;
  200. mul_ftz_f32 $s7, $s5, $s5;
  201. add_ftz_f32 $s5, $s5, $s7;
  202. mul_ftz_f32 $s7, $s5, $s5;
  203. add_ftz_f32 $s5, $s5, $s7;
  204. mul_ftz_f32 $s7, $s5, $s5;
  205. add_ftz_f32 $s5, $s5, $s7;
  206. mul_ftz_f32 $s7, $s5, $s5;
  207. add_ftz_f32 $s5, $s5, $s7;
  208. mul_ftz_f32 $s7, $s5, $s5;
  209. add_ftz_f32 $s5, $s5, $s7;
  210. mul_ftz_f32 $s7, $s5, $s5;
  211. add_ftz_f32 $s5, $s5, $s7;
  212. mul_ftz_f32 $s7, $s5, $s5;
  213. add_ftz_f32 $s5, $s5, $s7;
  214. mul_ftz_f32 $s7, $s5, $s5;
  215. add_ftz_f32 $s5, $s5, $s7;
  216. mul_ftz_f32 $s7, $s5, $s5;
  217. add_ftz_f32 $s5, $s5, $s7;
  218. mul_ftz_f32 $s7, $s5, $s5;
  219. add_ftz_f32 $s5, $s5, $s7;
  220. mul_ftz_f32 $s7, $s5, $s5;
  221. add_ftz_f32 $s5, $s5, $s7;
  222. mul_ftz_f32 $s7, $s5, $s5;
  223. add_ftz_f32 $s5, $s5, $s7;
  224. mul_ftz_f32 $s7, $s5, $s5;
  225. add_ftz_f32 $s5, $s5, $s7;
  226. mul_ftz_f32 $s7, $s5, $s5;
  227. add_ftz_f32 $s5, $s5, $s7;
  228. mul_ftz_f32 $s7, $s5, $s5;
  229. add_ftz_f32 $s5, $s5, $s7;
  230. mul_ftz_f32 $s7, $s5, $s5;
  231. add_ftz_f32 $s5, $s5, $s7;
  232. mul_ftz_f32 $s7, $s5, $s5;
  233. add_ftz_f32 $s5, $s5, $s7;
  234. mul_ftz_f32 $s7, $s5, $s5;
  235. add_ftz_f32 $s5, $s5, $s7;
  236. mul_ftz_f32 $s7, $s5, $s5;
  237. add_ftz_f32 $s5, $s5, $s7;
  238. mul_ftz_f32 $s7, $s5, $s5;
  239. add_ftz_f32 $s5, $s5, $s7;
  240. mul_ftz_f32 $s7, $s5, $s5;
  241. add_ftz_f32 $s5, $s5, $s7;
  242. mul_ftz_f32 $s7, $s5, $s5;
  243. add_ftz_f32 $s5, $s5, $s7;
  244. mul_ftz_f32 $s7, $s5, $s5;
  245. add_ftz_f32 $s5, $s5, $s7;
  246. mul_ftz_f32 $s7, $s5, $s5;
  247. add_ftz_f32 $s5, $s5, $s7;
  248. mul_ftz_f32 $s7, $s5, $s5;
  249. add_ftz_f32 $s5, $s5, $s7;
  250. mul_ftz_f32 $s7, $s5, $s5;
  251. add_ftz_f32 $s5, $s5, $s7;
  252. mul_ftz_f32 $s7, $s5, $s5;
  253. add_ftz_f32 $s5, $s5, $s7;
  254. mul_ftz_f32 $s7, $s5, $s5;
  255. add_ftz_f32 $s5, $s5, $s7;
  256. mul_ftz_f32 $s7, $s5, $s5;
  257. add_ftz_f32 $s5, $s5, $s7;
  258. mul_ftz_f32 $s7, $s5, $s5;
  259. add_ftz_f32 $s5, $s5, $s7;
  260. mul_ftz_f32 $s7, $s5, $s5;
  261. add_ftz_f32 $s5, $s5, $s7;
  262. mul_ftz_f32 $s7, $s5, $s5;
  263. add_ftz_f32 $s5, $s5, $s7;
  264. mul_ftz_f32 $s7, $s5, $s5;
  265. add_ftz_f32 $s5, $s5, $s7;
  266. mul_ftz_f32 $s7, $s5, $s5;
  267. add_ftz_f32 $s5, $s5, $s7;
  268. mul_ftz_f32 $s7, $s5, $s5;
  269. add_ftz_f32 $s5, $s5, $s7;
  270. mul_ftz_f32 $s7, $s5, $s5;
  271. add_ftz_f32 $s5, $s5, $s7;
  272. mul_ftz_f32 $s7, $s5, $s5;
  273. add_ftz_f32 $s5, $s5, $s7;
  274. mul_ftz_f32 $s7, $s5, $s5;
  275. add_ftz_f32 $s5, $s5, $s7;
  276. mul_ftz_f32 $s7, $s5, $s5;
  277. add_ftz_f32 $s5, $s5, $s7;
  278. mul_ftz_f32 $s7, $s5, $s5;
  279. add_ftz_f32 $s5, $s5, $s7;
  280. mul_ftz_f32 $s7, $s5, $s5;
  281. add_ftz_f32 $s5, $s5, $s7;
  282. mul_ftz_f32 $s7, $s5, $s5;
  283. add_ftz_f32 $s5, $s5, $s7;
  284. mul_ftz_f32 $s7, $s5, $s5;
  285. add_ftz_f32 $s5, $s5, $s7;
  286. mul_ftz_f32 $s7, $s5, $s5;
  287. add_ftz_f32 $s5, $s5, $s7;
  288. mul_ftz_f32 $s7, $s5, $s5;
  289. add_ftz_f32 $s5, $s5, $s7;
  290. mul_ftz_f32 $s7, $s5, $s5;
  291. add_ftz_f32 $s5, $s5, $s7;
  292. mul_ftz_f32 $s7, $s5, $s5;
  293. add_ftz_f32 $s5, $s5, $s7;
  294. mul_ftz_f32 $s7, $s5, $s5;
  295. add_ftz_f32 $s5, $s5, $s7;
  296. mul_ftz_f32 $s7, $s5, $s5;
  297. add_ftz_f32 $s5, $s5, $s7;
  298. mul_ftz_f32 $s7, $s5, $s5;
  299. add_ftz_f32 $s5, $s5, $s7;
  300. mul_ftz_f32 $s7, $s5, $s5;
  301. add_ftz_f32 $s5, $s5, $s7;
  302. mul_ftz_f32 $s7, $s5, $s5;
  303. add_ftz_f32 $s5, $s5, $s7;
  304. mul_ftz_f32 $s7, $s5, $s5;
  305. add_ftz_f32 $s5, $s5, $s7;
  306. mul_ftz_f32 $s7, $s5, $s5;
  307. add_ftz_f32 $s5, $s5, $s7;
  308. mul_ftz_f32 $s7, $s5, $s5;
  309. add_ftz_f32 $s5, $s5, $s7;
  310. mul_ftz_f32 $s7, $s5, $s5;
  311. add_ftz_f32 $s5, $s5, $s7;
  312. mul_ftz_f32 $s7, $s5, $s5;
  313. add_ftz_f32 $s5, $s5, $s7;
  314. mul_ftz_f32 $s7, $s5, $s5;
  315. add_ftz_f32 $s5, $s5, $s7;
  316. mul_ftz_f32 $s7, $s5, $s5;
  317. add_ftz_f32 $s5, $s5, $s7;
  318. mul_ftz_f32 $s7, $s5, $s5;
  319. add_ftz_f32 $s5, $s5, $s7;
  320. mul_ftz_f32 $s7, $s5, $s5;
  321. add_ftz_f32 $s5, $s5, $s7;
  322. mul_ftz_f32 $s7, $s5, $s5;
  323. add_ftz_f32 $s5, $s5, $s7;
  324. mul_ftz_f32 $s7, $s5, $s5;
  325. add_ftz_f32 $s5, $s5, $s7;
  326. mul_ftz_f32 $s7, $s5, $s5;
  327. add_ftz_f32 $s5, $s5, $s7;
  328. mul_ftz_f32 $s7, $s5, $s5;
  329. add_ftz_f32 $s5, $s5, $s7;
  330. mul_ftz_f32 $s7, $s5, $s5;
  331. add_ftz_f32 $s5, $s5, $s7;
  332. mul_ftz_f32 $s7, $s5, $s5;
  333. add_ftz_f32 $s5, $s5, $s7;
  334. mul_ftz_f32 $s7, $s5, $s5;
  335. add_ftz_f32 $s5, $s5, $s7;
  336. mul_ftz_f32 $s7, $s5, $s5;
  337. add_ftz_f32 $s5, $s5, $s7;
  338. mul_ftz_f32 $s7, $s5, $s5;
  339. add_ftz_f32 $s5, $s5, $s7;
  340. mul_ftz_f32 $s7, $s5, $s5;
  341. add_ftz_f32 $s5, $s5, $s7;
  342. mul_ftz_f32 $s7, $s5, $s5;
  343. add_ftz_f32 $s5, $s5, $s7;
  344. mul_ftz_f32 $s7, $s5, $s5;
  345. add_ftz_f32 $s5, $s5, $s7;
  346. mul_ftz_f32 $s7, $s5, $s5;
  347. add_ftz_f32 $s5, $s5, $s7;
  348. mul_ftz_f32 $s7, $s5, $s5;
  349. add_ftz_f32 $s5, $s5, $s7;
  350. mul_ftz_f32 $s7, $s5, $s5;
  351. add_ftz_f32 $s5, $s5, $s7;
  352. mul_ftz_f32 $s7, $s5, $s5;
  353. add_ftz_f32 $s5, $s5, $s7;
  354. mul_ftz_f32 $s7, $s5, $s5;
  355. add_ftz_f32 $s5, $s5, $s7;
  356. mul_ftz_f32 $s7, $s5, $s5;
  357. add_ftz_f32 $s5, $s5, $s7;
  358. mul_ftz_f32 $s7, $s5, $s5;
  359. add_ftz_f32 $s5, $s5, $s7;
  360. mul_ftz_f32 $s7, $s5, $s5;
  361. add_ftz_f32 $s5, $s5, $s7;
  362. mul_ftz_f32 $s7, $s5, $s5;
  363. add_ftz_f32 $s5, $s5, $s7;
  364. mul_ftz_f32 $s7, $s5, $s5;
  365. add_ftz_f32 $s5, $s5, $s7;
  366. mul_ftz_f32 $s7, $s5, $s5;
  367. add_ftz_f32 $s5, $s5, $s7;
  368. mul_ftz_f32 $s7, $s5, $s5;
  369. add_ftz_f32 $s5, $s5, $s7;
  370. mul_ftz_f32 $s7, $s5, $s5;
  371. add_ftz_f32 $s5, $s5, $s7;
  372. mul_ftz_f32 $s7, $s5, $s5;
  373. add_ftz_f32 $s5, $s5, $s7;
  374. mul_ftz_f32 $s7, $s5, $s5;
  375. add_ftz_f32 $s5, $s5, $s7;
  376. mul_ftz_f32 $s7, $s5, $s5;
  377. add_ftz_f32 $s5, $s5, $s7;
  378. mul_ftz_f32 $s7, $s5, $s5;
  379. add_ftz_f32 $s5, $s5, $s7;
  380. mul_ftz_f32 $s7, $s5, $s5;
  381. add_ftz_f32 $s5, $s5, $s7;
  382. mul_ftz_f32 $s7, $s5, $s5;
  383. add_ftz_f32 $s5, $s5, $s7;
  384. mul_ftz_f32 $s7, $s5, $s5;
  385. add_ftz_f32 $s5, $s5, $s7;
  386. mul_ftz_f32 $s7, $s5, $s5;
  387. add_ftz_f32 $s5, $s5, $s7;
  388. mul_ftz_f32 $s7, $s5, $s5;
  389. add_ftz_f32 $s5, $s5, $s7;
  390. mul_ftz_f32 $s7, $s5, $s5;
  391. add_ftz_f32 $s5, $s5, $s7;
  392. mul_ftz_f32 $s7, $s5, $s5;
  393. add_ftz_f32 $s5, $s5, $s7;
  394. mul_ftz_f32 $s7, $s5, $s5;
  395. add_ftz_f32 $s5, $s5, $s7;
  396. mul_ftz_f32 $s7, $s5, $s5;
  397. add_ftz_f32 $s5, $s5, $s7;
  398. mul_ftz_f32 $s7, $s5, $s5;
  399. add_ftz_f32 $s5, $s5, $s7;
  400. mul_ftz_f32 $s7, $s5, $s5;
  401. add_ftz_f32 $s5, $s5, $s7;
  402. mul_ftz_f32 $s7, $s5, $s5;
  403. add_ftz_f32 $s5, $s5, $s7;
  404. mul_ftz_f32 $s7, $s5, $s5;
  405. add_ftz_f32 $s5, $s5, $s7;
  406. mul_ftz_f32 $s7, $s5, $s5;
  407. add_ftz_f32 $s5, $s5, $s7;
  408. mul_ftz_f32 $s7, $s5, $s5;
  409. add_ftz_f32 $s5, $s5, $s7;
  410. mul_ftz_f32 $s7, $s5, $s5;
  411. add_ftz_f32 $s5, $s5, $s7;
  412. mul_ftz_f32 $s7, $s5, $s5;
  413. add_ftz_f32 $s5, $s5, $s7;
  414. mul_ftz_f32 $s7, $s5, $s5;
  415. add_ftz_f32 $s5, $s5, $s7;
  416. mul_ftz_f32 $s7, $s5, $s5;
  417. add_ftz_f32 $s5, $s5, $s7;
  418. mul_ftz_f32 $s7, $s5, $s5;
  419. add_ftz_f32 $s5, $s5, $s7;
  420. mul_ftz_f32 $s7, $s5, $s5;
  421. add_ftz_f32 $s5, $s5, $s7;
  422. mul_ftz_f32 $s7, $s5, $s5;
  423. add_ftz_f32 $s5, $s5, $s7;
  424. mul_ftz_f32 $s7, $s5, $s5;
  425. add_ftz_f32 $s5, $s5, $s7;
  426. mul_ftz_f32 $s7, $s5, $s5;
  427. add_ftz_f32 $s5, $s5, $s7;
  428. mul_ftz_f32 $s7, $s5, $s5;
  429. add_ftz_f32 $s5, $s5, $s7;
  430. mul_ftz_f32 $s7, $s5, $s5;
  431. add_ftz_f32 $s5, $s5, $s7;
  432. mul_ftz_f32 $s7, $s5, $s5;
  433. add_ftz_f32 $s5, $s5, $s7;
  434. mul_ftz_f32 $s7, $s5, $s5;
  435. add_ftz_f32 $s5, $s5, $s7;
  436. mul_ftz_f32 $s7, $s5, $s5;
  437. add_ftz_f32 $s5, $s5, $s7;
  438. mul_ftz_f32 $s7, $s5, $s5;
  439. add_ftz_f32 $s5, $s5, $s7;
  440. mul_ftz_f32 $s7, $s5, $s5;
  441. add_ftz_f32 $s5, $s5, $s7;
  442. mul_ftz_f32 $s7, $s5, $s5;
  443. add_ftz_f32 $s5, $s5, $s7;
  444. mul_ftz_f32 $s7, $s5, $s5;
  445. add_ftz_f32 $s5, $s5, $s7;
  446. mul_ftz_f32 $s7, $s5, $s5;
  447. add_ftz_f32 $s5, $s5, $s7;
  448. mul_ftz_f32 $s7, $s5, $s5;
  449. add_ftz_f32 $s5, $s5, $s7;
  450. mul_ftz_f32 $s7, $s5, $s5;
  451. add_ftz_f32 $s5, $s5, $s7;
  452. mul_ftz_f32 $s7, $s5, $s5;
  453. add_ftz_f32 $s5, $s5, $s7;
  454. mul_ftz_f32 $s7, $s5, $s5;
  455. add_ftz_f32 $s5, $s5, $s7;
  456. mul_ftz_f32 $s7, $s5, $s5;
  457. add_ftz_f32 $s5, $s5, $s7;
  458. mul_ftz_f32 $s7, $s5, $s5;
  459. add_ftz_f32 $s5, $s5, $s7;
  460. mul_ftz_f32 $s7, $s5, $s5;
  461. add_ftz_f32 $s5, $s5, $s7;
  462. mul_ftz_f32 $s7, $s5, $s5;
  463. add_ftz_f32 $s5, $s5, $s7;
  464. mul_ftz_f32 $s7, $s5, $s5;
  465. add_ftz_f32 $s5, $s5, $s7;
  466. mul_ftz_f32 $s7, $s5, $s5;
  467. add_ftz_f32 $s5, $s5, $s7;
  468. mul_ftz_f32 $s7, $s5, $s5;
  469. add_ftz_f32 $s5, $s5, $s7;
  470. mul_ftz_f32 $s7, $s5, $s5;
  471. add_ftz_f32 $s5, $s5, $s7;
  472. mul_ftz_f32 $s7, $s5, $s5;
  473. add_ftz_f32 $s5, $s5, $s7;
  474. mul_ftz_f32 $s7, $s5, $s5;
  475. add_ftz_f32 $s5, $s5, $s7;
  476. mul_ftz_f32 $s7, $s5, $s5;
  477. add_ftz_f32 $s5, $s5, $s7;
  478. mul_ftz_f32 $s7, $s5, $s5;
  479. add_ftz_f32 $s5, $s5, $s7;
  480. mul_ftz_f32 $s7, $s5, $s5;
  481. add_ftz_f32 $s5, $s5, $s7;
  // Write the chain result back to the slot it was read from.
  482. st_group_align(4)_f32 $s5, [%read_linear_uncached.scratch][$s6];
  // $s2 was already advanced above; 8192 / 512 = 16 iterations in total.
  483. cmp_ne_b1_s32 $c0, $s2, 8192;
  484. cbr_b1 $c0, @LBB0_1;
  485. // BB#2:
  // output[gid] = scratch[lid]; gid is sign-extended from 32 bits before scaling.
  486. cvt_s64_s32 $d1, $s3;
  487. shl_u64 $d1, $d1, 2;
  488. add_u64 $d0, $d0, $d1;
  489. ld_group_align(4)_f32 $s0, [$s0];
  490. st_global_align(4)_f32 $s0, [$d0];
  491. ret;
  492. };
Add Comment
Please, Sign In to add comment