Advertisement
Guest User

sempar.o

a guest
Nov 24th, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 89.96 KB | None | 0 0
  1.  
  2. /tmp/sempar.o: file format elf64-x86-64
  3.  
  4. SYMBOL TABLE:
  5. 0000000000000000 l d .text 0000000000000000 .text
  6. 0000000000000000 l d .rodata 0000000000000000 .rodata
  7. 0000000000000000 l O .rodata 0000000000000200 ff/bias_0
  8. 0000000000000200 l O .rodata 0000000000040000 lr_lstm/c2i
  9. 0000000000040200 l O .rodata 0000000000040000 lr_lstm/c2o
  10. 0000000000080200 l O .rodata 0000000000000008 ff/frame-creation-steps/shape:0
  11. 0000000000080220 l O .rodata 0000000000000004 rl_lstm/ones:0
  12. 0000000000080240 l O .rodata 0000000000040000 lr_lstm/h2c
  13. 00000000000c0240 l O .rodata 0000000000040000 lr_lstm/h2o
  14. 0000000000100240 l O .rodata 0000000000040000 lr_lstm/h2i
  15. 0000000000140240 l O .rodata 0000000000000004 ff/axis:0
  16. 0000000000140260 l O .rodata 0000000000000400 rl_lstm/bo
  17. 0000000000140660 l O .rodata 0000000000000400 rl_lstm/bi
  18. 0000000000140a60 l O .rodata 0000000000000400 rl_lstm/bc
  19. 0000000000140e60 l O .rodata 0000000000040000 rl_lstm/c2o
  20. 0000000000180e60 l O .rodata 0000000000040000 rl_lstm/c2i
  21. 00000000001c0e60 l O .rodata 0000000000016000 lr_lstm/x2c
  22. 00000000001d6e60 l O .rodata 0000000000016000 lr_lstm/x2o
  23. 00000000001ece60 l O .rodata 0000000000016000 lr_lstm/x2i
  24. 0000000000202e60 l O .rodata 0000000000000004 rl_lstm/axis:0
  25. 0000000000202e80 l O .rodata 0000000000367000 ff/weights_softmax
  26. 0000000000569e80 l O .rodata 0000000000016000 rl_lstm/x2i
  27. 000000000057fe80 l O .rodata 0000000000016000 rl_lstm/x2c
  28. 0000000000595e80 l O .rodata 0000000000008400 ff/linked_embedding_matrix_2
  29. 000000000059e280 l O .rodata 0000000000000004 lr_lstm/axis:0
  30. 000000000059e2a0 l O .rodata 0000000000040000 rl_lstm/h2i
  31. 00000000005de2a0 l O .rodata 0000000000040000 rl_lstm/h2o
  32. 000000000061e2a0 l O .rodata 0000000000040000 rl_lstm/h2c
  33. 000000000065e2a0 l O .rodata 0000000000000040 lr_lstm/fixed_embedding_matrix_3
  34. 000000000065e2e0 l O .rodata 0000000000680480 lr_lstm/fixed_embedding_matrix_0
  35. 0000000000cde760 l O .rodata 0000000000000060 lr_lstm/fixed_embedding_matrix_6
  36. 0000000000cde7c0 l O .rodata 0000000000000080 lr_lstm/fixed_embedding_matrix_5
  37. 0000000000cde840 l O .rodata 0000000000000060 lr_lstm/fixed_embedding_matrix_4
  38. 0000000000cde8a0 l O .rodata 0000000000000008 ff/frame-end-rl/shape:0
  39. 0000000000cde8c0 l O .rodata 0000000000006ce0 ff/bias_softmax
  40. 0000000000ce55a0 l O .rodata 0000000000016000 rl_lstm/x2o
  41. 0000000000cfb5a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_6
  42. 0000000000d039a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_5
  43. 0000000000d0bda0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_4
  44. 0000000000d145a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_3
  45. 0000000000d1c9a0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_1
  46. 0000000000d251a0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_0
  47. 0000000000d2d9a0 l O .rodata 0000000000000400 lr_lstm/bc
  48. 0000000000d2dda0 l O .rodata 0000000000000400 lr_lstm/bi
  49. 0000000000d2e1a0 l O .rodata 0000000000000400 lr_lstm/bo
  50. 0000000000d2e5a0 l O .rodata 0000000000000008 ff/frame-focus-steps/shape:0
  51. 0000000000d2e5a8 l O .rodata 0000000000000008 ff/history/shape:0
  52. 0000000000d2e5c0 l O .rodata 00000000000a8000 ff/weights_0
  53. 0000000000dd65c0 l O .rodata 00000000000000a0 lr_lstm/fixed_embedding_matrix_2
  54. 0000000000dd6660 l O .rodata 0000000000082380 lr_lstm/fixed_embedding_matrix_1
  55. 0000000000e589e0 l O .rodata 0000000000000008 ff/frame-end-lr/shape:0
  56. 0000000000e58a00 l O .rodata 0000000000000004 lr_lstm/ones:0
  57. 0000000000e58a04 l O .rodata 0000000000000060 rl_lstm/fixed_embedding_matrix_6
  58. 0000000000e58a64 l O .rodata 0000000000000080 rl_lstm/fixed_embedding_matrix_5
  59. 0000000000e58ae4 l O .rodata 0000000000000060 rl_lstm/fixed_embedding_matrix_4
  60. 0000000000e58b44 l O .rodata 0000000000000040 rl_lstm/fixed_embedding_matrix_3
  61. 0000000000e58b84 l O .rodata 00000000000000a0 rl_lstm/fixed_embedding_matrix_2
  62. 0000000000e58c40 l O .rodata 0000000000082380 rl_lstm/fixed_embedding_matrix_1
  63. 0000000000edafc0 l O .rodata 0000000000680480 rl_lstm/fixed_embedding_matrix_0
  64. 000000000155b440 l O .rodata 0000000000001f40 ff/fixed_embedding_matrix_1
  65. 000000000155d380 l O .rodata 0000000000001f40 ff/fixed_embedding_matrix_0
  66. 000000000155f2c0 l O .rodata 0000000000000640 ff/fixed_embedding_matrix_3
  67. 000000000155f900 l O .rodata 0000000000009c40 ff/fixed_embedding_matrix_2
  68. 0000000000000007 l F .text 0000000000000000 lr_lstm/punctuation/Lookup
  69. 000000000000002e l F .text 0000000000000000 lr_lstm/suffix/Lookup
  70. 000000000000008e l F .text 0000000000000000 lr_lstm/quote/Lookup
  71. 00000000000000b5 l F .text 0000000000000000 lr_lstm/capitalization/Lookup
  72. 00000000000000dc l F .text 0000000000000000 lr_lstm/digit/Lookup
  73. 0000000000000106 l F .text 0000000000000000 lr_lstm/hyphen/Lookup
  74. 0000000000000130 l F .text 0000000000000000 lr_lstm/words/Lookup
  75. 000000000000015a l F .text 0000000000000000 lr_lstm/concat
  76. 00000000000001f2 l F .text 0000000000000000 lr_lstm/MatMul_3
  77. 0000000000000305 l F .text 0000000000000000 lr_lstm/MatMul
  78. 0000000000000418 l F .text 0000000000000000 lr_lstm/MatMul_4
  79. 0000000000000552 l F .text 0000000000000000 lr_lstm/MatMul_1
  80. 000000000000068c l F .text 0000000000000000 lr_lstm/MatMul_2
  81. 00000000000007c6 l F .text 0000000000000000 lr_lstm/add_4
  82. 0000000000000a4a l F .text 0000000000000000 lr_lstm/MatMul_6
  83. 0000000000000b5a l F .text 0000000000000000 lr_lstm/MatMul_5
  84. 0000000000000c97 l F .text 0000000000000000 lr_lstm/MatMul_7
  85. 0000000000000dd1 l F .text 0000000000000000 lr_lstm/add_7
  86. 0000000000000f0b l O .text 0000000000000335 lr_lstm_data
  87. 0000000000001247 l F .text 0000000000000000 rl_lstm/hyphen/Lookup
  88. 000000000000126e l F .text 0000000000000000 rl_lstm/capitalization/Lookup
  89. 0000000000001295 l F .text 0000000000000000 rl_lstm/quote/Lookup
  90. 00000000000012bc l F .text 0000000000000000 rl_lstm/words/Lookup
  91. 00000000000012e3 l F .text 0000000000000000 rl_lstm/digit/Lookup
  92. 000000000000130a l F .text 0000000000000000 rl_lstm/punctuation/Lookup
  93. 0000000000001331 l F .text 0000000000000000 rl_lstm/suffix/Lookup
  94. 0000000000001394 l F .text 0000000000000000 rl_lstm/concat
  95. 0000000000001426 l F .text 0000000000000000 rl_lstm/MatMul
  96. 0000000000001539 l F .text 0000000000000000 rl_lstm/MatMul_3
  97. 000000000000164c l F .text 0000000000000000 rl_lstm/MatMul_1
  98. 0000000000001786 l F .text 0000000000000000 rl_lstm/MatMul_4
  99. 00000000000018c0 l F .text 0000000000000000 rl_lstm/MatMul_2
  100. 00000000000019fa l F .text 0000000000000000 rl_lstm/add_4
  101. 0000000000001c7e l F .text 0000000000000000 rl_lstm/MatMul_6
  102. 0000000000001d8e l F .text 0000000000000000 rl_lstm/MatMul_5
  103. 0000000000001ecb l F .text 0000000000000000 rl_lstm/MatMul_7
  104. 0000000000002005 l F .text 0000000000000000 rl_lstm/add_7
  105. 000000000000213f l O .text 0000000000000321 rl_lstm_data
  106. 0000000000002467 l F .text 0000000000000000 ff/rl/Collect
  107. 00000000000024ba l F .text 0000000000000000 ff/frame-end-lr/Collect
  108. 0000000000002521 l F .text 0000000000000000 ff/frame-end-rl/Collect
  109. 0000000000002588 l F .text 0000000000000000 ff/in-roles/Lookup
  110. 00000000000025ee l F .text 0000000000000000 ff/unlabeled-roles/Lookup
  111. 0000000000002654 l F .text 0000000000000000 ff/labeled-roles/Lookup
  112. 00000000000026b6 l F .text 0000000000000000 ff/out-roles/Lookup
  113. 000000000000271c l F .text 0000000000000000 ff/frame-focus-steps/Collect
  114. 0000000000002783 l F .text 0000000000000000 ff/frame-creation-steps/Collect
  115. 00000000000027ea l F .text 0000000000000000 ff/lr/Collect
  116. 000000000000283d l F .text 0000000000000000 ff/history/Collect
  117. 00000000000028a4 l F .text 0000000000000000 ff/rl/MatMul
  118. 000000000000292c l F .text 0000000000000000 ff/frame-end-lr/MatMul
  119. 00000000000029e8 l F .text 0000000000000000 ff/frame-end-rl/MatMul
  120. 0000000000002aa4 l F .text 0000000000000000 ff/frame-focus-steps/MatMul
  121. 0000000000002b28 l F .text 0000000000000000 ff/frame-creation-steps/MatMul
  122. 0000000000002bac l F .text 0000000000000000 ff/lr/MatMul
  123. 0000000000002c34 l F .text 0000000000000000 ff/history/MatMul
  124. 0000000000002cb8 l F .text 0000000000000000 ff/frame-end-lr/Reshape
  125. 0000000000002cb8 l F .text 0000000000000000 ff/frame-end-rl/Reshape
  126. 0000000000002cb8 l F .text 0000000000000000 ff/frame-focus-steps/Reshape
  127. 0000000000002cb8 l F .text 0000000000000000 ff/frame-creation-steps/Reshape
  128. 0000000000002cb8 l F .text 0000000000000000 ff/history/Reshape
  129. 0000000000002cb8 l F .text 0000000000000000 ff/concat
  130. 0000000000002db5 l F .text 0000000000000000 ff/MatMul
  131. 0000000000002f22 l F .text 0000000000000000 ff/MatMul_1
  132. 0000000000000000 g F .text 0000000000000f0b lr_lstm
  133. 0000000000001240 g F .text 0000000000000eff rl_lstm
  134. 0000000000002460 g F .text 0000000000000b30 ff
  135.  
  136.  
  137.  
  138. Disassembly of section .text:
  139.  
  140. 0000000000000000 <lr_lstm>:
  141. 0: vzeroupper
  142. 3: push rbp
  143. 4: mov rbp,rdi
  144.  
  145. 0000000000000007 <lr_lstm/punctuation/Lookup>:
  146. 7: movsxd rax,DWORD PTR [rbp+0xc]
  147. b: mov rcx,0x2
  148. 12: test rax,rax
  149. 15: cmovs rax,rcx
  150. 19: shl rax,0x5
  151. 1d: movabs rdx,0x0
  152. 1f: R_X86_64_64 lr_lstm/fixed_embedding_matrix_4
  153. 27: add rax,rdx
  154. 2a: mov QWORD PTR [rbp+0x48],rax
  155.  
  156. 000000000000002e <lr_lstm/suffix/Lookup>:
  157. 2e: lea rcx,[rbp+0x10]
  158. 32: movabs rdx,0x0
  159. 34: R_X86_64_64 lr_lstm/fixed_embedding_matrix_1
  160. 3c: lea rsi,[rbp+0x60]
  161. 40: vxorps ymm0,ymm0,ymm0
  162. 44: vxorps ymm1,ymm1,ymm1
  163. 48: mov r8,0x208d
  164. 4f: xor rdi,rdi
  165. 52: movsxd rax,DWORD PTR [rcx+rdi*4]
  166. 56: test rax,rax
  167. 59: jns 6c <lr_lstm/suffix/Lookup+0x3e>
  168. 5f: cmp rax,0xffffffffffffffff
  169. 63: jne 7c <lr_lstm/suffix/Lookup+0x4e>
  170. 69: mov rax,r8
  171. 6c: shl rax,0x6
  172. 70: add rax,rdx
  173. 73: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  174. 77: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  175. 7c: inc rdi
  176. 7f: cmp rdi,0x3
  177. 83: jne 52 <lr_lstm/suffix/Lookup+0x24>
  178. 85: vmovaps YMMWORD PTR [rsi],ymm0
  179. 89: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  180.  
  181. 000000000000008e <lr_lstm/quote/Lookup>:
  182. 8e: movsxd rax,DWORD PTR [rbp+0x40]
  183. 92: mov rcx,0x3
  184. 99: test rax,rax
  185. 9c: cmovs rax,rcx
  186. a0: shl rax,0x5
  187. a4: movabs rdx,0x0
  188. a6: R_X86_64_64 lr_lstm/fixed_embedding_matrix_5
  189. ae: add rax,rdx
  190. b1: mov QWORD PTR [rbp+0x50],rax
  191.  
  192. 00000000000000b5 <lr_lstm/capitalization/Lookup>:
  193. b5: movsxd rax,DWORD PTR [rbp+0x1c]
  194. b9: mov rcx,0x4
  195. c0: test rax,rax
  196. c3: cmovs rax,rcx
  197. c7: shl rax,0x5
  198. cb: movabs rdx,0x0
  199. cd: R_X86_64_64 lr_lstm/fixed_embedding_matrix_2
  200. d5: add rax,rdx
  201. d8: mov QWORD PTR [rbp+0x58],rax
  202.  
  203. 00000000000000dc <lr_lstm/digit/Lookup>:
  204. dc: movsxd rax,DWORD PTR [rbp+0x30]
  205. e0: mov rcx,0x2
  206. e7: test rax,rax
  207. ea: cmovs rax,rcx
  208. ee: shl rax,0x5
  209. f2: movabs rdx,0x0
  210. f4: R_X86_64_64 lr_lstm/fixed_embedding_matrix_6
  211. fc: add rax,rdx
  212. ff: mov QWORD PTR [rbp+0xa0],rax
  213.  
  214. 0000000000000106 <lr_lstm/hyphen/Lookup>:
  215. 106: movsxd rax,DWORD PTR [rbp+0x8]
  216. 10a: mov rcx,0x1
  217. 111: test rax,rax
  218. 114: cmovs rax,rcx
  219. 118: shl rax,0x5
  220. 11c: movabs rdx,0x0
  221. 11e: R_X86_64_64 lr_lstm/fixed_embedding_matrix_3
  222. 126: add rax,rdx
  223. 129: mov QWORD PTR [rbp+0xa8],rax
  224.  
  225. 0000000000000130 <lr_lstm/words/Lookup>:
  226. 130: movsxd rax,DWORD PTR [rbp+0x34]
  227. 134: mov rcx,0xd008
  228. 13b: test rax,rax
  229. 13e: cmovs rax,rcx
  230. 142: shl rax,0x7
  231. 146: movabs rdx,0x0
  232. 148: R_X86_64_64 lr_lstm/fixed_embedding_matrix_0
  233. 150: add rax,rdx
  234. 153: mov QWORD PTR [rbp+0xb0],rax
  235.  
  236. 000000000000015a <lr_lstm/concat>:
  237. 15a: lea r8,[rbp+0xc0]
  238. 161: mov rsi,QWORD PTR [rbp+0xb0]
  239. 168: lea rdi,[r8]
  240. 16b: mov rcx,0x80
  241. 172: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  242. 174: lea rsi,[rbp+0x60]
  243. 178: lea rdi,[r8+0x80]
  244. 17f: mov rcx,0x40
  245. 186: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  246. 188: mov rsi,QWORD PTR [rbp+0x58]
  247. 18c: lea rdi,[r8+0xc0]
  248. 193: mov rcx,0x20
  249. 19a: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  250. 19c: mov rsi,QWORD PTR [rbp+0xa8]
  251. 1a3: lea rdi,[r8+0xe0]
  252. 1aa: mov rcx,0x20
  253. 1b1: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  254. 1b3: mov rsi,QWORD PTR [rbp+0x48]
  255. 1b7: lea rdi,[r8+0x100]
  256. 1be: mov rcx,0x20
  257. 1c5: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  258. 1c7: mov rsi,QWORD PTR [rbp+0x50]
  259. 1cb: lea rdi,[r8+0x120]
  260. 1d2: mov rcx,0x20
  261. 1d9: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  262. 1db: mov rsi,QWORD PTR [rbp+0xa0]
  263. 1e2: lea rdi,[r8+0x140]
  264. 1e9: mov rcx,0x20
  265. 1f0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  266.  
  267. 00000000000001f2 <lr_lstm/MatMul_3>:
  268. 1f2: lea rdi,[rbp+0xc0]
  269. 1f9: movabs rsi,0x0
  270. 1fb: R_X86_64_64 lr_lstm/x2c
  271. 203: lea r8,[rbp+0x220]
  272. 20a: xor rcx,rcx
  273. 20d: vxorps ymm0,ymm0,ymm0
  274. 211: vxorps ymm1,ymm1,ymm1
  275. 215: vxorps ymm2,ymm2,ymm2
  276. 219: vxorps ymm3,ymm3,ymm3
  277. 21d: vxorps ymm4,ymm4,ymm4
  278. 221: vxorps ymm5,ymm5,ymm5
  279. 225: vxorps ymm6,ymm6,ymm6
  280. 229: vxorps ymm7,ymm7,ymm7
  281. 22d: mov rdx,rsi
  282. 230: xor rax,rax
  283. 233: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  284. 239: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  285. 23d: vaddps ymm0,ymm0,ymm8
  286. 242: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  287. 247: vaddps ymm1,ymm1,ymm9
  288. 24c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  289. 251: vaddps ymm2,ymm2,ymm10
  290. 256: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  291. 25b: vaddps ymm3,ymm3,ymm11
  292. 260: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  293. 268: vaddps ymm4,ymm4,ymm8
  294. 26d: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  295. 275: vaddps ymm5,ymm5,ymm9
  296. 27a: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  297. 282: vaddps ymm6,ymm6,ymm10
  298. 287: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  299. 28f: vaddps ymm7,ymm7,ymm11
  300. 294: add rdx,0x400
  301. 29b: add rax,0x4
  302. 29f: cmp rax,0x160
  303. 2a5: jl 233 <lr_lstm/MatMul_3+0x41>
  304. 2a7: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  305. 2ad: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  306. 2b4: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  307. 2bb: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  308. 2c2: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  309. 2cc: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  310. 2d6: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  311. 2e0: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  312. 2ea: add rsi,0x100
  313. 2f1: add rcx,0x100
  314. 2f8: cmp rcx,0x400
  315. 2ff: jl 20d <lr_lstm/MatMul_3+0x1b>
  316.  
  317. 0000000000000305 <lr_lstm/MatMul>:
  318. 305: lea rdi,[rbp+0xc0]
  319. 30c: movabs rsi,0x0
  320. 30e: R_X86_64_64 lr_lstm/x2i
  321. 316: lea r8,[rbp+0x620]
  322. 31d: xor rcx,rcx
  323. 320: vxorps ymm0,ymm0,ymm0
  324. 324: vxorps ymm1,ymm1,ymm1
  325. 328: vxorps ymm2,ymm2,ymm2
  326. 32c: vxorps ymm3,ymm3,ymm3
  327. 330: vxorps ymm4,ymm4,ymm4
  328. 334: vxorps ymm5,ymm5,ymm5
  329. 338: vxorps ymm6,ymm6,ymm6
  330. 33c: vxorps ymm7,ymm7,ymm7
  331. 340: mov rdx,rsi
  332. 343: xor rax,rax
  333. 346: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  334. 34c: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  335. 350: vaddps ymm0,ymm0,ymm8
  336. 355: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  337. 35a: vaddps ymm1,ymm1,ymm9
  338. 35f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  339. 364: vaddps ymm2,ymm2,ymm10
  340. 369: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  341. 36e: vaddps ymm3,ymm3,ymm11
  342. 373: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  343. 37b: vaddps ymm4,ymm4,ymm8
  344. 380: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  345. 388: vaddps ymm5,ymm5,ymm9
  346. 38d: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  347. 395: vaddps ymm6,ymm6,ymm10
  348. 39a: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  349. 3a2: vaddps ymm7,ymm7,ymm11
  350. 3a7: add rdx,0x400
  351. 3ae: add rax,0x4
  352. 3b2: cmp rax,0x160
  353. 3b8: jl 346 <lr_lstm/MatMul+0x41>
  354. 3ba: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  355. 3c0: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  356. 3c7: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  357. 3ce: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  358. 3d5: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  359. 3df: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  360. 3e9: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  361. 3f3: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  362. 3fd: add rsi,0x100
  363. 404: add rcx,0x100
  364. 40b: cmp rcx,0x400
  365. 412: jl 320 <lr_lstm/MatMul+0x1b>
  366.  
  367. 0000000000000418 <lr_lstm/MatMul_4>:
  368. 418: mov rdi,QWORD PTR [rbp+0x0]
  369. 41c: movabs rsi,0x0
  370. 41e: R_X86_64_64 lr_lstm/h2c
  371. 426: lea r9,[rbp+0x220]
  372. 42d: lea r8,[rbp+0xa20]
  373. 434: xor rcx,rcx
  374. 437: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  375. 43d: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  376. 444: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  377. 44b: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  378. 452: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  379. 45c: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  380. 466: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  381. 470: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  382. 47a: mov rdx,rsi
  383. 47d: xor rax,rax
  384. 480: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  385. 486: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  386. 48a: vaddps ymm0,ymm0,ymm8
  387. 48f: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  388. 494: vaddps ymm1,ymm1,ymm9
  389. 499: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  390. 49e: vaddps ymm2,ymm2,ymm10
  391. 4a3: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  392. 4a8: vaddps ymm3,ymm3,ymm11
  393. 4ad: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  394. 4b5: vaddps ymm4,ymm4,ymm8
  395. 4ba: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  396. 4c2: vaddps ymm5,ymm5,ymm9
  397. 4c7: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  398. 4cf: vaddps ymm6,ymm6,ymm10
  399. 4d4: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  400. 4dc: vaddps ymm7,ymm7,ymm11
  401. 4e1: add rdx,0x400
  402. 4e8: add rax,0x4
  403. 4ec: cmp rax,0x400
  404. 4f2: jl 480 <lr_lstm/MatMul_4+0x68>
  405. 4f4: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  406. 4fa: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  407. 501: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  408. 508: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  409. 50f: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  410. 519: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  411. 523: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  412. 52d: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  413. 537: add rsi,0x100
  414. 53e: add rcx,0x100
  415. 545: cmp rcx,0x400
  416. 54c: jl 437 <lr_lstm/MatMul_4+0x1f>
  417.  
  418. 0000000000000552 <lr_lstm/MatMul_1>:
  419. 552: mov rdi,QWORD PTR [rbp+0x0]
  420. 556: movabs rsi,0x0
  421. 558: R_X86_64_64 lr_lstm/h2i
  422. 560: lea r9,[rbp+0x620]
  423. 567: lea r8,[rbp+0xe20]
  424. 56e: xor rcx,rcx
  425. 571: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  426. 577: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  427. 57e: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  428. 585: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  429. 58c: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  430. 596: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  431. 5a0: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  432. 5aa: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  433. 5b4: mov rdx,rsi
  434. 5b7: xor rax,rax
  435. 5ba: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  436. 5c0: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  437. 5c4: vaddps ymm0,ymm0,ymm8
  438. 5c9: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  439. 5ce: vaddps ymm1,ymm1,ymm9
  440. 5d3: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  441. 5d8: vaddps ymm2,ymm2,ymm10
  442. 5dd: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  443. 5e2: vaddps ymm3,ymm3,ymm11
  444. 5e7: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  445. 5ef: vaddps ymm4,ymm4,ymm8
  446. 5f4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  447. 5fc: vaddps ymm5,ymm5,ymm9
  448. 601: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  449. 609: vaddps ymm6,ymm6,ymm10
  450. 60e: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  451. 616: vaddps ymm7,ymm7,ymm11
  452. 61b: add rdx,0x400
  453. 622: add rax,0x4
  454. 626: cmp rax,0x400
  455. 62c: jl 5ba <lr_lstm/MatMul_1+0x68>
  456. 62e: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  457. 634: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  458. 63b: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  459. 642: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  460. 649: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  461. 653: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  462. 65d: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  463. 667: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  464. 671: add rsi,0x100
  465. 678: add rcx,0x100
  466. 67f: cmp rcx,0x400
  467. 686: jl 571 <lr_lstm/MatMul_1+0x1f>
  468.  
  469. 000000000000068c <lr_lstm/MatMul_2>:
  470. 68c: mov rdi,QWORD PTR [rbp+0x38]
  471. 690: movabs rsi,0x0
  472. 692: R_X86_64_64 lr_lstm/c2i
  473. 69a: lea r9,[rbp+0xe20]
  474. 6a1: lea r8,[rbp+0x1220]
  475. 6a8: xor rcx,rcx
  476. 6ab: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  477. 6b1: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  478. 6b8: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  479. 6bf: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  480. 6c6: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  481. 6d0: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  482. 6da: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  483. 6e4: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  484. 6ee: mov rdx,rsi
  485. 6f1: xor rax,rax
  486. 6f4: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  487. 6fa: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  488. 6fe: vaddps ymm0,ymm0,ymm8
  489. 703: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  490. 708: vaddps ymm1,ymm1,ymm9
  491. 70d: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  492. 712: vaddps ymm2,ymm2,ymm10
  493. 717: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  494. 71c: vaddps ymm3,ymm3,ymm11
  495. 721: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  496. 729: vaddps ymm4,ymm4,ymm8
  497. 72e: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  498. 736: vaddps ymm5,ymm5,ymm9
  499. 73b: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  500. 743: vaddps ymm6,ymm6,ymm10
  501. 748: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  502. 750: vaddps ymm7,ymm7,ymm11
  503. 755: add rdx,0x400
  504. 75c: add rax,0x4
  505. 760: cmp rax,0x400
  506. 766: jl 6f4 <lr_lstm/MatMul_2+0x68>
  507. 768: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  508. 76e: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  509. 775: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  510. 77c: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  511. 783: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  512. 78d: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  513. 797: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  514. 7a1: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  515. 7ab: add rsi,0x100
  516. 7b2: add rcx,0x100
  517. 7b9: cmp rcx,0x400
  518. 7c0: jl 6ab <lr_lstm/MatMul_2+0x1f>
  519.  
  520. 00000000000007c6 <lr_lstm/add_4>:
  521. 7c6: vmovaps ymm0,YMMWORD PTR [rip+0x752] # f20 <lr_lstm_data+0x15>
  522. 7ce: vmovaps ymm1,YMMWORD PTR [rip+0x76a] # f40 <lr_lstm_data+0x35>
  523. 7d6: vmovaps ymm2,YMMWORD PTR [rip+0x782] # f60 <lr_lstm_data+0x55>
  524. 7de: vmovaps ymm3,YMMWORD PTR [rip+0x79a] # f80 <lr_lstm_data+0x75>
  525. 7e6: vmovaps ymm4,YMMWORD PTR [rip+0x7b2] # fa0 <lr_lstm_data+0x95>
  526. 7ee: vmovaps ymm5,YMMWORD PTR [rip+0x7ca] # fc0 <lr_lstm_data+0xb5>
  527. 7f6: vmovaps ymm6,YMMWORD PTR [rip+0x7e2] # fe0 <lr_lstm_data+0xd5>
  528. 7fe: vmovaps ymm7,YMMWORD PTR [rip+0x7fa] # 1000 <lr_lstm_data+0xf5>
  529. 806: vmovaps ymm8,YMMWORD PTR [rip+0x812] # 1020 <lr_lstm_data+0x115>
  530. 80e: lea r8,[rbp+0xa20]
  531. 815: movabs rcx,0x0
  532. 817: R_X86_64_64 lr_lstm/bc
  533. 81f: lea r9,[rbp+0x1220]
  534. 826: movabs rdx,0x0
  535. 828: R_X86_64_64 lr_lstm/bi
  536. 830: mov rsi,QWORD PTR [rbp+0x38]
  537. 834: mov rdi,QWORD PTR [rbp+0x20]
  538. 838: lea r10,[rbp+0xa20]
  539. 83f: xor rax,rax
  540. 842: vmovaps ymm9,YMMWORD PTR [r9+rax*1]
  541. 848: vaddps ymm10,ymm9,YMMWORD PTR [rdx+rax*1]
  542. 84d: vxorps ymm9,ymm9,ymm9
  543. 852: vsubps ymm11,ymm9,ymm10
  544. 857: vminps ymm9,ymm11,YMMWORD PTR [rip+0x7e1] # 1040 <lr_lstm_data+0x135>
  545. 85f: vmaxps ymm10,ymm9,YMMWORD PTR [rip+0x7f9] # 1060 <lr_lstm_data+0x155>
  546. 867: vmulps ymm9,ymm10,YMMWORD PTR [rip+0x811] # 1080 <lr_lstm_data+0x175>
  547. 86f: vaddps ymm12,ymm9,YMMWORD PTR [rip+0x829] # 10a0 <lr_lstm_data+0x195>
  548. 877: vroundps ymm9,ymm12,0x1
  549. 87d: vmulps ymm12,ymm9,YMMWORD PTR [rip+0x83b] # 10c0 <lr_lstm_data+0x1b5>
  550. 885: vaddps ymm13,ymm12,ymm10
  551. 88a: vmulps ymm10,ymm13,ymm13
  552. 88f: vmulps ymm12,ymm13,YMMWORD PTR [rip+0x849] # 10e0 <lr_lstm_data+0x1d5>
  553. 897: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x861] # 1100 <lr_lstm_data+0x1f5>
  554. 89f: vmulps ymm12,ymm14,ymm13
  555. 8a4: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x874] # 1120 <lr_lstm_data+0x215>
  556. 8ac: vmulps ymm12,ymm14,ymm13
  557. 8b1: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x887] # 1140 <lr_lstm_data+0x235>
  558. 8b9: vmulps ymm12,ymm14,ymm13
  559. 8be: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x89a] # 1160 <lr_lstm_data+0x255>
  560. 8c6: vmulps ymm12,ymm14,ymm13
  561. 8cb: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x7cd] # 10a0 <lr_lstm_data+0x195>
  562. 8d3: vmulps ymm12,ymm14,ymm10
  563. 8d8: vaddps ymm10,ymm12,ymm13
  564. 8dd: vaddps ymm12,ymm10,ymm0
  565. 8e1: vaddps ymm10,ymm9,YMMWORD PTR [rip+0x897] # 1180 <lr_lstm_data+0x275>
  566. 8e9: vcvttps2dq ymm9,ymm10
  567. 8ee: vextractf128 xmm15,ymm9,0x1
  568. 8f4: vpslld xmm15,xmm15,0x17
  569. 8fa: vpslld xmm10,xmm9,0x17
  570. 900: vinsertf128 ymm10,ymm10,xmm15,0x1
  571. 906: vmulps ymm9,ymm12,ymm10
  572. 90b: vmaxps ymm10,ymm9,ymm11
  573. 910: vaddps ymm9,ymm0,ymm10
  574. 915: vdivps ymm10,ymm0,ymm9
  575. 91a: vmovaps ymm9,YMMWORD PTR [r8+rax*1]
  576. 920: vaddps ymm11,ymm9,YMMWORD PTR [rcx+rax*1]
  577. 925: vminps ymm9,ymm11,ymm2
  578. 929: vmaxps ymm11,ymm9,ymm1
  579. 92d: vmulps ymm9,ymm11,ymm11
  580. 932: vmulps ymm12,ymm9,ymm3
  581. 936: vaddps ymm13,ymm12,ymm4
  582. 93a: vmulps ymm12,ymm9,ymm13
  583. 93f: vaddps ymm13,ymm12,ymm5
  584. 943: vmulps ymm12,ymm9,ymm13
  585. 948: vaddps ymm13,ymm12,ymm6
  586. 94c: vmulps ymm12,ymm9,ymm13
  587. 951: vaddps ymm13,ymm12,ymm7
  588. 955: vmulps ymm12,ymm9,ymm13
  589. 95a: vaddps ymm13,ymm12,ymm8
  590. 95f: vmulps ymm12,ymm9,ymm13
  591. 964: vaddps ymm13,ymm12,YMMWORD PTR [rip+0x834] # 11a0 <lr_lstm_data+0x295>
  592. 96c: vmulps ymm12,ymm11,ymm13
  593. 971: vmulps ymm11,ymm9,YMMWORD PTR [rip+0x847] # 11c0 <lr_lstm_data+0x2b5>
  594. 979: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x85f] # 11e0 <lr_lstm_data+0x2d5>
  595. 981: vmulps ymm11,ymm9,ymm13
  596. 986: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x872] # 1200 <lr_lstm_data+0x2f5>
  597. 98e: vmulps ymm11,ymm9,ymm13
  598. 993: vaddps ymm9,ymm11,YMMWORD PTR [rip+0x885] # 1220 <lr_lstm_data+0x315>
  599. 99b: vdivps ymm11,ymm12,ymm9
  600. 9a0: vmulps ymm9,ymm10,ymm11
  601. 9a5: vsubps ymm11,ymm0,ymm10
  602. 9aa: vmulps ymm10,ymm11,YMMWORD PTR [rsi+rax*1]
  603. 9af: vaddps ymm11,ymm9,ymm10
  604. 9b4: vmovaps YMMWORD PTR [rdi+rax*1],ymm11
  605. 9b9: vminps ymm9,ymm11,ymm2
  606. 9bd: vmaxps ymm10,ymm9,ymm1
  607. 9c1: vmulps ymm9,ymm10,ymm10
  608. 9c6: vmulps ymm11,ymm9,ymm3
  609. 9ca: vaddps ymm12,ymm11,ymm4
  610. 9ce: vmulps ymm11,ymm9,ymm12
  611. 9d3: vaddps ymm12,ymm11,ymm5
  612. 9d7: vmulps ymm11,ymm9,ymm12
  613. 9dc: vaddps ymm12,ymm11,ymm6
  614. 9e0: vmulps ymm11,ymm9,ymm12
  615. 9e5: vaddps ymm12,ymm11,ymm7
  616. 9e9: vmulps ymm11,ymm9,ymm12
  617. 9ee: vaddps ymm12,ymm11,ymm8
  618. 9f3: vmulps ymm11,ymm9,ymm12
  619. 9f8: vaddps ymm12,ymm11,YMMWORD PTR [rip+0x7a0] # 11a0 <lr_lstm_data+0x295>
  620. a00: vmulps ymm11,ymm10,ymm12
  621. a05: vmulps ymm10,ymm9,YMMWORD PTR [rip+0x7b3] # 11c0 <lr_lstm_data+0x2b5>
  622. a0d: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7cb] # 11e0 <lr_lstm_data+0x2d5>
  623. a15: vmulps ymm10,ymm9,ymm12
  624. a1a: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7de] # 1200 <lr_lstm_data+0x2f5>
  625. a22: vmulps ymm10,ymm9,ymm12
  626. a27: vaddps ymm9,ymm10,YMMWORD PTR [rip+0x7f1] # 1220 <lr_lstm_data+0x315>
  627. a2f: vdivps ymm10,ymm11,ymm9
  628. a34: vmovaps YMMWORD PTR [r10+rax*1],ymm10
  629. a3a: add rax,0x20
  630. a3e: cmp rax,0x400
  631. a44: jl 842 <lr_lstm/add_4+0x7c>
  632.  
  633. 0000000000000a4a <lr_lstm/MatMul_6>:
  634. a4a: mov rdi,QWORD PTR [rbp+0x20]
  635. a4e: movabs rsi,0x0
  636. a50: R_X86_64_64 lr_lstm/c2o
  637. a58: lea r8,[rbp+0x1620]
  638. a5f: xor rcx,rcx
  639. a62: vxorps ymm0,ymm0,ymm0
  640. a66: vxorps ymm1,ymm1,ymm1
  641. a6a: vxorps ymm2,ymm2,ymm2
  642. a6e: vxorps ymm3,ymm3,ymm3
  643. a72: vxorps ymm4,ymm4,ymm4
  644. a76: vxorps ymm5,ymm5,ymm5
  645. a7a: vxorps ymm6,ymm6,ymm6
  646. a7e: vxorps ymm7,ymm7,ymm7
  647. a82: mov rdx,rsi
  648. a85: xor rax,rax
  649. a88: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  650. a8e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  651. a92: vaddps ymm0,ymm0,ymm8
  652. a97: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  653. a9c: vaddps ymm1,ymm1,ymm9
  654. aa1: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  655. aa6: vaddps ymm2,ymm2,ymm10
  656. aab: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  657. ab0: vaddps ymm3,ymm3,ymm11
  658. ab5: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  659. abd: vaddps ymm4,ymm4,ymm8
  660. ac2: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  661. aca: vaddps ymm5,ymm5,ymm9
  662. acf: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  663. ad7: vaddps ymm6,ymm6,ymm10
  664. adc: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  665. ae4: vaddps ymm7,ymm7,ymm11
  666. ae9: add rdx,0x400
  667. af0: add rax,0x4
  668. af4: cmp rax,0x400
  669. afa: jl a88 <lr_lstm/MatMul_6+0x3e>
  670. afc: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  671. b02: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  672. b09: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  673. b10: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  674. b17: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  675. b21: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  676. b2b: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  677. b35: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  678. b3f: add rsi,0x100
  679. b46: add rcx,0x100
  680. b4d: cmp rcx,0x400
  681. b54: jl a62 <lr_lstm/MatMul_6+0x18>
  682.  
  683. 0000000000000b5a <lr_lstm/MatMul_5>:
  684. b5a: lea rdi,[rbp+0xc0]
  685. b61: movabs rsi,0x0
  686. b63: R_X86_64_64 lr_lstm/x2o
  687. b6b: lea r9,[rbp+0x1620]
  688. b72: lea r8,[rbp+0x1a20]
  689. b79: xor rcx,rcx
  690. b7c: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  691. b82: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  692. b89: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  693. b90: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  694. b97: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  695. ba1: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  696. bab: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  697. bb5: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  698. bbf: mov rdx,rsi
  699. bc2: xor rax,rax
  700. bc5: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  701. bcb: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  702. bcf: vaddps ymm0,ymm0,ymm8
  703. bd4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  704. bd9: vaddps ymm1,ymm1,ymm9
  705. bde: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  706. be3: vaddps ymm2,ymm2,ymm10
  707. be8: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  708. bed: vaddps ymm3,ymm3,ymm11
  709. bf2: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  710. bfa: vaddps ymm4,ymm4,ymm8
  711. bff: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  712. c07: vaddps ymm5,ymm5,ymm9
  713. c0c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  714. c14: vaddps ymm6,ymm6,ymm10
  715. c19: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  716. c21: vaddps ymm7,ymm7,ymm11
  717. c26: add rdx,0x400
  718. c2d: add rax,0x4
  719. c31: cmp rax,0x160
  720. c37: jl bc5 <lr_lstm/MatMul_5+0x6b>
  721. c39: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  722. c3f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  723. c46: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  724. c4d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  725. c54: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  726. c5e: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  727. c68: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  728. c72: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  729. c7c: add rsi,0x100
  730. c83: add rcx,0x100
  731. c8a: cmp rcx,0x400
  732. c91: jl b7c <lr_lstm/MatMul_5+0x22>
  733.  
  734. 0000000000000c97 <lr_lstm/MatMul_7>:
  735. c97: mov rdi,QWORD PTR [rbp+0x0]
  736. c9b: movabs rsi,0x0
  737. c9d: R_X86_64_64 lr_lstm/h2o
  738. ca5: lea r9,[rbp+0x1a20]
  739. cac: lea r8,[rbp+0x1e20]
  740. cb3: xor rcx,rcx
  741. cb6: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  742. cbc: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  743. cc3: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  744. cca: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  745. cd1: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  746. cdb: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  747. ce5: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  748. cef: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  749. cf9: mov rdx,rsi
  750. cfc: xor rax,rax
  751. cff: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  752. d05: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  753. d09: vaddps ymm0,ymm0,ymm8
  754. d0e: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  755. d13: vaddps ymm1,ymm1,ymm9
  756. d18: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  757. d1d: vaddps ymm2,ymm2,ymm10
  758. d22: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  759. d27: vaddps ymm3,ymm3,ymm11
  760. d2c: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  761. d34: vaddps ymm4,ymm4,ymm8
  762. d39: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  763. d41: vaddps ymm5,ymm5,ymm9
  764. d46: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  765. d4e: vaddps ymm6,ymm6,ymm10
  766. d53: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  767. d5b: vaddps ymm7,ymm7,ymm11
  768. d60: add rdx,0x400
  769. d67: add rax,0x4
  770. d6b: cmp rax,0x400
  771. d71: jl cff <lr_lstm/MatMul_7+0x68>
  772. d73: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  773. d79: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  774. d80: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  775. d87: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  776. d8e: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  777. d98: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  778. da2: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  779. dac: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  780. db6: add rsi,0x100
  781. dbd: add rcx,0x100
  782. dc4: cmp rcx,0x400
  783. dcb: jl cb6 <lr_lstm/MatMul_7+0x1f>
  784.  
  785. 0000000000000dd1 <lr_lstm/add_7>:
  786. dd1: vmovaps ymm0,YMMWORD PTR [rip+0x147] # f20 <lr_lstm_data+0x15>
  787. dd9: vxorps ymm1,ymm1,ymm1
  788. ddd: vmovaps ymm2,YMMWORD PTR [rip+0x27b] # 1060 <lr_lstm_data+0x155>
  789. de5: vmovaps ymm3,YMMWORD PTR [rip+0x253] # 1040 <lr_lstm_data+0x135>
  790. ded: vmovaps ymm4,YMMWORD PTR [rip+0x2ab] # 10a0 <lr_lstm_data+0x195>
  791. df5: vmovaps ymm5,YMMWORD PTR [rip+0x283] # 1080 <lr_lstm_data+0x175>
  792. dfd: vmovaps ymm6,YMMWORD PTR [rip+0x2bb] # 10c0 <lr_lstm_data+0x1b5>
  793. e05: vmovaps ymm7,YMMWORD PTR [rip+0x2d3] # 10e0 <lr_lstm_data+0x1d5>
  794. e0d: vmovaps ymm8,YMMWORD PTR [rip+0x2eb] # 1100 <lr_lstm_data+0x1f5>
  795. e15: lea rsi,[rbp+0x1e20]
  796. e1c: movabs rcx,0x0
  797. e1e: R_X86_64_64 lr_lstm/bo
  798. e26: lea rdi,[rbp+0xa20]
  799. e2d: mov rdx,QWORD PTR [rbp+0x28]
  800. e31: xor rax,rax
  801. e34: vmovaps ymm9,YMMWORD PTR [rsi+rax*1]
  802. e39: vaddps ymm10,ymm9,YMMWORD PTR [rcx+rax*1]
  803. e3e: vsubps ymm9,ymm1,ymm10
  804. e43: vminps ymm10,ymm9,ymm3
  805. e47: vmaxps ymm11,ymm10,ymm2
  806. e4b: vmulps ymm10,ymm11,ymm5
  807. e4f: vaddps ymm12,ymm10,ymm4
  808. e53: vroundps ymm10,ymm12,0x1
  809. e59: vmulps ymm12,ymm10,ymm6
  810. e5d: vaddps ymm13,ymm12,ymm11
  811. e62: vmulps ymm11,ymm13,ymm13
  812. e67: vmulps ymm12,ymm7,ymm13
  813. e6c: vaddps ymm14,ymm12,ymm8
  814. e71: vmulps ymm12,ymm14,ymm13
  815. e76: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2a2] # 1120 <lr_lstm_data+0x215>
  816. e7e: vmulps ymm12,ymm14,ymm13
  817. e83: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2b5] # 1140 <lr_lstm_data+0x235>
  818. e8b: vmulps ymm12,ymm14,ymm13
  819. e90: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2c8] # 1160 <lr_lstm_data+0x255>
  820. e98: vmulps ymm12,ymm14,ymm13
  821. e9d: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x1fb] # 10a0 <lr_lstm_data+0x195>
  822. ea5: vmulps ymm12,ymm14,ymm11
  823. eaa: vaddps ymm11,ymm12,ymm13
  824. eaf: vaddps ymm12,ymm11,ymm0
  825. eb3: vaddps ymm11,ymm10,YMMWORD PTR [rip+0x2c5] # 1180 <lr_lstm_data+0x275>
  826. ebb: vcvttps2dq ymm10,ymm11
  827. ec0: vextractf128 xmm15,ymm10,0x1
  828. ec6: vpslld xmm15,xmm15,0x17
  829. ecc: vpslld xmm11,xmm10,0x17
  830. ed2: vinsertf128 ymm11,ymm11,xmm15,0x1
  831. ed8: vmulps ymm10,ymm12,ymm11
  832. edd: vmaxps ymm11,ymm10,ymm9
  833. ee2: vaddps ymm9,ymm0,ymm11
  834. ee7: vdivps ymm10,ymm0,ymm9
  835. eec: vmulps ymm9,ymm10,YMMWORD PTR [rdi+rax*1]
  836. ef1: vmovaps YMMWORD PTR [rdx+rax*1],ymm9
  837. ef6: add rax,0x20
  838. efa: cmp rax,0x400
  839. f00: jl e34 <lr_lstm/add_7+0x63>
  840. f06: pop rbp
  841. f07: vzeroupper
  842. f0a: ret
  843.  
  844. 0000000000000f0b <lr_lstm_data>:
  845. ...
  846. f1f: ....?...?...?...
  847. f2f: ?...?...?...?...
  848. f3f: ?...............
  849. f4f: ................
  850. f5f: ....A...A...A...
  851. f6f: A...A...A...A...
  852. f7f: A.%...%...%...%.
  853. f8f: ..%...%...%...%.
  854. f9f: .~3a*~3a*~3a*~3a
  855. faf: *~3a*~3a*~3a*~3a
  856. fbf: *.7...7...7...7.
  857. fcf: ..7...7...7...7.
  858. fdf: .A.\3A.\3A.\3A.\
  859. fef: 3A.\3A.\3A.\3A.\
  860. fff: 3JCy7JCy7JCy7JCy
  861. 100f: 7JCy7JCy7JCy7JCy
  862. 101f: 7..':..':..':..'
  863. 102f: :..':..':..':..'
  864. 103f: :...B...B...B...
  865. 104f: B...B...B...B...
  866. 105f: B...............
  867. 106f: ................
  868. 107f: .;..?;..?;..?;..
  869. 108f: ?;..?;..?;..?;..
  870. 109f: ?...?...?...?...
  871. 10af: ?...?...?...?...
  872. 10bf: ?.r1..r1..r1..r1
  873. 10cf: ..r1..r1..r1..r1
  874. 10df: .giP9giP9giP9giP
  875. 10ef: 9giP9giP9giP9giP
  876. 10ff: 9.C.:.C.:.C.:.C.
  877. 110f: :.C.:.C.:.C.:.C.
  878. 111f: :...<...<...<...
  879. 112f: <...<...<...<...
  880. 113f: <..*=..*=..*=..*
  881. 114f: =..*=..*=..*=..*
  882. 115f: =..*>..*>..*>..*
  883. 116f: >..*>..*>..*>..*
  884. 117f: >...B...B...B...
  885. 118f: B...B...B...B...
  886. 119f: B.Y.;.Y.;.Y.;.Y.
  887. 11af: ;.Y.;.Y.;.Y.;.Y.
  888. 11bf: ;...5...5...5...
  889. 11cf: 5...5...5...5...
  890. 11df: 5...8...8...8...
  891. 11ef: 8...8...8...8...
  892. 11ff: 8...;...;...;...
  893. 120f: ;...;...;...;...
  894. 121f: ;.Y.;.Y.;.Y.;.Y.
  895. 122f: ;.Y.;.Y.;.Y.;.Y.
  896. 123f: ;
  897.  
  898. 0000000000001240 <rl_lstm>:
  899. 1240: vzeroupper
  900. 1243: push rbp
  901. 1244: mov rbp,rdi
  902.  
  903. 0000000000001247 <rl_lstm/hyphen/Lookup>:
  904. 1247: movsxd rax,DWORD PTR [rbp+0x0]
  905. 124b: mov rcx,0x1
  906. 1252: test rax,rax
  907. 1255: cmovs rax,rcx
  908. 1259: shl rax,0x5
  909. 125d: movabs rdx,0x0
  910. 125f: R_X86_64_64 rl_lstm/fixed_embedding_matrix_3
  911. 1267: add rax,rdx
  912. 126a: mov QWORD PTR [rbp+0x48],rax
  913.  
  914. 000000000000126e <rl_lstm/capitalization/Lookup>:
  915. 126e: movsxd rax,DWORD PTR [rbp+0x4]
  916. 1272: mov rcx,0x4
  917. 1279: test rax,rax
  918. 127c: cmovs rax,rcx
  919. 1280: shl rax,0x5
  920. 1284: movabs rdx,0x0
  921. 1286: R_X86_64_64 rl_lstm/fixed_embedding_matrix_2
  922. 128e: add rax,rdx
  923. 1291: mov QWORD PTR [rbp+0x50],rax
  924.  
  925. 0000000000001295 <rl_lstm/quote/Lookup>:
  926. 1295: movsxd rax,DWORD PTR [rbp+0x40]
  927. 1299: mov rcx,0x3
  928. 12a0: test rax,rax
  929. 12a3: cmovs rax,rcx
  930. 12a7: shl rax,0x5
  931. 12ab: movabs rdx,0x0
  932. 12ad: R_X86_64_64 rl_lstm/fixed_embedding_matrix_5
  933. 12b5: add rax,rdx
  934. 12b8: mov QWORD PTR [rbp+0x58],rax
  935.  
  936. 00000000000012bc <rl_lstm/words/Lookup>:
  937. 12bc: movsxd rax,DWORD PTR [rbp+0x1c]
  938. 12c0: mov rcx,0xd008
  939. 12c7: test rax,rax
  940. 12ca: cmovs rax,rcx
  941. 12ce: shl rax,0x7
  942. 12d2: movabs rdx,0x0
  943. 12d4: R_X86_64_64 rl_lstm/fixed_embedding_matrix_0
  944. 12dc: add rax,rdx
  945. 12df: mov QWORD PTR [rbp+0x60],rax
  946.  
  947. 00000000000012e3 <rl_lstm/digit/Lookup>:
  948. 12e3: movsxd rax,DWORD PTR [rbp+0x18]
  949. 12e7: mov rcx,0x2
  950. 12ee: test rax,rax
  951. 12f1: cmovs rax,rcx
  952. 12f5: shl rax,0x5
  953. 12f9: movabs rdx,0x0
  954. 12fb: R_X86_64_64 rl_lstm/fixed_embedding_matrix_6
  955. 1303: add rax,rdx
  956. 1306: mov QWORD PTR [rbp+0x68],rax
  957.  
  958. 000000000000130a <rl_lstm/punctuation/Lookup>:
  959. 130a: movsxd rax,DWORD PTR [rbp+0x30]
  960. 130e: mov rcx,0x2
  961. 1315: test rax,rax
  962. 1318: cmovs rax,rcx
  963. 131c: shl rax,0x5
  964. 1320: movabs rdx,0x0
  965. 1322: R_X86_64_64 rl_lstm/fixed_embedding_matrix_4
  966. 132a: add rax,rdx
  967. 132d: mov QWORD PTR [rbp+0x70],rax
  968.  
  969. 0000000000001331 <rl_lstm/suffix/Lookup>:
  970. 1331: lea rcx,[rbp+0x34]
  971. 1335: movabs rdx,0x0
  972. 1337: R_X86_64_64 rl_lstm/fixed_embedding_matrix_1
  973. 133f: lea rsi,[rbp+0x80]
  974. 1346: vxorps ymm0,ymm0,ymm0
  975. 134a: vxorps ymm1,ymm1,ymm1
  976. 134e: mov r8,0x208d
  977. 1355: xor rdi,rdi
  978. 1358: movsxd rax,DWORD PTR [rcx+rdi*4]
  979. 135c: test rax,rax
  980. 135f: jns 1372 <rl_lstm/suffix/Lookup+0x41>
  981. 1365: cmp rax,0xffffffffffffffff
  982. 1369: jne 1382 <rl_lstm/suffix/Lookup+0x51>
  983. 136f: mov rax,r8
  984. 1372: shl rax,0x6
  985. 1376: add rax,rdx
  986. 1379: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  987. 137d: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  988. 1382: inc rdi
  989. 1385: cmp rdi,0x3
  990. 1389: jne 1358 <rl_lstm/suffix/Lookup+0x27>
  991. 138b: vmovaps YMMWORD PTR [rsi],ymm0
  992. 138f: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  993.  
  994. 0000000000001394 <rl_lstm/concat>:
  995. 1394: lea r8,[rbp+0xc0]
  996. 139b: mov rsi,QWORD PTR [rbp+0x60]
  997. 139f: lea rdi,[r8]
  998. 13a2: mov rcx,0x80
  999. 13a9: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1000. 13ab: lea rsi,[rbp+0x80]
  1001. 13b2: lea rdi,[r8+0x80]
  1002. 13b9: mov rcx,0x40
  1003. 13c0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1004. 13c2: mov rsi,QWORD PTR [rbp+0x50]
  1005. 13c6: lea rdi,[r8+0xc0]
  1006. 13cd: mov rcx,0x20
  1007. 13d4: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1008. 13d6: mov rsi,QWORD PTR [rbp+0x48]
  1009. 13da: lea rdi,[r8+0xe0]
  1010. 13e1: mov rcx,0x20
  1011. 13e8: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1012. 13ea: mov rsi,QWORD PTR [rbp+0x70]
  1013. 13ee: lea rdi,[r8+0x100]
  1014. 13f5: mov rcx,0x20
  1015. 13fc: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1016. 13fe: mov rsi,QWORD PTR [rbp+0x58]
  1017. 1402: lea rdi,[r8+0x120]
  1018. 1409: mov rcx,0x20
  1019. 1410: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1020. 1412: mov rsi,QWORD PTR [rbp+0x68]
  1021. 1416: lea rdi,[r8+0x140]
  1022. 141d: mov rcx,0x20
  1023. 1424: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1024.  
  1025. 0000000000001426 <rl_lstm/MatMul>:
  1026. 1426: lea rdi,[rbp+0xc0]
  1027. 142d: movabs rsi,0x0
  1028. 142f: R_X86_64_64 rl_lstm/x2i
  1029. 1437: lea r8,[rbp+0x220]
  1030. 143e: xor rcx,rcx
  1031. 1441: vxorps ymm0,ymm0,ymm0
  1032. 1445: vxorps ymm1,ymm1,ymm1
  1033. 1449: vxorps ymm2,ymm2,ymm2
  1034. 144d: vxorps ymm3,ymm3,ymm3
  1035. 1451: vxorps ymm4,ymm4,ymm4
  1036. 1455: vxorps ymm5,ymm5,ymm5
  1037. 1459: vxorps ymm6,ymm6,ymm6
  1038. 145d: vxorps ymm7,ymm7,ymm7
  1039. 1461: mov rdx,rsi
  1040. 1464: xor rax,rax
  1041. 1467: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1042. 146d: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1043. 1471: vaddps ymm0,ymm0,ymm8
  1044. 1476: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1045. 147b: vaddps ymm1,ymm1,ymm9
  1046. 1480: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1047. 1485: vaddps ymm2,ymm2,ymm10
  1048. 148a: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1049. 148f: vaddps ymm3,ymm3,ymm11
  1050. 1494: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1051. 149c: vaddps ymm4,ymm4,ymm8
  1052. 14a1: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1053. 14a9: vaddps ymm5,ymm5,ymm9
  1054. 14ae: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1055. 14b6: vaddps ymm6,ymm6,ymm10
  1056. 14bb: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1057. 14c3: vaddps ymm7,ymm7,ymm11
  1058. 14c8: add rdx,0x400
  1059. 14cf: add rax,0x4
  1060. 14d3: cmp rax,0x160
  1061. 14d9: jl 1467 <rl_lstm/MatMul+0x41>
  1062. 14db: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1063. 14e1: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1064. 14e8: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1065. 14ef: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1066. 14f6: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1067. 1500: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1068. 150a: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1069. 1514: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1070. 151e: add rsi,0x100
  1071. 1525: add rcx,0x100
  1072. 152c: cmp rcx,0x400
  1073. 1533: jl 1441 <rl_lstm/MatMul+0x1b>
  1074.  
  1075. 0000000000001539 <rl_lstm/MatMul_3>:
  1076. 1539: lea rdi,[rbp+0xc0]
  1077. 1540: movabs rsi,0x0
  1078. 1542: R_X86_64_64 rl_lstm/x2c
  1079. 154a: lea r8,[rbp+0x620]
  1080. 1551: xor rcx,rcx
  1081. 1554: vxorps ymm0,ymm0,ymm0
  1082. 1558: vxorps ymm1,ymm1,ymm1
  1083. 155c: vxorps ymm2,ymm2,ymm2
  1084. 1560: vxorps ymm3,ymm3,ymm3
  1085. 1564: vxorps ymm4,ymm4,ymm4
  1086. 1568: vxorps ymm5,ymm5,ymm5
  1087. 156c: vxorps ymm6,ymm6,ymm6
  1088. 1570: vxorps ymm7,ymm7,ymm7
  1089. 1574: mov rdx,rsi
  1090. 1577: xor rax,rax
  1091. 157a: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1092. 1580: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1093. 1584: vaddps ymm0,ymm0,ymm8
  1094. 1589: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1095. 158e: vaddps ymm1,ymm1,ymm9
  1096. 1593: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1097. 1598: vaddps ymm2,ymm2,ymm10
  1098. 159d: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1099. 15a2: vaddps ymm3,ymm3,ymm11
  1100. 15a7: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1101. 15af: vaddps ymm4,ymm4,ymm8
  1102. 15b4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1103. 15bc: vaddps ymm5,ymm5,ymm9
  1104. 15c1: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1105. 15c9: vaddps ymm6,ymm6,ymm10
  1106. 15ce: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1107. 15d6: vaddps ymm7,ymm7,ymm11
  1108. 15db: add rdx,0x400
  1109. 15e2: add rax,0x4
  1110. 15e6: cmp rax,0x160
  1111. 15ec: jl 157a <rl_lstm/MatMul_3+0x41>
  1112. 15ee: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1113. 15f4: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1114. 15fb: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1115. 1602: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1116. 1609: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1117. 1613: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1118. 161d: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1119. 1627: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1120. 1631: add rsi,0x100
  1121. 1638: add rcx,0x100
  1122. 163f: cmp rcx,0x400
  1123. 1646: jl 1554 <rl_lstm/MatMul_3+0x1b>
  1124.  
  1125. 000000000000164c <rl_lstm/MatMul_1>:
  1126. 164c: mov rdi,QWORD PTR [rbp+0x8]
  1127. 1650: movabs rsi,0x0
  1128. 1652: R_X86_64_64 rl_lstm/h2i
  1129. 165a: lea r9,[rbp+0x220]
  1130. 1661: lea r8,[rbp+0xa20]
  1131. 1668: xor rcx,rcx
  1132. 166b: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  1133. 1671: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  1134. 1678: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  1135. 167f: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  1136. 1686: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  1137. 1690: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  1138. 169a: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  1139. 16a4: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  1140. 16ae: mov rdx,rsi
  1141. 16b1: xor rax,rax
  1142. 16b4: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1143. 16ba: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1144. 16be: vaddps ymm0,ymm0,ymm8
  1145. 16c3: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1146. 16c8: vaddps ymm1,ymm1,ymm9
  1147. 16cd: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1148. 16d2: vaddps ymm2,ymm2,ymm10
  1149. 16d7: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1150. 16dc: vaddps ymm3,ymm3,ymm11
  1151. 16e1: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1152. 16e9: vaddps ymm4,ymm4,ymm8
  1153. 16ee: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1154. 16f6: vaddps ymm5,ymm5,ymm9
  1155. 16fb: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1156. 1703: vaddps ymm6,ymm6,ymm10
  1157. 1708: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1158. 1710: vaddps ymm7,ymm7,ymm11
  1159. 1715: add rdx,0x400
  1160. 171c: add rax,0x4
  1161. 1720: cmp rax,0x400
  1162. 1726: jl 16b4 <rl_lstm/MatMul_1+0x68>
  1163. 1728: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1164. 172e: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1165. 1735: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1166. 173c: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1167. 1743: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1168. 174d: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1169. 1757: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1170. 1761: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1171. 176b: add rsi,0x100
  1172. 1772: add rcx,0x100
  1173. 1779: cmp rcx,0x400
  1174. 1780: jl 166b <rl_lstm/MatMul_1+0x1f>
  1175.  
  1176. 0000000000001786 <rl_lstm/MatMul_4>:
  1177. 1786: mov rdi,QWORD PTR [rbp+0x8]
  1178. 178a: movabs rsi,0x0
  1179. 178c: R_X86_64_64 rl_lstm/h2c
  1180. 1794: lea r9,[rbp+0x620]
  1181. 179b: lea r8,[rbp+0xe20]
  1182. 17a2: xor rcx,rcx
  1183. 17a5: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  1184. 17ab: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  1185. 17b2: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  1186. 17b9: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  1187. 17c0: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  1188. 17ca: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  1189. 17d4: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  1190. 17de: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  1191. 17e8: mov rdx,rsi
  1192. 17eb: xor rax,rax
  1193. 17ee: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1194. 17f4: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1195. 17f8: vaddps ymm0,ymm0,ymm8
  1196. 17fd: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1197. 1802: vaddps ymm1,ymm1,ymm9
  1198. 1807: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1199. 180c: vaddps ymm2,ymm2,ymm10
  1200. 1811: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1201. 1816: vaddps ymm3,ymm3,ymm11
  1202. 181b: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1203. 1823: vaddps ymm4,ymm4,ymm8
  1204. 1828: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1205. 1830: vaddps ymm5,ymm5,ymm9
  1206. 1835: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1207. 183d: vaddps ymm6,ymm6,ymm10
  1208. 1842: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1209. 184a: vaddps ymm7,ymm7,ymm11
  1210. 184f: add rdx,0x400
  1211. 1856: add rax,0x4
  1212. 185a: cmp rax,0x400
  1213. 1860: jl 17ee <rl_lstm/MatMul_4+0x68>
  1214. 1862: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1215. 1868: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1216. 186f: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1217. 1876: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1218. 187d: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1219. 1887: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1220. 1891: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1221. 189b: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1222. 18a5: add rsi,0x100
  1223. 18ac: add rcx,0x100
  1224. 18b3: cmp rcx,0x400
  1225. 18ba: jl 17a5 <rl_lstm/MatMul_4+0x1f>
  1226.  
  1227. 00000000000018c0 <rl_lstm/MatMul_2>:
  1228. 18c0: mov rdi,QWORD PTR [rbp+0x10]
  1229. 18c4: movabs rsi,0x0
  1230. 18c6: R_X86_64_64 rl_lstm/c2i
  1231. 18ce: lea r9,[rbp+0xa20]
  1232. 18d5: lea r8,[rbp+0x1220]
  1233. 18dc: xor rcx,rcx
  1234. 18df: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
  1235. 18e5: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  1236. 18ec: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  1237. 18f3: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  1238. 18fa: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  1239. 1904: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  1240. 190e: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  1241. 1918: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  1242. 1922: mov rdx,rsi
  1243. 1925: xor rax,rax
  1244. 1928: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1245. 192e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1246. 1932: vaddps ymm0,ymm0,ymm8
  1247. 1937: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1248. 193c: vaddps ymm1,ymm1,ymm9
  1249. 1941: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1250. 1946: vaddps ymm2,ymm2,ymm10
  1251. 194b: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1252. 1950: vaddps ymm3,ymm3,ymm11
  1253. 1955: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1254. 195d: vaddps ymm4,ymm4,ymm8
  1255. 1962: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1256. 196a: vaddps ymm5,ymm5,ymm9
  1257. 196f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1258. 1977: vaddps ymm6,ymm6,ymm10
  1259. 197c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1260. 1984: vaddps ymm7,ymm7,ymm11
  1261. 1989: add rdx,0x400
  1262. 1990: add rax,0x4
  1263. 1994: cmp rax,0x400
  1264. 199a: jl 1928 <rl_lstm/MatMul_2+0x68>
  1265. 199c: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1266. 19a2: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1267. 19a9: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1268. 19b0: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1269. 19b7: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1270. 19c1: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1271. 19cb: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1272. 19d5: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1273. 19df: add rsi,0x100
  1274. 19e6: add rcx,0x100
  1275. 19ed: cmp rcx,0x400
  1276. 19f4: jl 18df <rl_lstm/MatMul_2+0x1f>
  1277.  
  1278. 00000000000019fa <rl_lstm/add_4>:
  1279. 19fa: vmovaps ymm0,YMMWORD PTR [rip+0x73e] # 2140 <rl_lstm_data+0x1>
  1280. 1a02: vmovaps ymm1,YMMWORD PTR [rip+0x756] # 2160 <rl_lstm_data+0x21>
  1281. 1a0a: vmovaps ymm2,YMMWORD PTR [rip+0x76e] # 2180 <rl_lstm_data+0x41>
  1282. 1a12: vmovaps ymm3,YMMWORD PTR [rip+0x786] # 21a0 <rl_lstm_data+0x61>
  1283. 1a1a: vmovaps ymm4,YMMWORD PTR [rip+0x79e] # 21c0 <rl_lstm_data+0x81>
  1284. 1a22: vmovaps ymm5,YMMWORD PTR [rip+0x7b6] # 21e0 <rl_lstm_data+0xa1>
  1285. 1a2a: vmovaps ymm6,YMMWORD PTR [rip+0x7ce] # 2200 <rl_lstm_data+0xc1>
  1286. 1a32: vmovaps ymm7,YMMWORD PTR [rip+0x7e6] # 2220 <rl_lstm_data+0xe1>
  1287. 1a3a: vmovaps ymm8,YMMWORD PTR [rip+0x7fe] # 2240 <rl_lstm_data+0x101>
  1288. 1a42: lea r8,[rbp+0xe20]
  1289. 1a49: movabs rcx,0x0
  1290. 1a4b: R_X86_64_64 rl_lstm/bc
  1291. 1a53: lea r9,[rbp+0x1220]
  1292. 1a5a: movabs rdx,0x0
  1293. 1a5c: R_X86_64_64 rl_lstm/bi
  1294. 1a64: mov rsi,QWORD PTR [rbp+0x10]
  1295. 1a68: mov rdi,QWORD PTR [rbp+0x20]
  1296. 1a6c: lea r10,[rbp+0xe20]
  1297. 1a73: xor rax,rax
  1298. 1a76: vmovaps ymm9,YMMWORD PTR [r9+rax*1]
  1299. 1a7c: vaddps ymm10,ymm9,YMMWORD PTR [rdx+rax*1]
  1300. 1a81: vxorps ymm9,ymm9,ymm9
  1301. 1a86: vsubps ymm11,ymm9,ymm10
  1302. 1a8b: vminps ymm9,ymm11,YMMWORD PTR [rip+0x7cd] # 2260 <rl_lstm_data+0x121>
  1303. 1a93: vmaxps ymm10,ymm9,YMMWORD PTR [rip+0x7e5] # 2280 <rl_lstm_data+0x141>
  1304. 1a9b: vmulps ymm9,ymm10,YMMWORD PTR [rip+0x7fd] # 22a0 <rl_lstm_data+0x161>
  1305. 1aa3: vaddps ymm12,ymm9,YMMWORD PTR [rip+0x815] # 22c0 <rl_lstm_data+0x181>
  1306. 1aab: vroundps ymm9,ymm12,0x1
  1307. 1ab1: vmulps ymm12,ymm9,YMMWORD PTR [rip+0x827] # 22e0 <rl_lstm_data+0x1a1>
  1308. 1ab9: vaddps ymm13,ymm12,ymm10
  1309. 1abe: vmulps ymm10,ymm13,ymm13
  1310. 1ac3: vmulps ymm12,ymm13,YMMWORD PTR [rip+0x835] # 2300 <rl_lstm_data+0x1c1>
  1311. 1acb: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x84d] # 2320 <rl_lstm_data+0x1e1>
  1312. 1ad3: vmulps ymm12,ymm14,ymm13
  1313. 1ad8: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x860] # 2340 <rl_lstm_data+0x201>
  1314. 1ae0: vmulps ymm12,ymm14,ymm13
  1315. 1ae5: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x873] # 2360 <rl_lstm_data+0x221>
  1316. 1aed: vmulps ymm12,ymm14,ymm13
  1317. 1af2: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x886] # 2380 <rl_lstm_data+0x241>
  1318. 1afa: vmulps ymm12,ymm14,ymm13
  1319. 1aff: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x7b9] # 22c0 <rl_lstm_data+0x181>
  1320. 1b07: vmulps ymm12,ymm14,ymm10
  1321. 1b0c: vaddps ymm10,ymm12,ymm13
  1322. 1b11: vaddps ymm12,ymm10,ymm0
  1323. 1b15: vaddps ymm10,ymm9,YMMWORD PTR [rip+0x883] # 23a0 <rl_lstm_data+0x261>
  1324. 1b1d: vcvttps2dq ymm9,ymm10
  1325. 1b22: vextractf128 xmm15,ymm9,0x1
  1326. 1b28: vpslld xmm15,xmm15,0x17
  1327. 1b2e: vpslld xmm10,xmm9,0x17
  1328. 1b34: vinsertf128 ymm10,ymm10,xmm15,0x1
  1329. 1b3a: vmulps ymm9,ymm12,ymm10
  1330. 1b3f: vmaxps ymm10,ymm9,ymm11
  1331. 1b44: vaddps ymm9,ymm0,ymm10
  1332. 1b49: vdivps ymm10,ymm0,ymm9
  1333. 1b4e: vmovaps ymm9,YMMWORD PTR [r8+rax*1]
  1334. 1b54: vaddps ymm11,ymm9,YMMWORD PTR [rcx+rax*1]
  1335. 1b59: vminps ymm9,ymm11,ymm2
  1336. 1b5d: vmaxps ymm11,ymm9,ymm1
  1337. 1b61: vmulps ymm9,ymm11,ymm11
  1338. 1b66: vmulps ymm12,ymm9,ymm3
  1339. 1b6a: vaddps ymm13,ymm12,ymm4
  1340. 1b6e: vmulps ymm12,ymm9,ymm13
  1341. 1b73: vaddps ymm13,ymm12,ymm5
  1342. 1b77: vmulps ymm12,ymm9,ymm13
  1343. 1b7c: vaddps ymm13,ymm12,ymm6
  1344. 1b80: vmulps ymm12,ymm9,ymm13
  1345. 1b85: vaddps ymm13,ymm12,ymm7
  1346. 1b89: vmulps ymm12,ymm9,ymm13
  1347. 1b8e: vaddps ymm13,ymm12,ymm8
  1348. 1b93: vmulps ymm12,ymm9,ymm13
  1349. 1b98: vaddps ymm13,ymm12,YMMWORD PTR [rip+0x820] # 23c0 <rl_lstm_data+0x281>
  1350. 1ba0: vmulps ymm12,ymm11,ymm13
  1351. 1ba5: vmulps ymm11,ymm9,YMMWORD PTR [rip+0x833] # 23e0 <rl_lstm_data+0x2a1>
  1352. 1bad: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x84b] # 2400 <rl_lstm_data+0x2c1>
  1353. 1bb5: vmulps ymm11,ymm9,ymm13
  1354. 1bba: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x85e] # 2420 <rl_lstm_data+0x2e1>
  1355. 1bc2: vmulps ymm11,ymm9,ymm13
  1356. 1bc7: vaddps ymm9,ymm11,YMMWORD PTR [rip+0x871] # 2440 <rl_lstm_data+0x301>
  1357. 1bcf: vdivps ymm11,ymm12,ymm9
  1358. 1bd4: vmulps ymm9,ymm10,ymm11
  1359. 1bd9: vsubps ymm11,ymm0,ymm10
  1360. 1bde: vmulps ymm10,ymm11,YMMWORD PTR [rsi+rax*1]
  1361. 1be3: vaddps ymm11,ymm9,ymm10
  1362. 1be8: vmovaps YMMWORD PTR [rdi+rax*1],ymm11
  1363. 1bed: vminps ymm9,ymm11,ymm2
  1364. 1bf1: vmaxps ymm10,ymm9,ymm1
  1365. 1bf5: vmulps ymm9,ymm10,ymm10
  1366. 1bfa: vmulps ymm11,ymm9,ymm3
  1367. 1bfe: vaddps ymm12,ymm11,ymm4
  1368. 1c02: vmulps ymm11,ymm9,ymm12
  1369. 1c07: vaddps ymm12,ymm11,ymm5
  1370. 1c0b: vmulps ymm11,ymm9,ymm12
  1371. 1c10: vaddps ymm12,ymm11,ymm6
  1372. 1c14: vmulps ymm11,ymm9,ymm12
  1373. 1c19: vaddps ymm12,ymm11,ymm7
  1374. 1c1d: vmulps ymm11,ymm9,ymm12
  1375. 1c22: vaddps ymm12,ymm11,ymm8
  1376. 1c27: vmulps ymm11,ymm9,ymm12
  1377. 1c2c: vaddps ymm12,ymm11,YMMWORD PTR [rip+0x78c] # 23c0 <rl_lstm_data+0x281>
  1378. 1c34: vmulps ymm11,ymm10,ymm12
  1379. 1c39: vmulps ymm10,ymm9,YMMWORD PTR [rip+0x79f] # 23e0 <rl_lstm_data+0x2a1>
  1380. 1c41: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7b7] # 2400 <rl_lstm_data+0x2c1>
  1381. 1c49: vmulps ymm10,ymm9,ymm12
  1382. 1c4e: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7ca] # 2420 <rl_lstm_data+0x2e1>
  1383. 1c56: vmulps ymm10,ymm9,ymm12
  1384. 1c5b: vaddps ymm9,ymm10,YMMWORD PTR [rip+0x7dd] # 2440 <rl_lstm_data+0x301>
  1385. 1c63: vdivps ymm10,ymm11,ymm9
  1386. 1c68: vmovaps YMMWORD PTR [r10+rax*1],ymm10
  1387. 1c6e: add rax,0x20
  1388. 1c72: cmp rax,0x400
  1389. 1c78: jl 1a76 <rl_lstm/add_4+0x7c>
  1390.  
  1391. 0000000000001c7e <rl_lstm/MatMul_6>:
  1392. 1c7e: mov rdi,QWORD PTR [rbp+0x20]
  1393. 1c82: movabs rsi,0x0
  1394. 1c84: R_X86_64_64 rl_lstm/c2o
  1395. 1c8c: lea r8,[rbp+0x1620]
  1396. 1c93: xor rcx,rcx
  1397. 1c96: vxorps ymm0,ymm0,ymm0
  1398. 1c9a: vxorps ymm1,ymm1,ymm1
  1399. 1c9e: vxorps ymm2,ymm2,ymm2
  1400. 1ca2: vxorps ymm3,ymm3,ymm3
  1401. 1ca6: vxorps ymm4,ymm4,ymm4
  1402. 1caa: vxorps ymm5,ymm5,ymm5
  1403. 1cae: vxorps ymm6,ymm6,ymm6
  1404. 1cb2: vxorps ymm7,ymm7,ymm7
  1405. 1cb6: mov rdx,rsi
  1406. 1cb9: xor rax,rax
  1407. 1cbc: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
  1408. 1cc2: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1409. 1cc6: vaddps ymm0,ymm0,ymm8
  1410. 1ccb: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1411. 1cd0: vaddps ymm1,ymm1,ymm9
  1412. 1cd5: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1413. 1cda: vaddps ymm2,ymm2,ymm10
  1414. 1cdf: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1415. 1ce4: vaddps ymm3,ymm3,ymm11
  1416. 1ce9: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1417. 1cf1: vaddps ymm4,ymm4,ymm8
  1418. 1cf6: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1419. 1cfe: vaddps ymm5,ymm5,ymm9
  1420. 1d03: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1421. 1d0b: vaddps ymm6,ymm6,ymm10
  1422. 1d10: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1423. 1d18: vaddps ymm7,ymm7,ymm11
  1424. 1d1d: add rdx,0x400
  1425. 1d24: add rax,0x4
  1426. 1d28: cmp rax,0x400
  1427. 1d2e: jl 1cbc <rl_lstm/MatMul_6+0x3e>
  1428. 1d30: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1429. 1d36: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1430. 1d3d: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1431. 1d44: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1432. 1d4b: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1433. 1d55: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1434. 1d5f: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1435. 1d69: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1436. 1d73: add rsi,0x100
  1437. 1d7a: add rcx,0x100
  1438. 1d81: cmp rcx,0x400
  1439. 1d88: jl 1c96 <rl_lstm/MatMul_6+0x18>
  1440.  
  1441. 0000000000001d8e <rl_lstm/MatMul_5>:
        # Accumulating vector-matrix product over 256 output floats:
        #   out[j] = acc[j] + sum_k x[k] * W[k][j],  k over 88 input floats.
        # x is read from rbp+0xc0, W is the rl_lstm/x2o table (row stride 0x400
        # bytes), acc is read from rbp+0x1620 and out is written to rbp+0x1a20.
        # The output is produced in four tiles of 0x100 bytes (8 ymm = 64 floats).
  1442. 1d8e: lea rdi,[rbp+0xc0]  # rdi = input vector x
  1443. 1d95: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  1444. 1d97: R_X86_64_64 rl_lstm/x2o
  1445. 1d9f: lea r9,[rbp+0x1620]  # r9 = initial accumulator values
  1446. 1da6: lea r8,[rbp+0x1a20]  # r8 = output buffer
  1447. 1dad: xor rcx,rcx  # rcx = byte offset of the current output tile
  1448. 1db0: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]  # outer loop: seed ymm0-ymm7 with acc tile
  1449. 1db6: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  1450. 1dbd: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  1451. 1dc4: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  1452. 1dcb: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  1453. 1dd5: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  1454. 1ddf: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  1455. 1de9: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  1456. 1df3: mov rdx,rsi  # rdx = W row pointer for this column tile
  1457. 1df6: xor rax,rax  # rax = byte offset into x
  1458. 1df9: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]  # inner loop: ymm12 = x[k] in all lanes
  1459. 1dff: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1460. 1e03: vaddps ymm0,ymm0,ymm8
  1461. 1e08: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1462. 1e0d: vaddps ymm1,ymm1,ymm9
  1463. 1e12: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1464. 1e17: vaddps ymm2,ymm2,ymm10
  1465. 1e1c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1466. 1e21: vaddps ymm3,ymm3,ymm11
  1467. 1e26: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1468. 1e2e: vaddps ymm4,ymm4,ymm8
  1469. 1e33: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1470. 1e3b: vaddps ymm5,ymm5,ymm9
  1471. 1e40: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1472. 1e48: vaddps ymm6,ymm6,ymm10
  1473. 1e4d: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1474. 1e55: vaddps ymm7,ymm7,ymm11
  1475. 1e5a: add rdx,0x400  # next W row (0x400 bytes = 256 floats)
  1476. 1e61: add rax,0x4  # next input float
  1477. 1e65: cmp rax,0x160  # 0x160 / 4 = 88 inputs
  1478. 1e6b: jl 1df9 <rl_lstm/MatMul_5+0x6b>
  1479. 1e6d: vmovaps YMMWORD PTR [r8+rcx*1],ymm0  # write the finished 64-float tile
  1480. 1e73: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1481. 1e7a: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1482. 1e81: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1483. 1e88: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1484. 1e92: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1485. 1e9c: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1486. 1ea6: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1487. 1eb0: add rsi,0x100  # advance W base to the next column tile
  1488. 1eb7: add rcx,0x100
  1489. 1ebe: cmp rcx,0x400  # 0x400 bytes = 256 output floats
  1490. 1ec5: jl 1db0 <rl_lstm/MatMul_5+0x22>
  1491.  
  1492. 0000000000001ecb <rl_lstm/MatMul_7>:
        # Accumulating vector-matrix product over 256 output floats:
        #   out[j] = acc[j] + sum_k x[k] * W[k][j],  k over 256 input floats.
        # x is read through the pointer stored at [rbp+0x8], W is the
        # rl_lstm/h2o table (row stride 0x400 bytes), acc is read from
        # rbp+0x1a20 and out is written to rbp+0x1e20, in four 0x100-byte tiles.
  1493. 1ecb: mov rdi,QWORD PTR [rbp+0x8]  # rdi = input vector pointer (loaded, not computed)
  1494. 1ecf: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  1495. 1ed1: R_X86_64_64 rl_lstm/h2o
  1496. 1ed9: lea r9,[rbp+0x1a20]  # r9 = initial accumulator values
  1497. 1ee0: lea r8,[rbp+0x1e20]  # r8 = output buffer
  1498. 1ee7: xor rcx,rcx  # rcx = byte offset of the current output tile
  1499. 1eea: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]  # outer loop: seed ymm0-ymm7 with acc tile
  1500. 1ef0: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  1501. 1ef7: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  1502. 1efe: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  1503. 1f05: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  1504. 1f0f: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  1505. 1f19: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  1506. 1f23: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  1507. 1f2d: mov rdx,rsi  # rdx = W row pointer for this column tile
  1508. 1f30: xor rax,rax  # rax = byte offset into x
  1509. 1f33: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]  # inner loop: ymm12 = x[k] in all lanes
  1510. 1f39: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  1511. 1f3d: vaddps ymm0,ymm0,ymm8
  1512. 1f42: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  1513. 1f47: vaddps ymm1,ymm1,ymm9
  1514. 1f4c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  1515. 1f51: vaddps ymm2,ymm2,ymm10
  1516. 1f56: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  1517. 1f5b: vaddps ymm3,ymm3,ymm11
  1518. 1f60: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  1519. 1f68: vaddps ymm4,ymm4,ymm8
  1520. 1f6d: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  1521. 1f75: vaddps ymm5,ymm5,ymm9
  1522. 1f7a: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  1523. 1f82: vaddps ymm6,ymm6,ymm10
  1524. 1f87: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  1525. 1f8f: vaddps ymm7,ymm7,ymm11
  1526. 1f94: add rdx,0x400  # next W row (0x400 bytes = 256 floats)
  1527. 1f9b: add rax,0x4  # next input float
  1528. 1f9f: cmp rax,0x400  # 0x400 / 4 = 256 inputs
  1529. 1fa5: jl 1f33 <rl_lstm/MatMul_7+0x68>
  1530. 1fa7: vmovaps YMMWORD PTR [r8+rcx*1],ymm0  # write the finished 64-float tile
  1531. 1fad: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1532. 1fb4: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1533. 1fbb: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1534. 1fc2: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  1535. 1fcc: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  1536. 1fd6: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  1537. 1fe0: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  1538. 1fea: add rsi,0x100  # advance W base to the next column tile
  1539. 1ff1: add rcx,0x100
  1540. 1ff8: cmp rcx,0x400  # 0x400 bytes = 256 output floats
  1541. 1fff: jl 1eea <rl_lstm/MatMul_7+0x1f>
  1542.  
  1543. 0000000000002005 <rl_lstm/add_7>:
        # Fused elementwise kernel over 0x400 bytes (256 floats), 8 floats per
        # iteration. Per element, with x from rbp+0x1e20, b from rl_lstm/bo,
        # v from rbp+0xe20 and C0 the constant held in ymm0:
        #   t   = 0 - (x + b)
        #   e   = max(approx(t), t)   # approx() = range-reduced polynomial scaled by 2^n
        #   out = (C0 / (C0 + e)) * v
        # and out is stored through the pointer loaded from [rbp+0x28].
        # NOTE(review): this has the shape of sigmoid(x + b) * v with a
        # polynomial exp() approximation, assuming C0 == 1.0f; the constant
        # values live in rl_lstm_data and are not decoded here - confirm.
        # The block ends with the epilogue (pop rbp / vzeroupper / ret).
  1544. 2005: vmovaps ymm0,YMMWORD PTR [rip+0x133] # 2140 <rl_lstm_data+0x1>
  1545. 200d: vxorps ymm1,ymm1,ymm1  # ymm1 = 0.0, used to negate via 0 - value
  1546. 2011: vmovaps ymm2,YMMWORD PTR [rip+0x267] # 2280 <rl_lstm_data+0x141>
  1547. 2019: vmovaps ymm3,YMMWORD PTR [rip+0x23f] # 2260 <rl_lstm_data+0x121>
  1548. 2021: vmovaps ymm4,YMMWORD PTR [rip+0x297] # 22c0 <rl_lstm_data+0x181>
  1549. 2029: vmovaps ymm5,YMMWORD PTR [rip+0x26f] # 22a0 <rl_lstm_data+0x161>
  1550. 2031: vmovaps ymm6,YMMWORD PTR [rip+0x2a7] # 22e0 <rl_lstm_data+0x1a1>
  1551. 2039: vmovaps ymm7,YMMWORD PTR [rip+0x2bf] # 2300 <rl_lstm_data+0x1c1>
  1552. 2041: vmovaps ymm8,YMMWORD PTR [rip+0x2d7] # 2320 <rl_lstm_data+0x1e1>
  1553. 2049: lea rsi,[rbp+0x1e20]  # rsi = x (the buffer written by rl_lstm/MatMul_7)
  1554. 2050: movabs rcx,0x0  # rcx = bias table, patched by the relocation below
  1555. 2052: R_X86_64_64 rl_lstm/bo
  1556. 205a: lea rdi,[rbp+0xe20]  # rdi = v, the elementwise multiplier
  1557. 2061: mov rdx,QWORD PTR [rbp+0x28]  # rdx = output pointer (loaded, not computed)
  1558. 2065: xor rax,rax  # rax = byte offset, shared by all four arrays
  1559. 2068: vmovaps ymm9,YMMWORD PTR [rsi+rax*1]
  1560. 206d: vaddps ymm10,ymm9,YMMWORD PTR [rcx+rax*1]  # x + b
  1561. 2072: vsubps ymm9,ymm1,ymm10  # t = -(x + b); kept in ymm9 until the vmaxps at 2111
  1562. 2077: vminps ymm10,ymm9,ymm3  # clamp t from above by ymm3 ...
  1563. 207b: vmaxps ymm11,ymm10,ymm2  # ... and from below by ymm2
  1564. 207f: vmulps ymm10,ymm11,ymm5
  1565. 2083: vaddps ymm12,ymm10,ymm4
  1566. 2087: vroundps ymm10,ymm12,0x1  # imm 0x1 = round toward -inf: n = floor(t*ymm5 + ymm4)
  1567. 208d: vmulps ymm12,ymm10,ymm6
  1568. 2091: vaddps ymm13,ymm12,ymm11  # r = t + n*ymm6 (reduced argument)
  1569. 2096: vmulps ymm11,ymm13,ymm13  # r*r
  1570. 209b: vmulps ymm12,ymm7,ymm13  # Horner evaluation of a degree-5 polynomial in r
  1571. 20a0: vaddps ymm14,ymm12,ymm8
  1572. 20a5: vmulps ymm12,ymm14,ymm13
  1573. 20aa: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x28e] # 2340 <rl_lstm_data+0x201>
  1574. 20b2: vmulps ymm12,ymm14,ymm13
  1575. 20b7: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2a1] # 2360 <rl_lstm_data+0x221>
  1576. 20bf: vmulps ymm12,ymm14,ymm13
  1577. 20c4: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2b4] # 2380 <rl_lstm_data+0x241>
  1578. 20cc: vmulps ymm12,ymm14,ymm13
  1579. 20d1: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x1e7] # 22c0 <rl_lstm_data+0x181>
  1580. 20d9: vmulps ymm12,ymm14,ymm11  # poly(r) * r^2 ...
  1581. 20de: vaddps ymm11,ymm12,ymm13  # ... + r ...
  1582. 20e3: vaddps ymm12,ymm11,ymm0  # ... + C0
  1583. 20e7: vaddps ymm11,ymm10,YMMWORD PTR [rip+0x2b1] # 23a0 <rl_lstm_data+0x261>
  1584. 20ef: vcvttps2dq ymm10,ymm11  # integer (n + constant at 23a0)
  1585. 20f4: vextractf128 xmm15,ymm10,0x1  # integer shift is done per 128-bit half
  1586. 20fa: vpslld xmm15,xmm15,0x17  # << 23 moves the integer into the float exponent field
  1587. 2100: vpslld xmm11,xmm10,0x17
  1588. 2106: vinsertf128 ymm11,ymm11,xmm15,0x1  # reassemble the 256-bit scale factor
  1589. 210c: vmulps ymm10,ymm12,ymm11  # polynomial result * scale factor
  1590. 2111: vmaxps ymm11,ymm10,ymm9  # e = max(approximation, t)
  1591. 2116: vaddps ymm9,ymm0,ymm11  # C0 + e
  1592. 211b: vdivps ymm10,ymm0,ymm9  # C0 / (C0 + e)
  1593. 2120: vmulps ymm9,ymm10,YMMWORD PTR [rdi+rax*1]  # multiply by v
  1594. 2125: vmovaps YMMWORD PTR [rdx+rax*1],ymm9
  1595. 212a: add rax,0x20  # 8 floats per iteration
  1596. 212e: cmp rax,0x400  # 0x400 bytes = 256 floats
  1597. 2134: jl 2068 <rl_lstm/add_7+0x63>
  1598. 213a: pop rbp  # epilogue: restore the caller's rbp
  1599. 213b: vzeroupper  # clear upper ymm halves before returning
  1600. 213e: ret
  1601.  
  1602. 000000000000213f <rl_lstm_data>:
  1603. 213f: ....?...?...?...
  1604. 214f: ?...?...?...?...
  1605. 215f: ?...............
  1606. 216f: ................
  1607. 217f: ....A...A...A...
  1608. 218f: A...A...A...A...
  1609. 219f: A.%...%...%...%.
  1610. 21af: ..%...%...%...%.
  1611. 21bf: .~3a*~3a*~3a*~3a
  1612. 21cf: *~3a*~3a*~3a*~3a
  1613. 21df: *.7...7...7...7.
  1614. 21ef: ..7...7...7...7.
  1615. 21ff: .A.\3A.\3A.\3A.\
  1616. 220f: 3A.\3A.\3A.\3A.\
  1617. 221f: 3JCy7JCy7JCy7JCy
  1618. 222f: 7JCy7JCy7JCy7JCy
  1619. 223f: 7..':..':..':..'
  1620. 224f: :..':..':..':..'
  1621. 225f: :...B...B...B...
  1622. 226f: B...B...B...B...
  1623. 227f: B...............
  1624. 228f: ................
  1625. 229f: .;..?;..?;..?;..
  1626. 22af: ?;..?;..?;..?;..
  1627. 22bf: ?...?...?...?...
  1628. 22cf: ?...?...?...?...
  1629. 22df: ?.r1..r1..r1..r1
  1630. 22ef: ..r1..r1..r1..r1
  1631. 22ff: .giP9giP9giP9giP
  1632. 230f: 9giP9giP9giP9giP
  1633. 231f: 9.C.:.C.:.C.:.C.
  1634. 232f: :.C.:.C.:.C.:.C.
  1635. 233f: :...<...<...<...
  1636. 234f: <...<...<...<...
  1637. 235f: <..*=..*=..*=..*
  1638. 236f: =..*=..*=..*=..*
  1639. 237f: =..*>..*>..*>..*
  1640. 238f: >..*>..*>..*>..*
  1641. 239f: >...B...B...B...
  1642. 23af: B...B...B...B...
  1643. 23bf: B.Y.;.Y.;.Y.;.Y.
  1644. 23cf: ;.Y.;.Y.;.Y.;.Y.
  1645. 23df: ;...5...5...5...
  1646. 23ef: 5...5...5...5...
  1647. 23ff: 5...8...8...8...
  1648. 240f: 8...8...8...8...
  1649. 241f: 8...;...;...;...
  1650. 242f: ;...;...;...;...
  1651. 243f: ;.Y.;.Y.;.Y.;.Y.
  1652. 244f: ;.Y.;.Y.;.Y.;.Y.
  1653. 245f: ;
  1654.  
  1655. 0000000000002460 <ff>:
        # Entry point of the ff kernel. Saves the caller's rbp and then uses rbp
        # as the base pointer for every [rbp+disp] access in the blocks that
        # follow; the value comes from rdi at entry. The matching
        # pop rbp / vzeroupper / ret is at 2f8b.
  1656. 2460: vzeroupper
  1657. 2463: push rbp
  1658. 2464: mov rbp,rdi  # rbp = value passed in rdi (base of the data area)
  1659.  
  1660. 0000000000002467 <ff/rl/Collect>:
        # Gathers one 0x400-byte row selected by the int32 index at rbp+0x12c.
        #   index >= 0 : copy 0x400 bytes from ([rbp+0x200] + index*0x400) to rbp+0x2a0
        #   index == -1: store 0x3f800000 (the bit pattern of 1.0f) at rbp+0x2a0+0x400
        #   other < 0  : write nothing
        # NOTE(review): no path writes both the row and the trailing dword, so
        # the unwritten part keeps its previous contents - presumably the buffer
        # is cleared elsewhere; confirm.
  1661. 2467: lea rdx,[rbp+0x12c]  # rdx = address of the index
  1662. 246e: mov r8,QWORD PTR [rbp+0x200]  # r8 = source base pointer
  1663. 2475: lea r9,[rbp+0x2a0]  # r9 = destination row
  1664. 247c: movsxd rax,DWORD PTR [rdx]  # sign-extend the int32 index
  1665. 247f: test rax,rax
  1666. 2482: js 24a3 <ff/rl/Collect+0x3c>  # negative index: skip the copy
  1667. 2488: shl rax,0xa  # index * 0x400 bytes per row
  1668. 248c: add rax,r8
  1669. 248f: mov rdi,r9
  1670. 2492: mov rsi,rax
  1671. 2495: mov rcx,0x400
  1672. 249c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]  # byte copy of the whole row
  1673. 249e: jmp 24ba <ff/frame-end-lr/Collect>
  1674. 24a3: cmp rax,0xffffffffffffffff
  1675. 24a7: jne 24ba <ff/frame-end-lr/Collect>  # negative but not -1: leave the destination alone
  1676. 24ad: mov r11d,0x3f800000  # 1.0f
  1677. 24b3: mov DWORD PTR [r9+0x400],r11d  # extra dword just past the 0x400-byte row
  1678.  
  1679. 00000000000024ba <ff/frame-end-lr/Collect>:
        # Gathers 5 rows, one per int32 index in the array at rbp+0x164, into
        # consecutive 0x420-byte slots starting at rbp+0x6c0. Per index:
        #   index >= 0 : copy 0x400 bytes from ([rbp+0x138] + index*0x400) into the slot
        #   index == -1: store 0x3f800000 (1.0f) at slot+0x400
        #   other < 0  : write nothing to the slot
        # NOTE(review): the part of a slot that a path does not write keeps its
        # previous contents - presumably cleared elsewhere; confirm.
  1680. 24ba: lea rdx,[rbp+0x164]  # rdx = index array
  1681. 24c1: mov r8,QWORD PTR [rbp+0x138]  # r8 = source base pointer
  1682. 24c8: lea r9,[rbp+0x6c0]  # r9 = current destination slot
  1683. 24cf: xor r10,r10  # r10 = index counter
  1684. 24d2: movsxd rax,DWORD PTR [rdx+r10*4]  # loop: sign-extend index r10
  1685. 24d6: test rax,rax
  1686. 24d9: js 24fa <ff/frame-end-lr/Collect+0x40>  # negative index: skip the copy
  1687. 24df: shl rax,0xa  # index * 0x400 bytes per row
  1688. 24e3: add rax,r8
  1689. 24e6: mov rdi,r9
  1690. 24e9: mov rsi,rax
  1691. 24ec: mov rcx,0x400
  1692. 24f3: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1693. 24f5: jmp 2511 <ff/frame-end-lr/Collect+0x57>
  1694. 24fa: cmp rax,0xffffffffffffffff
  1695. 24fe: jne 2511 <ff/frame-end-lr/Collect+0x57>  # negative but not -1: nothing written
  1696. 2504: mov r11d,0x3f800000  # 1.0f
  1697. 250a: mov DWORD PTR [r9+0x400],r11d  # extra dword just past the row
  1698. 2511: add r9,0x420  # next slot (0x400 row + 0x20 tail)
  1699. 2518: inc r10
  1700. 251b: cmp r10,0x5  # 5 indices
  1701. 251f: jne 24d2 <ff/frame-end-lr/Collect+0x18>
  1702.  
  1703. 0000000000002521 <ff/frame-end-rl/Collect>:
        # Gathers 5 rows, one per int32 index in the array at rbp+0x140, into
        # consecutive 0x420-byte slots starting at rbp+0x1b60. Per index:
        #   index >= 0 : copy 0x400 bytes from ([rbp+0x200] + index*0x400) into the slot
        #   index == -1: store 0x3f800000 (1.0f) at slot+0x400
        #   other < 0  : write nothing to the slot
        # NOTE(review): the part of a slot that a path does not write keeps its
        # previous contents - presumably cleared elsewhere; confirm.
  1704. 2521: lea rdx,[rbp+0x140]  # rdx = index array
  1705. 2528: mov r8,QWORD PTR [rbp+0x200]  # r8 = source base pointer
  1706. 252f: lea r9,[rbp+0x1b60]  # r9 = current destination slot
  1707. 2536: xor r10,r10  # r10 = index counter
  1708. 2539: movsxd rax,DWORD PTR [rdx+r10*4]  # loop: sign-extend index r10
  1709. 253d: test rax,rax
  1710. 2540: js 2561 <ff/frame-end-rl/Collect+0x40>  # negative index: skip the copy
  1711. 2546: shl rax,0xa  # index * 0x400 bytes per row
  1712. 254a: add rax,r8
  1713. 254d: mov rdi,r9
  1714. 2550: mov rsi,rax
  1715. 2553: mov rcx,0x400
  1716. 255a: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1717. 255c: jmp 2578 <ff/frame-end-rl/Collect+0x57>
  1718. 2561: cmp rax,0xffffffffffffffff
  1719. 2565: jne 2578 <ff/frame-end-rl/Collect+0x57>  # negative but not -1: nothing written
  1720. 256b: mov r11d,0x3f800000  # 1.0f
  1721. 2571: mov DWORD PTR [r9+0x400],r11d  # extra dword just past the row
  1722. 2578: add r9,0x420  # next slot (0x400 row + 0x20 tail)
  1723. 257f: inc r10
  1724. 2582: cmp r10,0x5  # 5 indices
  1725. 2586: jne 2539 <ff/frame-end-rl/Collect+0x18>
  1726.  
  1727. 0000000000002588 <ff/in-roles/Lookup>:
        # Sums embedding rows: walks 32 int32 indices at rbp+0x208 and adds the
        # selected 64-byte row (16 floats) of ff/fixed_embedding_matrix_0 into a
        # running sum, which is stored at rbp+0x3000. Per index:
        #   index >= 0 : add row[index]
        #   index == -1: add row[0x7c] (fallback row held in r8)
        #   other < 0  : contribute nothing
  1728. 2588: lea rcx,[rbp+0x208]  # rcx = index array
  1729. 258f: movabs rdx,0x0  # rdx = embedding matrix base, patched by the relocation below
  1730. 2591: R_X86_64_64 ff/fixed_embedding_matrix_0
  1731. 2599: lea rsi,[rbp+0x3000]  # rsi = output (two ymm = 64 bytes)
  1732. 25a0: vxorps ymm0,ymm0,ymm0  # sum, floats 0-7
  1733. 25a4: vxorps ymm1,ymm1,ymm1  # sum, floats 8-15
  1734. 25a8: mov r8,0x7c  # row used when the index is -1
  1735. 25af: xor rdi,rdi  # rdi = index counter
  1736. 25b2: movsxd rax,DWORD PTR [rcx+rdi*4]  # loop: sign-extend index rdi
  1737. 25b6: test rax,rax
  1738. 25b9: jns 25cc <ff/in-roles/Lookup+0x44>  # non-negative: use it directly
  1739. 25bf: cmp rax,0xffffffffffffffff
  1740. 25c3: jne 25dc <ff/in-roles/Lookup+0x54>  # negative but not -1: skip this index
  1741. 25c9: mov rax,r8
  1742. 25cc: shl rax,0x6  # row * 64 bytes
  1743. 25d0: add rax,rdx
  1744. 25d3: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  1745. 25d7: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  1746. 25dc: inc rdi
  1747. 25df: cmp rdi,0x20  # 32 indices
  1748. 25e3: jne 25b2 <ff/in-roles/Lookup+0x2a>
  1749. 25e5: vmovaps YMMWORD PTR [rsi],ymm0
  1750. 25e9: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  1751.  
  1752. 00000000000025ee <ff/unlabeled-roles/Lookup>:
        # Sums embedding rows: walks 32 int32 indices at rbp+0x180 and adds the
        # selected 64-byte row (16 floats) of ff/fixed_embedding_matrix_3 into a
        # running sum, which is stored at rbp+0x3040. Per index:
        #   index >= 0 : add row[index]
        #   index == -1: add row[0x18] (fallback row held in r8)
        #   other < 0  : contribute nothing
  1753. 25ee: lea rcx,[rbp+0x180]  # rcx = index array
  1754. 25f5: movabs rdx,0x0  # rdx = embedding matrix base, patched by the relocation below
  1755. 25f7: R_X86_64_64 ff/fixed_embedding_matrix_3
  1756. 25ff: lea rsi,[rbp+0x3040]  # rsi = output (two ymm = 64 bytes)
  1757. 2606: vxorps ymm0,ymm0,ymm0  # sum, floats 0-7
  1758. 260a: vxorps ymm1,ymm1,ymm1  # sum, floats 8-15
  1759. 260e: mov r8,0x18  # row used when the index is -1
  1760. 2615: xor rdi,rdi  # rdi = index counter
  1761. 2618: movsxd rax,DWORD PTR [rcx+rdi*4]  # loop: sign-extend index rdi
  1762. 261c: test rax,rax
  1763. 261f: jns 2632 <ff/unlabeled-roles/Lookup+0x44>  # non-negative: use it directly
  1764. 2625: cmp rax,0xffffffffffffffff
  1765. 2629: jne 2642 <ff/unlabeled-roles/Lookup+0x54>  # negative but not -1: skip this index
  1766. 262f: mov rax,r8
  1767. 2632: shl rax,0x6  # row * 64 bytes
  1768. 2636: add rax,rdx
  1769. 2639: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  1770. 263d: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  1771. 2642: inc rdi
  1772. 2645: cmp rdi,0x20  # 32 indices
  1773. 2649: jne 2618 <ff/unlabeled-roles/Lookup+0x2a>
  1774. 264b: vmovaps YMMWORD PTR [rsi],ymm0
  1775. 264f: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  1776.  
  1777. 0000000000002654 <ff/labeled-roles/Lookup>:
        # Sums embedding rows: walks 32 int32 indices starting at rbp+0 and adds
        # the selected 64-byte row (16 floats) of ff/fixed_embedding_matrix_2
        # into a running sum, which is stored at rbp+0x3080. Per index:
        #   index >= 0 : add row[index]
        #   index == -1: add row[0x270] (fallback row held in r8)
        #   other < 0  : contribute nothing
  1778. 2654: mov rcx,rbp  # rcx = index array (at offset 0 of the data area)
  1779. 2657: movabs rdx,0x0  # rdx = embedding matrix base, patched by the relocation below
  1780. 2659: R_X86_64_64 ff/fixed_embedding_matrix_2
  1781. 2661: lea rsi,[rbp+0x3080]  # rsi = output (two ymm = 64 bytes)
  1782. 2668: vxorps ymm0,ymm0,ymm0  # sum, floats 0-7
  1783. 266c: vxorps ymm1,ymm1,ymm1  # sum, floats 8-15
  1784. 2670: mov r8,0x270  # row used when the index is -1
  1785. 2677: xor rdi,rdi  # rdi = index counter
  1786. 267a: movsxd rax,DWORD PTR [rcx+rdi*4]  # loop: sign-extend index rdi
  1787. 267e: test rax,rax
  1788. 2681: jns 2694 <ff/labeled-roles/Lookup+0x40>  # non-negative: use it directly
  1789. 2687: cmp rax,0xffffffffffffffff
  1790. 268b: jne 26a4 <ff/labeled-roles/Lookup+0x50>  # negative but not -1: skip this index
  1791. 2691: mov rax,r8
  1792. 2694: shl rax,0x6  # row * 64 bytes
  1793. 2698: add rax,rdx
  1794. 269b: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  1795. 269f: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  1796. 26a4: inc rdi
  1797. 26a7: cmp rdi,0x20  # 32 indices
  1798. 26ab: jne 267a <ff/labeled-roles/Lookup+0x26>
  1799. 26ad: vmovaps YMMWORD PTR [rsi],ymm0
  1800. 26b1: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  1801.  
  1802. 00000000000026b6 <ff/out-roles/Lookup>:
        # Sums embedding rows: walks 32 int32 indices at rbp+0xac and adds the
        # selected 64-byte row (16 floats) of ff/fixed_embedding_matrix_1 into a
        # running sum, which is stored at rbp+0x30c0. Per index:
        #   index >= 0 : add row[index]
        #   index == -1: add row[0x7c] (fallback row held in r8)
        #   other < 0  : contribute nothing
  1803. 26b6: lea rcx,[rbp+0xac]  # rcx = index array
  1804. 26bd: movabs rdx,0x0  # rdx = embedding matrix base, patched by the relocation below
  1805. 26bf: R_X86_64_64 ff/fixed_embedding_matrix_1
  1806. 26c7: lea rsi,[rbp+0x30c0]  # rsi = output (two ymm = 64 bytes)
  1807. 26ce: vxorps ymm0,ymm0,ymm0  # sum, floats 0-7
  1808. 26d2: vxorps ymm1,ymm1,ymm1  # sum, floats 8-15
  1809. 26d6: mov r8,0x7c  # row used when the index is -1
  1810. 26dd: xor rdi,rdi  # rdi = index counter
  1811. 26e0: movsxd rax,DWORD PTR [rcx+rdi*4]  # loop: sign-extend index rdi
  1812. 26e4: test rax,rax
  1813. 26e7: jns 26fa <ff/out-roles/Lookup+0x44>  # non-negative: use it directly
  1814. 26ed: cmp rax,0xffffffffffffffff
  1815. 26f1: jne 270a <ff/out-roles/Lookup+0x54>  # negative but not -1: skip this index
  1816. 26f7: mov rax,r8
  1817. 26fa: shl rax,0x6  # row * 64 bytes
  1818. 26fe: add rax,rdx
  1819. 2701: vaddps ymm0,ymm0,YMMWORD PTR [rax]
  1820. 2705: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
  1821. 270a: inc rdi
  1822. 270d: cmp rdi,0x20  # 32 indices
  1823. 2711: jne 26e0 <ff/out-roles/Lookup+0x2a>
  1824. 2713: vmovaps YMMWORD PTR [rsi],ymm0
  1825. 2717: vmovaps YMMWORD PTR [rsi+0x20],ymm1
  1826.  
  1827. 000000000000271c <ff/frame-focus-steps/Collect>:
        # Gathers 5 rows, one per int32 index in the array at rbp+0x98, into
        # consecutive 0x220-byte slots starting at rbp+0x3100. Per index:
        #   index >= 0 : copy 0x200 bytes from ([rbp+0x178] + index*0x200) into the slot
        #   index == -1: store 0x3f800000 (1.0f) at slot+0x200
        #   other < 0  : write nothing to the slot
        # NOTE(review): the part of a slot that a path does not write keeps its
        # previous contents - presumably cleared elsewhere; confirm.
  1828. 271c: lea rdx,[rbp+0x98]  # rdx = index array
  1829. 2723: mov r8,QWORD PTR [rbp+0x178]  # r8 = source base pointer
  1830. 272a: lea r9,[rbp+0x3100]  # r9 = current destination slot
  1831. 2731: xor r10,r10  # r10 = index counter
  1832. 2734: movsxd rax,DWORD PTR [rdx+r10*4]  # loop: sign-extend index r10
  1833. 2738: test rax,rax
  1834. 273b: js 275c <ff/frame-focus-steps/Collect+0x40>  # negative index: skip the copy
  1835. 2741: shl rax,0x9  # index * 0x200 bytes per row
  1836. 2745: add rax,r8
  1837. 2748: mov rdi,r9
  1838. 274b: mov rsi,rax
  1839. 274e: mov rcx,0x200
  1840. 2755: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1841. 2757: jmp 2773 <ff/frame-focus-steps/Collect+0x57>
  1842. 275c: cmp rax,0xffffffffffffffff
  1843. 2760: jne 2773 <ff/frame-focus-steps/Collect+0x57>  # negative but not -1: nothing written
  1844. 2766: mov r11d,0x3f800000  # 1.0f
  1845. 276c: mov DWORD PTR [r9+0x200],r11d  # extra dword just past the row
  1846. 2773: add r9,0x220  # next slot (0x200 row + 0x20 tail)
  1847. 277a: inc r10
  1848. 277d: cmp r10,0x5  # 5 indices
  1849. 2781: jne 2734 <ff/frame-focus-steps/Collect+0x18>
  1850.  
  1851. 0000000000002783 <ff/frame-creation-steps/Collect>:
        # Gathers 5 rows, one per int32 index in the array at rbp+0x84, into
        # consecutive 0x220-byte slots starting at rbp+0x3ba0. Per index:
        #   index >= 0 : copy 0x200 bytes from ([rbp+0x178] + index*0x200) into the slot
        #   index == -1: store 0x3f800000 (1.0f) at slot+0x200
        #   other < 0  : write nothing to the slot
        # NOTE(review): the part of a slot that a path does not write keeps its
        # previous contents - presumably cleared elsewhere; confirm.
  1852. 2783: lea rdx,[rbp+0x84]  # rdx = index array
  1853. 278a: mov r8,QWORD PTR [rbp+0x178]  # r8 = source base pointer
  1854. 2791: lea r9,[rbp+0x3ba0]  # r9 = current destination slot
  1855. 2798: xor r10,r10  # r10 = index counter
  1856. 279b: movsxd rax,DWORD PTR [rdx+r10*4]  # loop: sign-extend index r10
  1857. 279f: test rax,rax
  1858. 27a2: js 27c3 <ff/frame-creation-steps/Collect+0x40>  # negative index: skip the copy
  1859. 27a8: shl rax,0x9  # index * 0x200 bytes per row
  1860. 27ac: add rax,r8
  1861. 27af: mov rdi,r9
  1862. 27b2: mov rsi,rax
  1863. 27b5: mov rcx,0x200
  1864. 27bc: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1865. 27be: jmp 27da <ff/frame-creation-steps/Collect+0x57>
  1866. 27c3: cmp rax,0xffffffffffffffff
  1867. 27c7: jne 27da <ff/frame-creation-steps/Collect+0x57>  # negative but not -1: nothing written
  1868. 27cd: mov r11d,0x3f800000  # 1.0f
  1869. 27d3: mov DWORD PTR [r9+0x200],r11d  # extra dword just past the row
  1870. 27da: add r9,0x220  # next slot (0x200 row + 0x20 tail)
  1871. 27e1: inc r10
  1872. 27e4: cmp r10,0x5  # 5 indices
  1873. 27e8: jne 279b <ff/frame-creation-steps/Collect+0x18>
  1874.  
  1875. 00000000000027ea <ff/lr/Collect>:
        # Gathers one 0x400-byte row selected by the int32 index at rbp+0x80.
        #   index >= 0 : copy 0x400 bytes from ([rbp+0x138] + index*0x400) to rbp+0x4640
        #   index == -1: store 0x3f800000 (the bit pattern of 1.0f) at rbp+0x4640+0x400
        #   other < 0  : write nothing
        # NOTE(review): no path writes both the row and the trailing dword, so
        # the unwritten part keeps its previous contents - presumably the buffer
        # is cleared elsewhere; confirm.
  1876. 27ea: lea rdx,[rbp+0x80]  # rdx = address of the index
  1877. 27f1: mov r8,QWORD PTR [rbp+0x138]  # r8 = source base pointer
  1878. 27f8: lea r9,[rbp+0x4640]  # r9 = destination row
  1879. 27ff: movsxd rax,DWORD PTR [rdx]  # sign-extend the int32 index
  1880. 2802: test rax,rax
  1881. 2805: js 2826 <ff/lr/Collect+0x3c>  # negative index: skip the copy
  1882. 280b: shl rax,0xa  # index * 0x400 bytes per row
  1883. 280f: add rax,r8
  1884. 2812: mov rdi,r9
  1885. 2815: mov rsi,rax
  1886. 2818: mov rcx,0x400
  1887. 281f: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]  # byte copy of the whole row
  1888. 2821: jmp 283d <ff/history/Collect>
  1889. 2826: cmp rax,0xffffffffffffffff
  1890. 282a: jne 283d <ff/history/Collect>  # negative but not -1: leave the destination alone
  1891. 2830: mov r11d,0x3f800000  # 1.0f
  1892. 2836: mov DWORD PTR [r9+0x400],r11d  # extra dword just past the 0x400-byte row
  1893.  
  1894. 000000000000283d <ff/history/Collect>:
        # Gathers 4 rows, one per int32 index in the array at rbp+0x154, into
        # consecutive 0x220-byte slots starting at rbp+0x4a60. Per index:
        #   index >= 0 : copy 0x200 bytes from ([rbp+0x178] + index*0x200) into the slot
        #   index == -1: store 0x3f800000 (1.0f) at slot+0x200
        #   other < 0  : write nothing to the slot
        # NOTE(review): the part of a slot that a path does not write keeps its
        # previous contents - presumably cleared elsewhere; confirm.
  1895. 283d: lea rdx,[rbp+0x154]  # rdx = index array
  1896. 2844: mov r8,QWORD PTR [rbp+0x178]  # r8 = source base pointer
  1897. 284b: lea r9,[rbp+0x4a60]  # r9 = current destination slot
  1898. 2852: xor r10,r10  # r10 = index counter
  1899. 2855: movsxd rax,DWORD PTR [rdx+r10*4]  # loop: sign-extend index r10
  1900. 2859: test rax,rax
  1901. 285c: js 287d <ff/history/Collect+0x40>  # negative index: skip the copy
  1902. 2862: shl rax,0x9  # index * 0x200 bytes per row
  1903. 2866: add rax,r8
  1904. 2869: mov rdi,r9
  1905. 286c: mov rsi,rax
  1906. 286f: mov rcx,0x200
  1907. 2876: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  1908. 2878: jmp 2894 <ff/history/Collect+0x57>
  1909. 287d: cmp rax,0xffffffffffffffff
  1910. 2881: jne 2894 <ff/history/Collect+0x57>  # negative but not -1: nothing written
  1911. 2887: mov r11d,0x3f800000  # 1.0f
  1912. 288d: mov DWORD PTR [r9+0x200],r11d  # extra dword just past the row
  1913. 2894: add r9,0x220  # next slot (0x200 row + 0x20 tail)
  1914. 289b: inc r10
  1915. 289e: cmp r10,0x4  # 4 indices
  1916. 28a2: jne 2855 <ff/history/Collect+0x18>
  1917.  
  1918. 00000000000028a4 <ff/rl/MatMul>:
        # Vector-matrix product producing 32 floats:
        #   out[j] = sum_k x[k] * W[k][j],  k over 257 input floats.
        # x is read from rbp+0x2a0 (the destination of ff/rl/Collect, including
        # the extra dword at +0x400), W is ff/linked_embedding_matrix_6 with a
        # row stride of 0x80 bytes, and out is written to rbp+0x52e0.
  1919. 28a4: lea rdi,[rbp+0x2a0]  # rdi = input vector x
  1920. 28ab: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  1921. 28ad: R_X86_64_64 ff/linked_embedding_matrix_6
  1922. 28b5: lea r8,[rbp+0x52e0]  # r8 = output buffer
  1923. 28bc: xor rcx,rcx  # output offset; stays 0 because there is a single tile
  1924. 28bf: vxorps ymm0,ymm0,ymm0  # four zeroed accumulators = 32 output floats
  1925. 28c3: vxorps ymm1,ymm1,ymm1
  1926. 28c7: vxorps ymm2,ymm2,ymm2
  1927. 28cb: vxorps ymm3,ymm3,ymm3
  1928. 28cf: mov rdx,rsi  # rdx = current W row
  1929. 28d2: xor rax,rax  # rax = byte offset into x
  1930. 28d5: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]  # loop: ymm8 = x[k] in all lanes
  1931. 28db: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
  1932. 28df: vaddps ymm0,ymm0,ymm4
  1933. 28e3: vmulps ymm5,ymm8,YMMWORD PTR [rdx+0x20]
  1934. 28e8: vaddps ymm1,ymm1,ymm5
  1935. 28ec: vmulps ymm6,ymm8,YMMWORD PTR [rdx+0x40]
  1936. 28f1: vaddps ymm2,ymm2,ymm6
  1937. 28f5: vmulps ymm7,ymm8,YMMWORD PTR [rdx+0x60]
  1938. 28fa: vaddps ymm3,ymm3,ymm7
  1939. 28fe: add rdx,0x80  # next W row (0x80 bytes = 32 floats)
  1940. 2905: add rax,0x4  # next input float
  1941. 2909: cmp rax,0x404  # 0x404 / 4 = 257 inputs
  1942. 290f: jl 28d5 <ff/rl/MatMul+0x31>
  1943. 2911: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  1944. 2917: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  1945. 291e: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  1946. 2925: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  1947.  
  1948. 000000000000292c <ff/frame-end-lr/MatMul>:
        # Row-by-row dot products: for each of the 5 input rows at rbp+0x6c0
        # (stride 0x420 bytes) and each of the 32 rows of
        # ff/linked_embedding_matrix_2 (0x8400 bytes / 0x420 per row), computes
        # the dot product over 264 floats and appends it as one float to the
        # output at rbp+0x5360 (5 * 32 floats = 0x280 bytes).
        # NOTE(review): both loop bounds compare addresses with jl (signed);
        # an unsigned jb would be the conventional choice for pointers.
  1949. 292c: lea rax,[rbp+0x6c0]  # rax = current input row
  1950. 2933: movabs rcx,0x0  # rcx = matrix base, patched by the relocation below
  1951. 2935: R_X86_64_64 ff/linked_embedding_matrix_2
  1952. 293d: lea rdi,[rbp+0x5360]  # rdi = output write pointer
  1953. 2944: mov rsi,rcx
  1954. 2947: add rsi,0x8400  # rsi = end of the matrix
  1955. 294e: mov r8,rdi
  1956. 2951: add r8,0x280  # r8 = end of the output
  1957. 2958: mov rdx,rcx  # outer loop: restart at the first matrix row
  1958. 295b: xor r9,r9  # middle loop: r9 = float index within the row
  1959. 295e: vxorps ymm3,ymm3,ymm3  # three partial-sum accumulators
  1960. 2962: vxorps ymm4,ymm4,ymm4
  1961. 2966: vxorps ymm5,ymm5,ymm5
  1962. 296a: vmovaps ymm0,YMMWORD PTR [rax+r9*4]  # inner loop: 24 floats per iteration
  1963. 2970: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20]
  1964. 2977: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40]
  1965. 297e: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
  1966. 2984: vaddps ymm3,ymm3,ymm0
  1967. 2988: vmulps ymm1,ymm1,YMMWORD PTR [rdx+r9*4+0x20]
  1968. 298f: vaddps ymm4,ymm4,ymm1
  1969. 2993: vmulps ymm2,ymm2,YMMWORD PTR [rdx+r9*4+0x40]
  1970. 299a: vaddps ymm5,ymm5,ymm2
  1971. 299e: add r9,0x18
  1972. 29a2: cmp r9,0x101  # r9 = 0,24,...,240: 11 iterations = 264 floats = 0x420 bytes
  1973. 29a9: jl 296a <ff/frame-end-lr/MatMul+0x3e>
  1974. 29ab: vaddps ymm3,ymm3,ymm4  # combine the three accumulators
  1975. 29af: vaddps ymm3,ymm3,ymm5
  1976. 29b3: vperm2f128 ymm6,ymm3,ymm3,0x1  # ymm6 = ymm3 with its 128-bit halves swapped
  1977. 29b9: vhaddps ymm3,ymm3,ymm6  # horizontal adds reduce 8 lanes to one total
  1978. 29bd: vhaddps ymm3,ymm3,ymm3
  1979. 29c1: vhaddps ymm3,ymm3,ymm3
  1980. 29c5: vmovss DWORD PTR [rdi],xmm3  # store the scalar dot product
  1981. 29c9: add rdi,0x4
  1982. 29cd: add rdx,0x420  # next matrix row
  1983. 29d4: cmp rdx,rsi
  1984. 29d7: jl 295b <ff/frame-end-lr/MatMul+0x2f>
  1985. 29d9: add rax,0x420  # next input row
  1986. 29df: cmp rdi,r8
  1987. 29e2: jl 2958 <ff/frame-end-lr/MatMul+0x2c>
  1988.  
  1989. 00000000000029e8 <ff/frame-end-rl/MatMul>:
        # Row-by-row dot products: for each of the 5 input rows at rbp+0x1b60
        # (stride 0x420 bytes) and each matrix row of
        # ff/linked_embedding_matrix_3 (0x8400 bytes scanned, 0x420 per row =
        # 32 rows), computes the dot product over 264 floats and appends it as
        # one float to the output at rbp+0x55e0 (5 * 32 floats = 0x280 bytes).
        # NOTE(review): both loop bounds compare addresses with jl (signed);
        # an unsigned jb would be the conventional choice for pointers.
  1990. 29e8: lea rax,[rbp+0x1b60]  # rax = current input row
  1991. 29ef: movabs rcx,0x0  # rcx = matrix base, patched by the relocation below
  1992. 29f1: R_X86_64_64 ff/linked_embedding_matrix_3
  1993. 29f9: lea rdi,[rbp+0x55e0]  # rdi = output write pointer
  1994. 2a00: mov rsi,rcx
  1995. 2a03: add rsi,0x8400  # rsi = end of the scanned matrix range
  1996. 2a0a: mov r8,rdi
  1997. 2a0d: add r8,0x280  # r8 = end of the output
  1998. 2a14: mov rdx,rcx  # outer loop: restart at the first matrix row
  1999. 2a17: xor r9,r9  # middle loop: r9 = float index within the row
  2000. 2a1a: vxorps ymm3,ymm3,ymm3  # three partial-sum accumulators
  2001. 2a1e: vxorps ymm4,ymm4,ymm4
  2002. 2a22: vxorps ymm5,ymm5,ymm5
  2003. 2a26: vmovaps ymm0,YMMWORD PTR [rax+r9*4]  # inner loop: 24 floats per iteration
  2004. 2a2c: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20]
  2005. 2a33: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40]
  2006. 2a3a: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
  2007. 2a40: vaddps ymm3,ymm3,ymm0
  2008. 2a44: vmulps ymm1,ymm1,YMMWORD PTR [rdx+r9*4+0x20]
  2009. 2a4b: vaddps ymm4,ymm4,ymm1
  2010. 2a4f: vmulps ymm2,ymm2,YMMWORD PTR [rdx+r9*4+0x40]
  2011. 2a56: vaddps ymm5,ymm5,ymm2
  2012. 2a5a: add r9,0x18
  2013. 2a5e: cmp r9,0x101  # r9 = 0,24,...,240: 11 iterations = 264 floats = 0x420 bytes
  2014. 2a65: jl 2a26 <ff/frame-end-rl/MatMul+0x3e>
  2015. 2a67: vaddps ymm3,ymm3,ymm4  # combine the three accumulators
  2016. 2a6b: vaddps ymm3,ymm3,ymm5
  2017. 2a6f: vperm2f128 ymm6,ymm3,ymm3,0x1  # ymm6 = ymm3 with its 128-bit halves swapped
  2018. 2a75: vhaddps ymm3,ymm3,ymm6  # horizontal adds reduce 8 lanes to one total
  2019. 2a79: vhaddps ymm3,ymm3,ymm3
  2020. 2a7d: vhaddps ymm3,ymm3,ymm3
  2021. 2a81: vmovss DWORD PTR [rdi],xmm3  # store the scalar dot product
  2022. 2a85: add rdi,0x4
  2023. 2a89: add rdx,0x420  # next matrix row
  2024. 2a90: cmp rdx,rsi
  2025. 2a93: jl 2a17 <ff/frame-end-rl/MatMul+0x2f>
  2026. 2a95: add rax,0x420  # next input row
  2027. 2a9b: cmp rdi,r8
  2028. 2a9e: jl 2a14 <ff/frame-end-rl/MatMul+0x2c>
  2029.  
  2030. 0000000000002aa4 <ff/frame-focus-steps/MatMul>:
        # Row-by-row dot products: for each of the 5 input rows at rbp+0x3100
        # (stride 0x220 bytes) and each matrix row of
        # ff/linked_embedding_matrix_1 (0x8800 bytes scanned, 0x220 per row =
        # 64 rows), computes the dot product over 136 floats and appends it as
        # one float to the output at rbp+0x5860 (5 * 64 floats = 0x500 bytes).
        # NOTE(review): both loop bounds compare addresses with jl (signed);
        # an unsigned jb would be the conventional choice for pointers.
  2031. 2aa4: lea rax,[rbp+0x3100]  # rax = current input row
  2032. 2aab: movabs rcx,0x0  # rcx = matrix base, patched by the relocation below
  2033. 2aad: R_X86_64_64 ff/linked_embedding_matrix_1
  2034. 2ab5: lea rdi,[rbp+0x5860]  # rdi = output write pointer
  2035. 2abc: mov rsi,rcx
  2036. 2abf: add rsi,0x8800  # rsi = end of the scanned matrix range
  2037. 2ac6: mov r8,rdi
  2038. 2ac9: add r8,0x500  # r8 = end of the output
  2039. 2ad0: mov rdx,rcx  # outer loop: restart at the first matrix row
  2040. 2ad3: xor r9,r9  # middle loop: r9 = float index within the row
  2041. 2ad6: vxorps ymm1,ymm1,ymm1  # partial-sum accumulator
  2042. 2ada: vmovaps ymm0,YMMWORD PTR [rax+r9*4]  # inner loop: 8 floats per iteration
  2043. 2ae0: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
  2044. 2ae6: vaddps ymm1,ymm1,ymm0
  2045. 2aea: add r9,0x8
  2046. 2aee: cmp r9,0x81  # r9 = 0,8,...,128: 17 iterations = 136 floats = 0x220 bytes
  2047. 2af5: jl 2ada <ff/frame-focus-steps/MatMul+0x36>
  2048. 2af7: vperm2f128 ymm2,ymm1,ymm1,0x1  # ymm2 = ymm1 with its 128-bit halves swapped
  2049. 2afd: vhaddps ymm1,ymm1,ymm2  # horizontal adds reduce 8 lanes to one total
  2050. 2b01: vhaddps ymm1,ymm1,ymm1
  2051. 2b05: vhaddps ymm1,ymm1,ymm1
  2052. 2b09: vmovss DWORD PTR [rdi],xmm1  # store the scalar dot product
  2053. 2b0d: add rdi,0x4
  2054. 2b11: add rdx,0x220  # next matrix row
  2055. 2b18: cmp rdx,rsi
  2056. 2b1b: jl 2ad3 <ff/frame-focus-steps/MatMul+0x2f>
  2057. 2b1d: add rax,0x220  # next input row
  2058. 2b23: cmp rdi,r8
  2059. 2b26: jl 2ad0 <ff/frame-focus-steps/MatMul+0x2c>
  2060.  
  2061. 0000000000002b28 <ff/frame-creation-steps/MatMul>:
        # Row-by-row dot products: for each of the 5 input rows at rbp+0x3ba0
        # (stride 0x220 bytes) and each matrix row of
        # ff/linked_embedding_matrix_0 (0x8800 bytes scanned, 0x220 per row =
        # 64 rows), computes the dot product over 136 floats and appends it as
        # one float to the output at rbp+0x5d60 (5 * 64 floats = 0x500 bytes).
        # NOTE(review): both loop bounds compare addresses with jl (signed);
        # an unsigned jb would be the conventional choice for pointers.
  2062. 2b28: lea rax,[rbp+0x3ba0]  # rax = current input row
  2063. 2b2f: movabs rcx,0x0  # rcx = matrix base, patched by the relocation below
  2064. 2b31: R_X86_64_64 ff/linked_embedding_matrix_0
  2065. 2b39: lea rdi,[rbp+0x5d60]  # rdi = output write pointer
  2066. 2b40: mov rsi,rcx
  2067. 2b43: add rsi,0x8800  # rsi = end of the scanned matrix range
  2068. 2b4a: mov r8,rdi
  2069. 2b4d: add r8,0x500  # r8 = end of the output
  2070. 2b54: mov rdx,rcx  # outer loop: restart at the first matrix row
  2071. 2b57: xor r9,r9  # middle loop: r9 = float index within the row
  2072. 2b5a: vxorps ymm1,ymm1,ymm1  # partial-sum accumulator
  2073. 2b5e: vmovaps ymm0,YMMWORD PTR [rax+r9*4]  # inner loop: 8 floats per iteration
  2074. 2b64: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
  2075. 2b6a: vaddps ymm1,ymm1,ymm0
  2076. 2b6e: add r9,0x8
  2077. 2b72: cmp r9,0x81  # r9 = 0,8,...,128: 17 iterations = 136 floats = 0x220 bytes
  2078. 2b79: jl 2b5e <ff/frame-creation-steps/MatMul+0x36>
  2079. 2b7b: vperm2f128 ymm2,ymm1,ymm1,0x1  # ymm2 = ymm1 with its 128-bit halves swapped
  2080. 2b81: vhaddps ymm1,ymm1,ymm2  # horizontal adds reduce 8 lanes to one total
  2081. 2b85: vhaddps ymm1,ymm1,ymm1
  2082. 2b89: vhaddps ymm1,ymm1,ymm1
  2083. 2b8d: vmovss DWORD PTR [rdi],xmm1  # store the scalar dot product
  2084. 2b91: add rdi,0x4
  2085. 2b95: add rdx,0x220  # next matrix row
  2086. 2b9c: cmp rdx,rsi
  2087. 2b9f: jl 2b57 <ff/frame-creation-steps/MatMul+0x2f>
  2088. 2ba1: add rax,0x220  # next input row
  2089. 2ba7: cmp rdi,r8
  2090. 2baa: jl 2b54 <ff/frame-creation-steps/MatMul+0x2c>
  2091.  
  2092. 0000000000002bac <ff/lr/MatMul>:
        # Vector-matrix product producing 32 floats:
        #   out[j] = sum_k x[k] * W[k][j],  k over 257 input floats.
        # x is read from rbp+0x4640 (the destination of ff/lr/Collect, including
        # the extra dword at +0x400), W is ff/linked_embedding_matrix_5 with a
        # row stride of 0x80 bytes, and out is written to rbp+0x6260.
  2093. 2bac: lea rdi,[rbp+0x4640]  # rdi = input vector x
  2094. 2bb3: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  2095. 2bb5: R_X86_64_64 ff/linked_embedding_matrix_5
  2096. 2bbd: lea r8,[rbp+0x6260]  # r8 = output buffer
  2097. 2bc4: xor rcx,rcx  # output offset; stays 0 because there is a single tile
  2098. 2bc7: vxorps ymm0,ymm0,ymm0  # four zeroed accumulators = 32 output floats
  2099. 2bcb: vxorps ymm1,ymm1,ymm1
  2100. 2bcf: vxorps ymm2,ymm2,ymm2
  2101. 2bd3: vxorps ymm3,ymm3,ymm3
  2102. 2bd7: mov rdx,rsi  # rdx = current W row
  2103. 2bda: xor rax,rax  # rax = byte offset into x
  2104. 2bdd: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]  # loop: ymm8 = x[k] in all lanes
  2105. 2be3: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
  2106. 2be7: vaddps ymm0,ymm0,ymm4
  2107. 2beb: vmulps ymm5,ymm8,YMMWORD PTR [rdx+0x20]
  2108. 2bf0: vaddps ymm1,ymm1,ymm5
  2109. 2bf4: vmulps ymm6,ymm8,YMMWORD PTR [rdx+0x40]
  2110. 2bf9: vaddps ymm2,ymm2,ymm6
  2111. 2bfd: vmulps ymm7,ymm8,YMMWORD PTR [rdx+0x60]
  2112. 2c02: vaddps ymm3,ymm3,ymm7
  2113. 2c06: add rdx,0x80  # next W row (0x80 bytes = 32 floats)
  2114. 2c0d: add rax,0x4  # next input float
  2115. 2c11: cmp rax,0x404  # 0x404 / 4 = 257 inputs
  2116. 2c17: jl 2bdd <ff/lr/MatMul+0x31>
  2117. 2c19: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  2118. 2c1f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  2119. 2c26: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  2120. 2c2d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  2121.  
  2122. 0000000000002c34 <ff/history/MatMul>:
        # Row-by-row dot products: for each of the 4 input rows at rbp+0x4a60
        # (stride 0x220 bytes) and each matrix row of
        # ff/linked_embedding_matrix_4 (0x8800 bytes scanned, 0x220 per row =
        # 64 rows), computes the dot product over 136 floats and appends it as
        # one float to the output at rbp+0x62e0 (4 * 64 floats = 0x400 bytes).
        # NOTE(review): both loop bounds compare addresses with jl (signed);
        # an unsigned jb would be the conventional choice for pointers.
  2123. 2c34: lea rax,[rbp+0x4a60]  # rax = current input row
  2124. 2c3b: movabs rcx,0x0  # rcx = matrix base, patched by the relocation below
  2125. 2c3d: R_X86_64_64 ff/linked_embedding_matrix_4
  2126. 2c45: lea rdi,[rbp+0x62e0]  # rdi = output write pointer
  2127. 2c4c: mov rsi,rcx
  2128. 2c4f: add rsi,0x8800  # rsi = end of the scanned matrix range
  2129. 2c56: mov r8,rdi
  2130. 2c59: add r8,0x400  # r8 = end of the output
  2131. 2c60: mov rdx,rcx  # outer loop: restart at the first matrix row
  2132. 2c63: xor r9,r9  # middle loop: r9 = float index within the row
  2133. 2c66: vxorps ymm1,ymm1,ymm1  # partial-sum accumulator
  2134. 2c6a: vmovaps ymm0,YMMWORD PTR [rax+r9*4]  # inner loop: 8 floats per iteration
  2135. 2c70: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
  2136. 2c76: vaddps ymm1,ymm1,ymm0
  2137. 2c7a: add r9,0x8
  2138. 2c7e: cmp r9,0x81  # r9 = 0,8,...,128: 17 iterations = 136 floats = 0x220 bytes
  2139. 2c85: jl 2c6a <ff/history/MatMul+0x36>
  2140. 2c87: vperm2f128 ymm2,ymm1,ymm1,0x1  # ymm2 = ymm1 with its 128-bit halves swapped
  2141. 2c8d: vhaddps ymm1,ymm1,ymm2  # horizontal adds reduce 8 lanes to one total
  2142. 2c91: vhaddps ymm1,ymm1,ymm1
  2143. 2c95: vhaddps ymm1,ymm1,ymm1
  2144. 2c99: vmovss DWORD PTR [rdi],xmm1  # store the scalar dot product
  2145. 2c9d: add rdi,0x4
  2146. 2ca1: add rdx,0x220  # next matrix row
  2147. 2ca8: cmp rdx,rsi
  2148. 2cab: jl 2c63 <ff/history/MatMul+0x2f>
  2149. 2cad: add rax,0x220  # next input row
  2150. 2cb3: cmp rdi,r8
  2151. 2cb6: jl 2c60 <ff/history/MatMul+0x2c>
  2152.  
  2153. 0000000000002cb8 <ff/concat>:
        # Concatenates eleven intermediate results into one contiguous
        # 0x1500-byte buffer at rbp+0x66e0, using one rep movs byte copy each.
        # Layout (source offset from rbp -> destination offset, size):
        #   0x3000 -> +0x0000, 0x040    0x30c0 -> +0x0040, 0x040
        #   0x3080 -> +0x0080, 0x040    0x3040 -> +0x00c0, 0x040
        #   0x5d60 -> +0x0100, 0x500    0x5860 -> +0x0600, 0x500
        #   0x5360 -> +0x0b00, 0x280    0x55e0 -> +0x0d80, 0x280
        #   0x62e0 -> +0x1000, 0x400    0x6260 -> +0x1400, 0x080
        #   0x52e0 -> +0x1480, 0x080
  2154. 2cb8: lea r8,[rbp+0x66e0]  # r8 = destination base
  2155. 2cbf: lea rsi,[rbp+0x3000]  # output of ff/in-roles/Lookup
  2156. 2cc6: lea rdi,[r8]
  2157. 2cc9: mov rcx,0x40
  2158. 2cd0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2159. 2cd2: lea rsi,[rbp+0x30c0]  # output of ff/out-roles/Lookup
  2160. 2cd9: lea rdi,[r8+0x40]
  2161. 2cdd: mov rcx,0x40
  2162. 2ce4: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2163. 2ce6: lea rsi,[rbp+0x3080]  # output of ff/labeled-roles/Lookup
  2164. 2ced: lea rdi,[r8+0x80]
  2165. 2cf4: mov rcx,0x40
  2166. 2cfb: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2167. 2cfd: lea rsi,[rbp+0x3040]  # output of ff/unlabeled-roles/Lookup
  2168. 2d04: lea rdi,[r8+0xc0]
  2169. 2d0b: mov rcx,0x40
  2170. 2d12: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2171. 2d14: lea rsi,[rbp+0x5d60]  # output of ff/frame-creation-steps/MatMul
  2172. 2d1b: lea rdi,[r8+0x100]
  2173. 2d22: mov rcx,0x500
  2174. 2d29: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2175. 2d2b: lea rsi,[rbp+0x5860]  # output of ff/frame-focus-steps/MatMul
  2176. 2d32: lea rdi,[r8+0x600]
  2177. 2d39: mov rcx,0x500
  2178. 2d40: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2179. 2d42: lea rsi,[rbp+0x5360]  # output of ff/frame-end-lr/MatMul
  2180. 2d49: lea rdi,[r8+0xb00]
  2181. 2d50: mov rcx,0x280
  2182. 2d57: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2183. 2d59: lea rsi,[rbp+0x55e0]  # output of ff/frame-end-rl/MatMul
  2184. 2d60: lea rdi,[r8+0xd80]
  2185. 2d67: mov rcx,0x280
  2186. 2d6e: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2187. 2d70: lea rsi,[rbp+0x62e0]  # output of ff/history/MatMul
  2188. 2d77: lea rdi,[r8+0x1000]
  2189. 2d7e: mov rcx,0x400
  2190. 2d85: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2191. 2d87: lea rsi,[rbp+0x6260]  # output of ff/lr/MatMul
  2192. 2d8e: lea rdi,[r8+0x1400]
  2193. 2d95: mov rcx,0x80
  2194. 2d9c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2195. 2d9e: lea rsi,[rbp+0x52e0]  # output of ff/rl/MatMul
  2196. 2da5: lea rdi,[r8+0x1480]
  2197. 2dac: mov rcx,0x80
  2198. 2db3: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
  2199.  
  2200. 0000000000002db5 <ff/MatMul>:
        # Vector-matrix product with bias and clamp-at-zero, 128 output floats:
        #   out[j] = max(0, bias[j] + sum_k x[k] * W[k][j]),  k over 1344 floats.
        # x is the 0x1500-byte buffer at rbp+0x66e0 (filled by ff/concat), W is
        # ff/weights_0 with a row stride of 0x200 bytes, bias is ff/bias_0, and
        # out is written through the pointer stored at [rbp+0x130]. The output
        # is produced in two tiles of 0x100 bytes (8 ymm = 64 floats).
  2201. 2db5: lea rdi,[rbp+0x66e0]  # rdi = input vector x
  2202. 2dbc: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  2203. 2dbe: R_X86_64_64 ff/weights_0
  2204. 2dc6: movabs r9,0x0  # r9 = bias base, patched by the relocation below
  2205. 2dc8: R_X86_64_64 ff/bias_0
  2206. 2dd0: mov r8,QWORD PTR [rbp+0x130]  # r8 = output pointer (loaded, not computed)
  2207. 2dd7: vxorps ymm13,ymm13,ymm13  # ymm13 = 0.0, the lower bound for the vmaxps clamp
  2208. 2ddc: xor rcx,rcx  # rcx = byte offset of the current output tile
  2209. 2ddf: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]  # outer loop: seed ymm0-ymm7 with the bias tile
  2210. 2de5: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
  2211. 2dec: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
  2212. 2df3: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
  2213. 2dfa: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
  2214. 2e04: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
  2215. 2e0e: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
  2216. 2e18: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
  2217. 2e22: mov rdx,rsi  # rdx = W row pointer for this column tile
  2218. 2e25: xor rax,rax  # rax = byte offset into x
  2219. 2e28: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]  # inner loop: ymm12 = x[k] in all lanes
  2220. 2e2e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
  2221. 2e32: vaddps ymm0,ymm0,ymm8
  2222. 2e37: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
  2223. 2e3c: vaddps ymm1,ymm1,ymm9
  2224. 2e41: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
  2225. 2e46: vaddps ymm2,ymm2,ymm10
  2226. 2e4b: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
  2227. 2e50: vaddps ymm3,ymm3,ymm11
  2228. 2e55: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
  2229. 2e5d: vaddps ymm4,ymm4,ymm8
  2230. 2e62: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
  2231. 2e6a: vaddps ymm5,ymm5,ymm9
  2232. 2e6f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
  2233. 2e77: vaddps ymm6,ymm6,ymm10
  2234. 2e7c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
  2235. 2e84: vaddps ymm7,ymm7,ymm11
  2236. 2e89: add rdx,0x200  # next W row (0x200 bytes = 128 floats)
  2237. 2e90: add rax,0x4  # next input float
  2238. 2e94: cmp rax,0x1500  # 0x1500 / 4 = 1344 inputs
  2239. 2e9a: jl 2e28 <ff/MatMul+0x73>
  2240. 2e9c: vmaxps ymm0,ymm0,ymm13  # clamp negatives to zero, then store
  2241. 2ea1: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  2242. 2ea7: vmaxps ymm1,ymm1,ymm13
  2243. 2eac: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
  2244. 2eb3: vmaxps ymm2,ymm2,ymm13
  2245. 2eb8: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
  2246. 2ebf: vmaxps ymm3,ymm3,ymm13
  2247. 2ec4: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
  2248. 2ecb: vmaxps ymm4,ymm4,ymm13
  2249. 2ed0: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
  2250. 2eda: vmaxps ymm5,ymm5,ymm13
  2251. 2edf: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
  2252. 2ee9: vmaxps ymm6,ymm6,ymm13
  2253. 2eee: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
  2254. 2ef8: vmaxps ymm7,ymm7,ymm13
  2255. 2efd: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
  2256. 2f07: add rsi,0x100  # advance W base to the next column tile
  2257. 2f0e: add rcx,0x100
  2258. 2f15: cmp rcx,0x200  # 0x200 bytes = 128 output floats
  2259. 2f1c: jl 2ddf <ff/MatMul+0x2a>
  2260.  
  2261. 0000000000002f22 <ff/MatMul_1>:
        # Vector-matrix product with bias, 6968 output floats (0x6ce0 bytes):
        #   out[j] = bias[j] + sum_k x[k] * W[k][j],  k over 128 input floats.
        # x is read through the pointer stored at [rbp+0x130] (the buffer
        # written by ff/MatMul), W is ff/weights_softmax with a row stride of
        # 0x6ce0 bytes, bias is ff/bias_softmax, and out is written to
        # rbp+0x7be0, 8 floats per outer iteration. No exponentiation or
        # normalisation is applied in this block. It ends with the function
        # epilogue (pop rbp / vzeroupper / ret).
  2262. 2f22: mov rdi,QWORD PTR [rbp+0x130]  # rdi = input vector pointer (loaded, not computed)
  2263. 2f29: movabs rsi,0x0  # rsi = W base, patched by the relocation below
  2264. 2f2b: R_X86_64_64 ff/weights_softmax
  2265. 2f33: movabs r9,0x0  # r9 = bias base, patched by the relocation below
  2266. 2f35: R_X86_64_64 ff/bias_softmax
  2267. 2f3d: lea r8,[rbp+0x7be0]  # r8 = output buffer
  2268. 2f44: xor rcx,rcx  # rcx = byte offset of the current 8-float output tile
  2269. 2f47: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]  # outer loop: seed the accumulator with bias
  2270. 2f4d: mov rdx,rsi  # rdx = W row pointer for this column tile
  2271. 2f50: xor rax,rax  # rax = byte offset into x
  2272. 2f53: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]  # inner loop: ymm8 = x[k] in all lanes
  2273. 2f59: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
  2274. 2f5d: vaddps ymm0,ymm0,ymm4
  2275. 2f61: add rdx,0x6ce0  # next W row
  2276. 2f68: add rax,0x4  # next input float
  2277. 2f6c: cmp rax,0x200  # 0x200 / 4 = 128 inputs
  2278. 2f72: jl 2f53 <ff/MatMul_1+0x31>
  2279. 2f74: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
  2280. 2f7a: add rsi,0x20  # advance W base to the next 8 columns
  2281. 2f7e: add rcx,0x20
  2282. 2f82: cmp rcx,0x6ce0  # 0x6ce0 bytes = 6968 output floats
  2283. 2f89: jl 2f47 <ff/MatMul_1+0x25>
  2284. 2f8b: pop rbp  # epilogue: restore the rbp pushed at 2463
  2285. 2f8c: vzeroupper  # clear upper ymm halves before returning
  2286. 2f8f: ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement