Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /tmp/sempar.o: file format elf64-x86-64
- SYMBOL TABLE:
- 0000000000000000 l d .text 0000000000000000 .text
- 0000000000000000 l d .rodata 0000000000000000 .rodata
- 0000000000000000 l O .rodata 0000000000000200 ff/bias_0
- 0000000000000200 l O .rodata 0000000000040000 lr_lstm/c2i
- 0000000000040200 l O .rodata 0000000000040000 lr_lstm/c2o
- 0000000000080200 l O .rodata 0000000000000008 ff/frame-creation-steps/shape:0
- 0000000000080220 l O .rodata 0000000000000004 rl_lstm/ones:0
- 0000000000080240 l O .rodata 0000000000040000 lr_lstm/h2c
- 00000000000c0240 l O .rodata 0000000000040000 lr_lstm/h2o
- 0000000000100240 l O .rodata 0000000000040000 lr_lstm/h2i
- 0000000000140240 l O .rodata 0000000000000004 ff/axis:0
- 0000000000140260 l O .rodata 0000000000000400 rl_lstm/bo
- 0000000000140660 l O .rodata 0000000000000400 rl_lstm/bi
- 0000000000140a60 l O .rodata 0000000000000400 rl_lstm/bc
- 0000000000140e60 l O .rodata 0000000000040000 rl_lstm/c2o
- 0000000000180e60 l O .rodata 0000000000040000 rl_lstm/c2i
- 00000000001c0e60 l O .rodata 0000000000016000 lr_lstm/x2c
- 00000000001d6e60 l O .rodata 0000000000016000 lr_lstm/x2o
- 00000000001ece60 l O .rodata 0000000000016000 lr_lstm/x2i
- 0000000000202e60 l O .rodata 0000000000000004 rl_lstm/axis:0
- 0000000000202e80 l O .rodata 0000000000367000 ff/weights_softmax
- 0000000000569e80 l O .rodata 0000000000016000 rl_lstm/x2i
- 000000000057fe80 l O .rodata 0000000000016000 rl_lstm/x2c
- 0000000000595e80 l O .rodata 0000000000008400 ff/linked_embedding_matrix_2
- 000000000059e280 l O .rodata 0000000000000004 lr_lstm/axis:0
- 000000000059e2a0 l O .rodata 0000000000040000 rl_lstm/h2i
- 00000000005de2a0 l O .rodata 0000000000040000 rl_lstm/h2o
- 000000000061e2a0 l O .rodata 0000000000040000 rl_lstm/h2c
- 000000000065e2a0 l O .rodata 0000000000000040 lr_lstm/fixed_embedding_matrix_3
- 000000000065e2e0 l O .rodata 0000000000680480 lr_lstm/fixed_embedding_matrix_0
- 0000000000cde760 l O .rodata 0000000000000060 lr_lstm/fixed_embedding_matrix_6
- 0000000000cde7c0 l O .rodata 0000000000000080 lr_lstm/fixed_embedding_matrix_5
- 0000000000cde840 l O .rodata 0000000000000060 lr_lstm/fixed_embedding_matrix_4
- 0000000000cde8a0 l O .rodata 0000000000000008 ff/frame-end-rl/shape:0
- 0000000000cde8c0 l O .rodata 0000000000006ce0 ff/bias_softmax
- 0000000000ce55a0 l O .rodata 0000000000016000 rl_lstm/x2o
- 0000000000cfb5a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_6
- 0000000000d039a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_5
- 0000000000d0bda0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_4
- 0000000000d145a0 l O .rodata 0000000000008400 ff/linked_embedding_matrix_3
- 0000000000d1c9a0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_1
- 0000000000d251a0 l O .rodata 0000000000008800 ff/linked_embedding_matrix_0
- 0000000000d2d9a0 l O .rodata 0000000000000400 lr_lstm/bc
- 0000000000d2dda0 l O .rodata 0000000000000400 lr_lstm/bi
- 0000000000d2e1a0 l O .rodata 0000000000000400 lr_lstm/bo
- 0000000000d2e5a0 l O .rodata 0000000000000008 ff/frame-focus-steps/shape:0
- 0000000000d2e5a8 l O .rodata 0000000000000008 ff/history/shape:0
- 0000000000d2e5c0 l O .rodata 00000000000a8000 ff/weights_0
- 0000000000dd65c0 l O .rodata 00000000000000a0 lr_lstm/fixed_embedding_matrix_2
- 0000000000dd6660 l O .rodata 0000000000082380 lr_lstm/fixed_embedding_matrix_1
- 0000000000e589e0 l O .rodata 0000000000000008 ff/frame-end-lr/shape:0
- 0000000000e58a00 l O .rodata 0000000000000004 lr_lstm/ones:0
- 0000000000e58a04 l O .rodata 0000000000000060 rl_lstm/fixed_embedding_matrix_6
- 0000000000e58a64 l O .rodata 0000000000000080 rl_lstm/fixed_embedding_matrix_5
- 0000000000e58ae4 l O .rodata 0000000000000060 rl_lstm/fixed_embedding_matrix_4
- 0000000000e58b44 l O .rodata 0000000000000040 rl_lstm/fixed_embedding_matrix_3
- 0000000000e58b84 l O .rodata 00000000000000a0 rl_lstm/fixed_embedding_matrix_2
- 0000000000e58c40 l O .rodata 0000000000082380 rl_lstm/fixed_embedding_matrix_1
- 0000000000edafc0 l O .rodata 0000000000680480 rl_lstm/fixed_embedding_matrix_0
- 000000000155b440 l O .rodata 0000000000001f40 ff/fixed_embedding_matrix_1
- 000000000155d380 l O .rodata 0000000000001f40 ff/fixed_embedding_matrix_0
- 000000000155f2c0 l O .rodata 0000000000000640 ff/fixed_embedding_matrix_3
- 000000000155f900 l O .rodata 0000000000009c40 ff/fixed_embedding_matrix_2
- 0000000000000007 l F .text 0000000000000000 lr_lstm/punctuation/Lookup
- 000000000000002e l F .text 0000000000000000 lr_lstm/suffix/Lookup
- 000000000000008e l F .text 0000000000000000 lr_lstm/quote/Lookup
- 00000000000000b5 l F .text 0000000000000000 lr_lstm/capitalization/Lookup
- 00000000000000dc l F .text 0000000000000000 lr_lstm/digit/Lookup
- 0000000000000106 l F .text 0000000000000000 lr_lstm/hyphen/Lookup
- 0000000000000130 l F .text 0000000000000000 lr_lstm/words/Lookup
- 000000000000015a l F .text 0000000000000000 lr_lstm/concat
- 00000000000001f2 l F .text 0000000000000000 lr_lstm/MatMul_3
- 0000000000000305 l F .text 0000000000000000 lr_lstm/MatMul
- 0000000000000418 l F .text 0000000000000000 lr_lstm/MatMul_4
- 0000000000000552 l F .text 0000000000000000 lr_lstm/MatMul_1
- 000000000000068c l F .text 0000000000000000 lr_lstm/MatMul_2
- 00000000000007c6 l F .text 0000000000000000 lr_lstm/add_4
- 0000000000000a4a l F .text 0000000000000000 lr_lstm/MatMul_6
- 0000000000000b5a l F .text 0000000000000000 lr_lstm/MatMul_5
- 0000000000000c97 l F .text 0000000000000000 lr_lstm/MatMul_7
- 0000000000000dd1 l F .text 0000000000000000 lr_lstm/add_7
- 0000000000000f0b l O .text 0000000000000335 lr_lstm_data
- 0000000000001247 l F .text 0000000000000000 rl_lstm/hyphen/Lookup
- 000000000000126e l F .text 0000000000000000 rl_lstm/capitalization/Lookup
- 0000000000001295 l F .text 0000000000000000 rl_lstm/quote/Lookup
- 00000000000012bc l F .text 0000000000000000 rl_lstm/words/Lookup
- 00000000000012e3 l F .text 0000000000000000 rl_lstm/digit/Lookup
- 000000000000130a l F .text 0000000000000000 rl_lstm/punctuation/Lookup
- 0000000000001331 l F .text 0000000000000000 rl_lstm/suffix/Lookup
- 0000000000001394 l F .text 0000000000000000 rl_lstm/concat
- 0000000000001426 l F .text 0000000000000000 rl_lstm/MatMul
- 0000000000001539 l F .text 0000000000000000 rl_lstm/MatMul_3
- 000000000000164c l F .text 0000000000000000 rl_lstm/MatMul_1
- 0000000000001786 l F .text 0000000000000000 rl_lstm/MatMul_4
- 00000000000018c0 l F .text 0000000000000000 rl_lstm/MatMul_2
- 00000000000019fa l F .text 0000000000000000 rl_lstm/add_4
- 0000000000001c7e l F .text 0000000000000000 rl_lstm/MatMul_6
- 0000000000001d8e l F .text 0000000000000000 rl_lstm/MatMul_5
- 0000000000001ecb l F .text 0000000000000000 rl_lstm/MatMul_7
- 0000000000002005 l F .text 0000000000000000 rl_lstm/add_7
- 000000000000213f l O .text 0000000000000321 rl_lstm_data
- 0000000000002467 l F .text 0000000000000000 ff/rl/Collect
- 00000000000024ba l F .text 0000000000000000 ff/frame-end-lr/Collect
- 0000000000002521 l F .text 0000000000000000 ff/frame-end-rl/Collect
- 0000000000002588 l F .text 0000000000000000 ff/in-roles/Lookup
- 00000000000025ee l F .text 0000000000000000 ff/unlabeled-roles/Lookup
- 0000000000002654 l F .text 0000000000000000 ff/labeled-roles/Lookup
- 00000000000026b6 l F .text 0000000000000000 ff/out-roles/Lookup
- 000000000000271c l F .text 0000000000000000 ff/frame-focus-steps/Collect
- 0000000000002783 l F .text 0000000000000000 ff/frame-creation-steps/Collect
- 00000000000027ea l F .text 0000000000000000 ff/lr/Collect
- 000000000000283d l F .text 0000000000000000 ff/history/Collect
- 00000000000028a4 l F .text 0000000000000000 ff/rl/MatMul
- 000000000000292c l F .text 0000000000000000 ff/frame-end-lr/MatMul
- 00000000000029e8 l F .text 0000000000000000 ff/frame-end-rl/MatMul
- 0000000000002aa4 l F .text 0000000000000000 ff/frame-focus-steps/MatMul
- 0000000000002b28 l F .text 0000000000000000 ff/frame-creation-steps/MatMul
- 0000000000002bac l F .text 0000000000000000 ff/lr/MatMul
- 0000000000002c34 l F .text 0000000000000000 ff/history/MatMul
- 0000000000002cb8 l F .text 0000000000000000 ff/frame-end-lr/Reshape
- 0000000000002cb8 l F .text 0000000000000000 ff/frame-end-rl/Reshape
- 0000000000002cb8 l F .text 0000000000000000 ff/frame-focus-steps/Reshape
- 0000000000002cb8 l F .text 0000000000000000 ff/frame-creation-steps/Reshape
- 0000000000002cb8 l F .text 0000000000000000 ff/history/Reshape
- 0000000000002cb8 l F .text 0000000000000000 ff/concat
- 0000000000002db5 l F .text 0000000000000000 ff/MatMul
- 0000000000002f22 l F .text 0000000000000000 ff/MatMul_1
- 0000000000000000 g F .text 0000000000000f0b lr_lstm
- 0000000000001240 g F .text 0000000000000eff rl_lstm
- 0000000000002460 g F .text 0000000000000b30 ff
- Disassembly of section .text:
# Entry of the global function lr_lstm.
# The caller's rbp is saved and rbp is then loaded from rdi; every later section of
# this function addresses its inputs, scratch buffers and outputs relative to rbp.
# NOTE(review): rdi is presumably the first pointer argument (SysV AMD64 convention);
# the caller is not visible in this listing — confirm.
- 0000000000000000 <lr_lstm>:
# Zero the upper halves of all ymm registers before the AVX code that follows.
- 0: vzeroupper
- 3: push rbp
- 4: mov rbp,rdi
# punctuation/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0xc]
#   id  = 2 if id is negative (cmovs on the sign flag set by test)
#   ptr = lr_lstm/fixed_embedding_matrix_4 + id*32   (shl by 5 => 32-byte rows)
#   [rbp+0x48] = ptr   (only the pointer is stored; no row data is copied here)
# The symbol table gives this table a size of 0x60 bytes, i.e. three 32-byte rows,
# so the fallback index 2 is the last row. No upper-bound check is made on id.
- 0000000000000007 <lr_lstm/punctuation/Lookup>:
- 7: movsxd rax,DWORD PTR [rbp+0xc]
- b: mov rcx,0x2
- 12: test rax,rax
- 15: cmovs rax,rcx
- 19: shl rax,0x5
# The movabs immediate is 0 in the object file; the R_X86_64_64 relocation below
# supplies the absolute table address at link time.
- 1d: movabs rdx,0x0
- 1f: R_X86_64_64 lr_lstm/fixed_embedding_matrix_4
- 27: add rax,rdx
- 2a: mov QWORD PTR [rbp+0x48],rax
# suffix/Lookup: sum up to three 64-byte embedding rows into a 64-byte result.
#   rcx = rbp+0x10   array of three int32 ids
#   rdx = lr_lstm/fixed_embedding_matrix_1 (address supplied by relocation)
#   rsi = rbp+0x60   destination of the summed row (written with aligned stores)
#   ymm0/ymm1        accumulators for the low / high 32 bytes of a row, start at zero
#   r8  = 0x208d     row index substituted for an id of exactly -1
#   rdi              loop counter 0..2
# Per id: id >= 0 uses row id; id == -1 uses row 0x208d; any other negative id
# contributes nothing. The symbol table size 0x82380 equals 0x208e rows of 64 bytes,
# so 0x208d is the last row. No upper-bound check is made on id.
- 000000000000002e <lr_lstm/suffix/Lookup>:
- 2e: lea rcx,[rbp+0x10]
- 32: movabs rdx,0x0
- 34: R_X86_64_64 lr_lstm/fixed_embedding_matrix_1
- 3c: lea rsi,[rbp+0x60]
- 40: vxorps ymm0,ymm0,ymm0
- 44: vxorps ymm1,ymm1,ymm1
- 48: mov r8,0x208d
- 4f: xor rdi,rdi
# Loop head: fetch id number rdi.
- 52: movsxd rax,DWORD PTR [rcx+rdi*4]
- 56: test rax,rax
# Non-negative id: go straight to the row address computation at 6c.
- 59: jns 6c <lr_lstm/suffix/Lookup+0x3e>
# Negative id: only -1 is mapped to the fallback row; anything else skips to 7c.
- 5f: cmp rax,0xffffffffffffffff
- 63: jne 7c <lr_lstm/suffix/Lookup+0x4e>
- 69: mov rax,r8
# Row address = table + index*64; add both 32-byte halves into the accumulators.
- 6c: shl rax,0x6
- 70: add rax,rdx
- 73: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 77: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 7c: inc rdi
- 7f: cmp rdi,0x3
- 83: jne 52 <lr_lstm/suffix/Lookup+0x24>
# Write the 64-byte sum to rbp+0x60.
- 85: vmovaps YMMWORD PTR [rsi],ymm0
- 89: vmovaps YMMWORD PTR [rsi+0x20],ymm1
# quote/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0x40]
#   id  = 3 if id is negative
#   ptr = lr_lstm/fixed_embedding_matrix_5 + id*32   (32-byte rows)
#   [rbp+0x50] = ptr   (pointer only; the row is not copied here)
# The symbol table size of this table is 0x80 bytes (four 32-byte rows), so the
# fallback index 3 is the last row. No upper-bound check is made on id.
- 000000000000008e <lr_lstm/quote/Lookup>:
- 8e: movsxd rax,DWORD PTR [rbp+0x40]
- 92: mov rcx,0x3
- 99: test rax,rax
- 9c: cmovs rax,rcx
- a0: shl rax,0x5
- a4: movabs rdx,0x0
- a6: R_X86_64_64 lr_lstm/fixed_embedding_matrix_5
- ae: add rax,rdx
- b1: mov QWORD PTR [rbp+0x50],rax
# capitalization/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0x1c]
#   id  = 4 if id is negative
#   ptr = lr_lstm/fixed_embedding_matrix_2 + id*32   (32-byte rows)
#   [rbp+0x58] = ptr   (pointer only; the row is not copied here)
# The symbol table size of this table is 0xa0 bytes (five 32-byte rows), so the
# fallback index 4 is the last row. No upper-bound check is made on id.
- 00000000000000b5 <lr_lstm/capitalization/Lookup>:
- b5: movsxd rax,DWORD PTR [rbp+0x1c]
- b9: mov rcx,0x4
- c0: test rax,rax
- c3: cmovs rax,rcx
- c7: shl rax,0x5
- cb: movabs rdx,0x0
- cd: R_X86_64_64 lr_lstm/fixed_embedding_matrix_2
- d5: add rax,rdx
- d8: mov QWORD PTR [rbp+0x58],rax
# digit/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0x30]
#   id  = 2 if id is negative
#   ptr = lr_lstm/fixed_embedding_matrix_6 + id*32   (32-byte rows)
#   [rbp+0xa0] = ptr   (pointer only; the row is not copied here)
# The symbol table size of this table is 0x60 bytes (three 32-byte rows), so the
# fallback index 2 is the last row. No upper-bound check is made on id.
- 00000000000000dc <lr_lstm/digit/Lookup>:
- dc: movsxd rax,DWORD PTR [rbp+0x30]
- e0: mov rcx,0x2
- e7: test rax,rax
- ea: cmovs rax,rcx
- ee: shl rax,0x5
- f2: movabs rdx,0x0
- f4: R_X86_64_64 lr_lstm/fixed_embedding_matrix_6
- fc: add rax,rdx
- ff: mov QWORD PTR [rbp+0xa0],rax
# hyphen/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0x8]
#   id  = 1 if id is negative
#   ptr = lr_lstm/fixed_embedding_matrix_3 + id*32   (32-byte rows)
#   [rbp+0xa8] = ptr   (pointer only; the row is not copied here)
# The symbol table size of this table is 0x40 bytes (two 32-byte rows), so the
# fallback index 1 is the last row. No upper-bound check is made on id.
- 0000000000000106 <lr_lstm/hyphen/Lookup>:
- 106: movsxd rax,DWORD PTR [rbp+0x8]
- 10a: mov rcx,0x1
- 111: test rax,rax
- 114: cmovs rax,rcx
- 118: shl rax,0x5
- 11c: movabs rdx,0x0
- 11e: R_X86_64_64 lr_lstm/fixed_embedding_matrix_3
- 126: add rax,rdx
- 129: mov QWORD PTR [rbp+0xa8],rax
# words/Lookup: turn one index into the address of an embedding row.
#   id  = sign-extended int32 read from [rbp+0x34]
#   id  = 0xd008 if id is negative
#   ptr = lr_lstm/fixed_embedding_matrix_0 + id*128   (shl by 7 => 128-byte rows)
#   [rbp+0xb0] = ptr   (pointer only; the row is not copied here)
# The symbol table size 0x680480 equals 0xd009 rows of 128 bytes, so the fallback
# index 0xd008 is the last row. No upper-bound check is made on id.
- 0000000000000130 <lr_lstm/words/Lookup>:
- 130: movsxd rax,DWORD PTR [rbp+0x34]
- 134: mov rcx,0xd008
- 13b: test rax,rax
- 13e: cmovs rax,rcx
- 142: shl rax,0x7
- 146: movabs rdx,0x0
- 148: R_X86_64_64 lr_lstm/fixed_embedding_matrix_0
- 150: add rax,rdx
- 153: mov QWORD PTR [rbp+0xb0],rax
# concat: assemble one contiguous 0x160-byte vector at rbp+0xc0 from seven pieces,
# each copied with rep movsb (rsi = source, rdi = destination, rcx = byte count).
#   dest offset  bytes  source
#   +0x000       0x80   row pointed to by [rbp+0xb0]  (set by words/Lookup)
#   +0x080       0x40   buffer at rbp+0x60            (sum written by suffix/Lookup)
#   +0x0c0       0x20   row pointed to by [rbp+0x58]  (set by capitalization/Lookup)
#   +0x0e0       0x20   row pointed to by [rbp+0xa8]  (set by hyphen/Lookup)
#   +0x100       0x20   row pointed to by [rbp+0x48]  (set by punctuation/Lookup)
#   +0x120       0x20   row pointed to by [rbp+0x50]  (set by quote/Lookup)
#   +0x140       0x20   row pointed to by [rbp+0xa0]  (set by digit/Lookup)
# r8 keeps the destination base because rdi is advanced by every copy.
- 000000000000015a <lr_lstm/concat>:
- 15a: lea r8,[rbp+0xc0]
- 161: mov rsi,QWORD PTR [rbp+0xb0]
- 168: lea rdi,[r8]
- 16b: mov rcx,0x80
- 172: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 174: lea rsi,[rbp+0x60]
- 178: lea rdi,[r8+0x80]
- 17f: mov rcx,0x40
- 186: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 188: mov rsi,QWORD PTR [rbp+0x58]
- 18c: lea rdi,[r8+0xc0]
- 193: mov rcx,0x20
- 19a: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 19c: mov rsi,QWORD PTR [rbp+0xa8]
- 1a3: lea rdi,[r8+0xe0]
- 1aa: mov rcx,0x20
- 1b1: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 1b3: mov rsi,QWORD PTR [rbp+0x48]
- 1b7: lea rdi,[r8+0x100]
- 1be: mov rcx,0x20
- 1c5: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 1c7: mov rsi,QWORD PTR [rbp+0x50]
- 1cb: lea rdi,[r8+0x120]
- 1d2: mov rcx,0x20
- 1d9: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 1db: mov rsi,QWORD PTR [rbp+0xa0]
- 1e2: lea rdi,[r8+0x140]
- 1e9: mov rcx,0x20
- 1f0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
# MatMul_3: single-precision vector-matrix product, out = in * lr_lstm/x2c.
#   rdi = rbp+0xc0   input vector; 0x160 bytes (88 floats) are consumed
#   rsi = x2c        matrix, walked in rows of 0x400 bytes (256 floats)
#   r8  = rbp+0x220  output, 0x400 bytes (256 floats), written with aligned stores
# The output is produced in four 0x100-byte column tiles (rcx = 0, 0x100, 0x200, 0x300);
# each tile is accumulated in ymm0-ymm7 (8 registers x 8 floats).
- 00000000000001f2 <lr_lstm/MatMul_3>:
- 1f2: lea rdi,[rbp+0xc0]
- 1f9: movabs rsi,0x0
- 1fb: R_X86_64_64 lr_lstm/x2c
- 203: lea r8,[rbp+0x220]
- 20a: xor rcx,rcx
# Start of a tile: zero the eight accumulators.
- 20d: vxorps ymm0,ymm0,ymm0
- 211: vxorps ymm1,ymm1,ymm1
- 215: vxorps ymm2,ymm2,ymm2
- 219: vxorps ymm3,ymm3,ymm3
- 21d: vxorps ymm4,ymm4,ymm4
- 221: vxorps ymm5,ymm5,ymm5
- 225: vxorps ymm6,ymm6,ymm6
- 229: vxorps ymm7,ymm7,ymm7
# rdx = first row of the current column tile, rax = byte offset into the input.
- 22d: mov rdx,rsi
- 230: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- 233: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 239: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 23d: vaddps ymm0,ymm0,ymm8
- 242: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 247: vaddps ymm1,ymm1,ymm9
- 24c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 251: vaddps ymm2,ymm2,ymm10
- 256: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 25b: vaddps ymm3,ymm3,ymm11
- 260: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 268: vaddps ymm4,ymm4,ymm8
- 26d: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 275: vaddps ymm5,ymm5,ymm9
- 27a: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 282: vaddps ymm6,ymm6,ymm10
- 287: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 28f: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- 294: add rdx,0x400
- 29b: add rax,0x4
- 29f: cmp rax,0x160
- 2a5: jl 233 <lr_lstm/MatMul_3+0x41>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- 2a7: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 2ad: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 2b4: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 2bb: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 2c2: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 2cc: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 2d6: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 2e0: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- 2ea: add rsi,0x100
- 2f1: add rcx,0x100
- 2f8: cmp rcx,0x400
- 2ff: jl 20d <lr_lstm/MatMul_3+0x1b>
# MatMul: single-precision vector-matrix product, out = in * lr_lstm/x2i.
#   rdi = rbp+0xc0   input vector; 0x160 bytes (88 floats) are consumed
#   rsi = x2i        matrix, walked in rows of 0x400 bytes (256 floats)
#   r8  = rbp+0x620  output, 0x400 bytes (256 floats), written with aligned stores
# The output is produced in four 0x100-byte column tiles (rcx = 0, 0x100, 0x200, 0x300);
# each tile is accumulated in ymm0-ymm7 (8 registers x 8 floats).
- 0000000000000305 <lr_lstm/MatMul>:
- 305: lea rdi,[rbp+0xc0]
- 30c: movabs rsi,0x0
- 30e: R_X86_64_64 lr_lstm/x2i
- 316: lea r8,[rbp+0x620]
- 31d: xor rcx,rcx
# Start of a tile: zero the eight accumulators.
- 320: vxorps ymm0,ymm0,ymm0
- 324: vxorps ymm1,ymm1,ymm1
- 328: vxorps ymm2,ymm2,ymm2
- 32c: vxorps ymm3,ymm3,ymm3
- 330: vxorps ymm4,ymm4,ymm4
- 334: vxorps ymm5,ymm5,ymm5
- 338: vxorps ymm6,ymm6,ymm6
- 33c: vxorps ymm7,ymm7,ymm7
# rdx = first row of the current column tile, rax = byte offset into the input.
- 340: mov rdx,rsi
- 343: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- 346: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 34c: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 350: vaddps ymm0,ymm0,ymm8
- 355: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 35a: vaddps ymm1,ymm1,ymm9
- 35f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 364: vaddps ymm2,ymm2,ymm10
- 369: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 36e: vaddps ymm3,ymm3,ymm11
- 373: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 37b: vaddps ymm4,ymm4,ymm8
- 380: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 388: vaddps ymm5,ymm5,ymm9
- 38d: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 395: vaddps ymm6,ymm6,ymm10
- 39a: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 3a2: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- 3a7: add rdx,0x400
- 3ae: add rax,0x4
- 3b2: cmp rax,0x160
- 3b8: jl 346 <lr_lstm/MatMul+0x41>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- 3ba: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 3c0: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 3c7: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 3ce: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 3d5: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 3df: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 3e9: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 3f3: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- 3fd: add rsi,0x100
- 404: add rcx,0x100
- 40b: cmp rcx,0x400
- 412: jl 320 <lr_lstm/MatMul+0x1b>
# MatMul_4: accumulating single-precision vector-matrix product,
#           out = partial + in * lr_lstm/h2c.
#   rdi = pointer loaded from [rbp+0x0]; 0x400 bytes (256 floats) are consumed
#   rsi = h2c        matrix, walked in rows of 0x400 bytes (256 floats)
#   r9  = rbp+0x220  partial result used to seed the accumulators (MatMul_3 output)
#   r8  = rbp+0xa20  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
# NOTE(review): presumably [rbp+0x0] points at the previous hidden state — confirm.
- 0000000000000418 <lr_lstm/MatMul_4>:
- 418: mov rdi,QWORD PTR [rbp+0x0]
- 41c: movabs rsi,0x0
- 41e: R_X86_64_64 lr_lstm/h2c
- 426: lea r9,[rbp+0x220]
- 42d: lea r8,[rbp+0xa20]
- 434: xor rcx,rcx
# Start of a tile: seed the accumulators from the partial result instead of zero.
- 437: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 43d: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 444: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 44b: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 452: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 45c: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 466: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 470: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = first row of the current column tile, rax = byte offset into the input.
- 47a: mov rdx,rsi
- 47d: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- 480: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 486: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 48a: vaddps ymm0,ymm0,ymm8
- 48f: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 494: vaddps ymm1,ymm1,ymm9
- 499: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 49e: vaddps ymm2,ymm2,ymm10
- 4a3: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 4a8: vaddps ymm3,ymm3,ymm11
- 4ad: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 4b5: vaddps ymm4,ymm4,ymm8
- 4ba: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 4c2: vaddps ymm5,ymm5,ymm9
- 4c7: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 4cf: vaddps ymm6,ymm6,ymm10
- 4d4: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 4dc: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- 4e1: add rdx,0x400
- 4e8: add rax,0x4
- 4ec: cmp rax,0x400
- 4f2: jl 480 <lr_lstm/MatMul_4+0x68>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- 4f4: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 4fa: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 501: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 508: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 50f: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 519: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 523: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 52d: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- 537: add rsi,0x100
- 53e: add rcx,0x100
- 545: cmp rcx,0x400
- 54c: jl 437 <lr_lstm/MatMul_4+0x1f>
# MatMul_1: accumulating single-precision vector-matrix product,
#           out = partial + in * lr_lstm/h2i.
#   rdi = pointer loaded from [rbp+0x0]; 0x400 bytes (256 floats) are consumed
#   rsi = h2i        matrix, walked in rows of 0x400 bytes (256 floats)
#   r9  = rbp+0x620  partial result used to seed the accumulators (MatMul output)
#   r8  = rbp+0xe20  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
# NOTE(review): presumably [rbp+0x0] points at the previous hidden state — confirm.
- 0000000000000552 <lr_lstm/MatMul_1>:
- 552: mov rdi,QWORD PTR [rbp+0x0]
- 556: movabs rsi,0x0
- 558: R_X86_64_64 lr_lstm/h2i
- 560: lea r9,[rbp+0x620]
- 567: lea r8,[rbp+0xe20]
- 56e: xor rcx,rcx
# Start of a tile: seed the accumulators from the partial result instead of zero.
- 571: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 577: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 57e: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 585: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 58c: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 596: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 5a0: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 5aa: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = first row of the current column tile, rax = byte offset into the input.
- 5b4: mov rdx,rsi
- 5b7: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- 5ba: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 5c0: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 5c4: vaddps ymm0,ymm0,ymm8
- 5c9: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 5ce: vaddps ymm1,ymm1,ymm9
- 5d3: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 5d8: vaddps ymm2,ymm2,ymm10
- 5dd: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 5e2: vaddps ymm3,ymm3,ymm11
- 5e7: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 5ef: vaddps ymm4,ymm4,ymm8
- 5f4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 5fc: vaddps ymm5,ymm5,ymm9
- 601: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 609: vaddps ymm6,ymm6,ymm10
- 60e: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 616: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- 61b: add rdx,0x400
- 622: add rax,0x4
- 626: cmp rax,0x400
- 62c: jl 5ba <lr_lstm/MatMul_1+0x68>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- 62e: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 634: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 63b: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 642: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 649: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 653: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 65d: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 667: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- 671: add rsi,0x100
- 678: add rcx,0x100
- 67f: cmp rcx,0x400
- 686: jl 571 <lr_lstm/MatMul_1+0x1f>
# MatMul_2: accumulating single-precision vector-matrix product,
#           out = partial + in * lr_lstm/c2i.
#   rdi = pointer loaded from [rbp+0x38]; 0x400 bytes (256 floats) are consumed
#   rsi = c2i         matrix, walked in rows of 0x400 bytes (256 floats)
#   r9  = rbp+0xe20   partial result used to seed the accumulators (MatMul_1 output)
#   r8  = rbp+0x1220  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
# NOTE(review): presumably [rbp+0x38] points at the previous cell state — confirm.
- 000000000000068c <lr_lstm/MatMul_2>:
- 68c: mov rdi,QWORD PTR [rbp+0x38]
- 690: movabs rsi,0x0
- 692: R_X86_64_64 lr_lstm/c2i
- 69a: lea r9,[rbp+0xe20]
- 6a1: lea r8,[rbp+0x1220]
- 6a8: xor rcx,rcx
# Start of a tile: seed the accumulators from the partial result instead of zero.
- 6ab: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 6b1: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 6b8: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 6bf: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 6c6: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 6d0: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 6da: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 6e4: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = first row of the current column tile, rax = byte offset into the input.
- 6ee: mov rdx,rsi
- 6f1: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- 6f4: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 6fa: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 6fe: vaddps ymm0,ymm0,ymm8
- 703: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 708: vaddps ymm1,ymm1,ymm9
- 70d: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 712: vaddps ymm2,ymm2,ymm10
- 717: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 71c: vaddps ymm3,ymm3,ymm11
- 721: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 729: vaddps ymm4,ymm4,ymm8
- 72e: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 736: vaddps ymm5,ymm5,ymm9
- 73b: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 743: vaddps ymm6,ymm6,ymm10
- 748: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 750: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- 755: add rdx,0x400
- 75c: add rax,0x4
- 760: cmp rax,0x400
- 766: jl 6f4 <lr_lstm/MatMul_2+0x68>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- 768: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 76e: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 775: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 77c: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 783: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 78d: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 797: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 7a1: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- 7ab: add rsi,0x100
- 7b2: add rcx,0x100
- 7b9: cmp rcx,0x400
- 7c0: jl 6ab <lr_lstm/MatMul_2+0x1f>
# add_4: fused element-wise stage over 0x400 bytes (256 floats), 8 floats per iteration.
# Reads:   [rbp+0x1220 + i] + lr_lstm/bi[i]   -> first pre-activation  (called a below)
#          [rbp+0xa20  + i] + lr_lstm/bc[i]   -> second pre-activation (called b below)
#          buffer pointed to by [rbp+0x38]    -> values blended with the new candidate
# Writes:  buffer pointed to by [rbp+0x20]    -> blend result
#          [rbp+0xa20 + i]                    -> overwritten in place (r8 and r10 are the same address)
# With K = ymm0, g = K / (K + E(-a)), R = rational function P(x)/Q(x) on a clamped x:
#          new = g * R(b) + (K - g) * old ;  [rbp+0xa20] = R(new)
# NOTE(review): the constant vectors come from lr_lstm_data, whose values are not shown
# in this listing. The shape looks like g = sigmoid(a), new = g*tanh(b) + (1-g)*old,
# out = tanh(new) — presumably an LSTM cell update with coupled input/forget gate.
# Confirm K == 1.0 and the polynomial coefficients against the data section.
- 00000000000007c6 <lr_lstm/add_4>:
# Hoist nine constant vectors into ymm0-ymm8 for the whole loop.
- 7c6: vmovaps ymm0,YMMWORD PTR [rip+0x752] # f20 <lr_lstm_data+0x15>
- 7ce: vmovaps ymm1,YMMWORD PTR [rip+0x76a] # f40 <lr_lstm_data+0x35>
- 7d6: vmovaps ymm2,YMMWORD PTR [rip+0x782] # f60 <lr_lstm_data+0x55>
- 7de: vmovaps ymm3,YMMWORD PTR [rip+0x79a] # f80 <lr_lstm_data+0x75>
- 7e6: vmovaps ymm4,YMMWORD PTR [rip+0x7b2] # fa0 <lr_lstm_data+0x95>
- 7ee: vmovaps ymm5,YMMWORD PTR [rip+0x7ca] # fc0 <lr_lstm_data+0xb5>
- 7f6: vmovaps ymm6,YMMWORD PTR [rip+0x7e2] # fe0 <lr_lstm_data+0xd5>
- 7fe: vmovaps ymm7,YMMWORD PTR [rip+0x7fa] # 1000 <lr_lstm_data+0xf5>
- 806: vmovaps ymm8,YMMWORD PTR [rip+0x812] # 1020 <lr_lstm_data+0x115>
# Pointer setup: r8/r10 = rbp+0xa20, rcx = bc, r9 = rbp+0x1220, rdx = bi,
# rsi = pointer from [rbp+0x38], rdi = pointer from [rbp+0x20].
- 80e: lea r8,[rbp+0xa20]
- 815: movabs rcx,0x0
- 817: R_X86_64_64 lr_lstm/bc
- 81f: lea r9,[rbp+0x1220]
- 826: movabs rdx,0x0
- 828: R_X86_64_64 lr_lstm/bi
- 830: mov rsi,QWORD PTR [rbp+0x38]
- 834: mov rdi,QWORD PTR [rbp+0x20]
- 838: lea r10,[rbp+0xa20]
- 83f: xor rax,rax
# Loop head, rax = byte offset. ymm11 = 0 - ([r9+rax] + bi[rax]), i.e. -a.
- 842: vmovaps ymm9,YMMWORD PTR [r9+rax*1]
- 848: vaddps ymm10,ymm9,YMMWORD PTR [rdx+rax*1]
- 84d: vxorps ymm9,ymm9,ymm9
- 852: vsubps ymm11,ymm9,ymm10
# Clamp -a with min/max against two constants -> ymm10.
- 857: vminps ymm9,ymm11,YMMWORD PTR [rip+0x7e1] # 1040 <lr_lstm_data+0x135>
- 85f: vmaxps ymm10,ymm9,YMMWORD PTR [rip+0x7f9] # 1060 <lr_lstm_data+0x155>
# ymm9 = round-down(ymm10 * const + const)  (vroundps mode 1 rounds toward -inf);
# ymm13 = ymm9 * const + ymm10, the reduced argument.
- 867: vmulps ymm9,ymm10,YMMWORD PTR [rip+0x811] # 1080 <lr_lstm_data+0x175>
- 86f: vaddps ymm12,ymm9,YMMWORD PTR [rip+0x829] # 10a0 <lr_lstm_data+0x195>
- 877: vroundps ymm9,ymm12,0x1
- 87d: vmulps ymm12,ymm9,YMMWORD PTR [rip+0x83b] # 10c0 <lr_lstm_data+0x1b5>
- 885: vaddps ymm13,ymm12,ymm10
# Polynomial in ymm13 by Horner's rule (six constant coefficients), then
# ymm12 = poly * ymm13^2 + ymm13 + ymm0.
- 88a: vmulps ymm10,ymm13,ymm13
- 88f: vmulps ymm12,ymm13,YMMWORD PTR [rip+0x849] # 10e0 <lr_lstm_data+0x1d5>
- 897: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x861] # 1100 <lr_lstm_data+0x1f5>
- 89f: vmulps ymm12,ymm14,ymm13
- 8a4: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x874] # 1120 <lr_lstm_data+0x215>
- 8ac: vmulps ymm12,ymm14,ymm13
- 8b1: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x887] # 1140 <lr_lstm_data+0x235>
- 8b9: vmulps ymm12,ymm14,ymm13
- 8be: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x89a] # 1160 <lr_lstm_data+0x255>
- 8c6: vmulps ymm12,ymm14,ymm13
- 8cb: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x7cd] # 10a0 <lr_lstm_data+0x195>
- 8d3: vmulps ymm12,ymm14,ymm10
- 8d8: vaddps ymm10,ymm12,ymm13
- 8dd: vaddps ymm12,ymm10,ymm0
# Scale factor: (rounded value + const) truncated to int32 and shifted left by 23 bits,
# which places it in the float exponent field. The integer shift is done on each
# 128-bit half separately and the halves are re-joined.
- 8e1: vaddps ymm10,ymm9,YMMWORD PTR [rip+0x897] # 1180 <lr_lstm_data+0x275>
- 8e9: vcvttps2dq ymm9,ymm10
- 8ee: vextractf128 xmm15,ymm9,0x1
- 8f4: vpslld xmm15,xmm15,0x17
- 8fa: vpslld xmm10,xmm9,0x17
- 900: vinsertf128 ymm10,ymm10,xmm15,0x1
# E = max(poly * scale, -a);  g = ymm0 / (ymm0 + E)  -> ymm10.
- 906: vmulps ymm9,ymm12,ymm10
- 90b: vmaxps ymm10,ymm9,ymm11
- 910: vaddps ymm9,ymm0,ymm10
- 915: vdivps ymm10,ymm0,ymm9
# b = [r8+rax] + bc[rax], clamped to [ymm1, ymm2]; ymm9 = b^2.
- 91a: vmovaps ymm9,YMMWORD PTR [r8+rax*1]
- 920: vaddps ymm11,ymm9,YMMWORD PTR [rcx+rax*1]
- 925: vminps ymm9,ymm11,ymm2
- 929: vmaxps ymm11,ymm9,ymm1
- 92d: vmulps ymm9,ymm11,ymm11
# Numerator: Horner polynomial in b^2 (coefficients ymm3..ymm8 and one more constant),
# multiplied by b -> ymm12.
- 932: vmulps ymm12,ymm9,ymm3
- 936: vaddps ymm13,ymm12,ymm4
- 93a: vmulps ymm12,ymm9,ymm13
- 93f: vaddps ymm13,ymm12,ymm5
- 943: vmulps ymm12,ymm9,ymm13
- 948: vaddps ymm13,ymm12,ymm6
- 94c: vmulps ymm12,ymm9,ymm13
- 951: vaddps ymm13,ymm12,ymm7
- 955: vmulps ymm12,ymm9,ymm13
- 95a: vaddps ymm13,ymm12,ymm8
- 95f: vmulps ymm12,ymm9,ymm13
- 964: vaddps ymm13,ymm12,YMMWORD PTR [rip+0x834] # 11a0 <lr_lstm_data+0x295>
- 96c: vmulps ymm12,ymm11,ymm13
# Denominator: Horner polynomial in b^2 (four constants) -> ymm9; R(b) = ymm12 / ymm9.
- 971: vmulps ymm11,ymm9,YMMWORD PTR [rip+0x847] # 11c0 <lr_lstm_data+0x2b5>
- 979: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x85f] # 11e0 <lr_lstm_data+0x2d5>
- 981: vmulps ymm11,ymm9,ymm13
- 986: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x872] # 1200 <lr_lstm_data+0x2f5>
- 98e: vmulps ymm11,ymm9,ymm13
- 993: vaddps ymm9,ymm11,YMMWORD PTR [rip+0x885] # 1220 <lr_lstm_data+0x315>
- 99b: vdivps ymm11,ymm12,ymm9
# new = g * R(b) + (ymm0 - g) * [rsi+rax]; store new to [rdi+rax].
- 9a0: vmulps ymm9,ymm10,ymm11
- 9a5: vsubps ymm11,ymm0,ymm10
- 9aa: vmulps ymm10,ymm11,YMMWORD PTR [rsi+rax*1]
- 9af: vaddps ymm11,ymm9,ymm10
- 9b4: vmovaps YMMWORD PTR [rdi+rax*1],ymm11
# Apply the same clamp and rational function R to new.
- 9b9: vminps ymm9,ymm11,ymm2
- 9bd: vmaxps ymm10,ymm9,ymm1
- 9c1: vmulps ymm9,ymm10,ymm10
- 9c6: vmulps ymm11,ymm9,ymm3
- 9ca: vaddps ymm12,ymm11,ymm4
- 9ce: vmulps ymm11,ymm9,ymm12
- 9d3: vaddps ymm12,ymm11,ymm5
- 9d7: vmulps ymm11,ymm9,ymm12
- 9dc: vaddps ymm12,ymm11,ymm6
- 9e0: vmulps ymm11,ymm9,ymm12
- 9e5: vaddps ymm12,ymm11,ymm7
- 9e9: vmulps ymm11,ymm9,ymm12
- 9ee: vaddps ymm12,ymm11,ymm8
- 9f3: vmulps ymm11,ymm9,ymm12
- 9f8: vaddps ymm12,ymm11,YMMWORD PTR [rip+0x7a0] # 11a0 <lr_lstm_data+0x295>
- a00: vmulps ymm11,ymm10,ymm12
- a05: vmulps ymm10,ymm9,YMMWORD PTR [rip+0x7b3] # 11c0 <lr_lstm_data+0x2b5>
- a0d: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7cb] # 11e0 <lr_lstm_data+0x2d5>
- a15: vmulps ymm10,ymm9,ymm12
- a1a: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7de] # 1200 <lr_lstm_data+0x2f5>
- a22: vmulps ymm10,ymm9,ymm12
- a27: vaddps ymm9,ymm10,YMMWORD PTR [rip+0x7f1] # 1220 <lr_lstm_data+0x315>
- a2f: vdivps ymm10,ymm11,ymm9
# Store R(new) over the rbp+0xa20 slot that was read earlier in this iteration.
- a34: vmovaps YMMWORD PTR [r10+rax*1],ymm10
- a3a: add rax,0x20
- a3e: cmp rax,0x400
- a44: jl 842 <lr_lstm/add_4+0x7c>
# MatMul_6: single-precision vector-matrix product, out = in * lr_lstm/c2o.
#   rdi = pointer loaded from [rbp+0x20]; 0x400 bytes (256 floats) are consumed
#         (this is the buffer add_4 stores its blend result into)
#   rsi = c2o         matrix, walked in rows of 0x400 bytes (256 floats)
#   r8  = rbp+0x1620  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
- 0000000000000a4a <lr_lstm/MatMul_6>:
- a4a: mov rdi,QWORD PTR [rbp+0x20]
- a4e: movabs rsi,0x0
- a50: R_X86_64_64 lr_lstm/c2o
- a58: lea r8,[rbp+0x1620]
- a5f: xor rcx,rcx
# Start of a tile: zero the eight accumulators.
- a62: vxorps ymm0,ymm0,ymm0
- a66: vxorps ymm1,ymm1,ymm1
- a6a: vxorps ymm2,ymm2,ymm2
- a6e: vxorps ymm3,ymm3,ymm3
- a72: vxorps ymm4,ymm4,ymm4
- a76: vxorps ymm5,ymm5,ymm5
- a7a: vxorps ymm6,ymm6,ymm6
- a7e: vxorps ymm7,ymm7,ymm7
# rdx = first row of the current column tile, rax = byte offset into the input.
- a82: mov rdx,rsi
- a85: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- a88: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- a8e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- a92: vaddps ymm0,ymm0,ymm8
- a97: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- a9c: vaddps ymm1,ymm1,ymm9
- aa1: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- aa6: vaddps ymm2,ymm2,ymm10
- aab: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- ab0: vaddps ymm3,ymm3,ymm11
- ab5: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- abd: vaddps ymm4,ymm4,ymm8
- ac2: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- aca: vaddps ymm5,ymm5,ymm9
- acf: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- ad7: vaddps ymm6,ymm6,ymm10
- adc: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- ae4: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- ae9: add rdx,0x400
- af0: add rax,0x4
- af4: cmp rax,0x400
- afa: jl a88 <lr_lstm/MatMul_6+0x3e>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- afc: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- b02: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- b09: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- b10: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- b17: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- b21: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- b2b: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- b35: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- b3f: add rsi,0x100
- b46: add rcx,0x100
- b4d: cmp rcx,0x400
- b54: jl a62 <lr_lstm/MatMul_6+0x18>
# MatMul_5: accumulating single-precision vector-matrix product,
#           out = partial + in * lr_lstm/x2o.
#   rdi = rbp+0xc0    input vector; 0x160 bytes (88 floats) are consumed
#   rsi = x2o         matrix, walked in rows of 0x400 bytes (256 floats)
#   r9  = rbp+0x1620  partial result used to seed the accumulators (MatMul_6 output)
#   r8  = rbp+0x1a20  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
- 0000000000000b5a <lr_lstm/MatMul_5>:
- b5a: lea rdi,[rbp+0xc0]
- b61: movabs rsi,0x0
- b63: R_X86_64_64 lr_lstm/x2o
- b6b: lea r9,[rbp+0x1620]
- b72: lea r8,[rbp+0x1a20]
- b79: xor rcx,rcx
# Start of a tile: seed the accumulators from the partial result instead of zero.
- b7c: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- b82: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- b89: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- b90: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- b97: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- ba1: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- bab: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- bb5: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = first row of the current column tile, rax = byte offset into the input.
- bbf: mov rdx,rsi
- bc2: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- bc5: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- bcb: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- bcf: vaddps ymm0,ymm0,ymm8
- bd4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- bd9: vaddps ymm1,ymm1,ymm9
- bde: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- be3: vaddps ymm2,ymm2,ymm10
- be8: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- bed: vaddps ymm3,ymm3,ymm11
- bf2: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- bfa: vaddps ymm4,ymm4,ymm8
- bff: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- c07: vaddps ymm5,ymm5,ymm9
- c0c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- c14: vaddps ymm6,ymm6,ymm10
- c19: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- c21: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- c26: add rdx,0x400
- c2d: add rax,0x4
- c31: cmp rax,0x160
- c37: jl bc5 <lr_lstm/MatMul_5+0x6b>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- c39: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- c3f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- c46: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- c4d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- c54: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- c5e: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- c68: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- c72: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- c7c: add rsi,0x100
- c83: add rcx,0x100
- c8a: cmp rcx,0x400
- c91: jl b7c <lr_lstm/MatMul_5+0x22>
# MatMul_7: accumulating single-precision vector-matrix product,
#           out = partial + in * lr_lstm/h2o.
#   rdi = pointer loaded from [rbp+0x0]; 0x400 bytes (256 floats) are consumed
#   rsi = h2o         matrix, walked in rows of 0x400 bytes (256 floats)
#   r9  = rbp+0x1a20  partial result used to seed the accumulators (MatMul_5 output)
#   r8  = rbp+0x1e20  output, 0x400 bytes, written with aligned stores
# The output is produced in four 0x100-byte column tiles held in ymm0-ymm7.
# NOTE(review): presumably [rbp+0x0] points at the previous hidden state — confirm.
- 0000000000000c97 <lr_lstm/MatMul_7>:
- c97: mov rdi,QWORD PTR [rbp+0x0]
- c9b: movabs rsi,0x0
- c9d: R_X86_64_64 lr_lstm/h2o
- ca5: lea r9,[rbp+0x1a20]
- cac: lea r8,[rbp+0x1e20]
- cb3: xor rcx,rcx
# Start of a tile: seed the accumulators from the partial result instead of zero.
- cb6: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- cbc: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- cc3: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- cca: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- cd1: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- cdb: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- ce5: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- cef: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = first row of the current column tile, rax = byte offset into the input.
- cf9: mov rdx,rsi
- cfc: xor rax,rax
# Inner loop: broadcast one input float and multiply-accumulate it against the
# 64 floats of the current matrix row that belong to this tile.
- cff: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- d05: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- d09: vaddps ymm0,ymm0,ymm8
- d0e: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- d13: vaddps ymm1,ymm1,ymm9
- d18: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- d1d: vaddps ymm2,ymm2,ymm10
- d22: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- d27: vaddps ymm3,ymm3,ymm11
- d2c: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- d34: vaddps ymm4,ymm4,ymm8
- d39: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- d41: vaddps ymm5,ymm5,ymm9
- d46: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- d4e: vaddps ymm6,ymm6,ymm10
- d53: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- d5b: vaddps ymm7,ymm7,ymm11
# Advance to the next matrix row and the next input float.
- d60: add rdx,0x400
- d67: add rax,0x4
- d6b: cmp rax,0x400
- d71: jl cff <lr_lstm/MatMul_7+0x68>
# Store the finished tile, then move on to the next 0x100-byte column tile.
- d73: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- d79: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- d80: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- d87: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- d8e: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- d98: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- da2: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- dac: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
- db6: add rsi,0x100
- dbd: add rcx,0x100
- dc4: cmp rcx,0x400
- dcb: jl cb6 <lr_lstm/MatMul_7+0x1f>
# add_7: final element-wise stage over 0x400 bytes (256 floats), 8 floats per iteration,
# followed by the epilogue of lr_lstm.
# Reads:   [rbp+0x1e20 + i] + lr_lstm/bo[i]  -> pre-activation (called a below)
#          [rbp+0xa20 + i]                   -> values scaled by the gate
# Writes:  buffer pointed to by [rbp+0x28]
# With K = ymm0:  g = K / (K + E(-a));  out = g * [rbp+0xa20 + i]
# NOTE(review): the constant vectors come from lr_lstm_data, whose values are not shown
# in this listing. The shape looks like out = sigmoid(a) * x, presumably the LSTM output
# gate applied to the activated cell state — confirm K == 1.0 and the coefficients.
- 0000000000000dd1 <lr_lstm/add_7>:
# Hoist constants into ymm0 and ymm2-ymm8; ymm1 is zero and is used to negate a.
- dd1: vmovaps ymm0,YMMWORD PTR [rip+0x147] # f20 <lr_lstm_data+0x15>
- dd9: vxorps ymm1,ymm1,ymm1
- ddd: vmovaps ymm2,YMMWORD PTR [rip+0x27b] # 1060 <lr_lstm_data+0x155>
- de5: vmovaps ymm3,YMMWORD PTR [rip+0x253] # 1040 <lr_lstm_data+0x135>
- ded: vmovaps ymm4,YMMWORD PTR [rip+0x2ab] # 10a0 <lr_lstm_data+0x195>
- df5: vmovaps ymm5,YMMWORD PTR [rip+0x283] # 1080 <lr_lstm_data+0x175>
- dfd: vmovaps ymm6,YMMWORD PTR [rip+0x2bb] # 10c0 <lr_lstm_data+0x1b5>
- e05: vmovaps ymm7,YMMWORD PTR [rip+0x2d3] # 10e0 <lr_lstm_data+0x1d5>
- e0d: vmovaps ymm8,YMMWORD PTR [rip+0x2eb] # 1100 <lr_lstm_data+0x1f5>
# Pointer setup: rsi = rbp+0x1e20, rcx = bo, rdi = rbp+0xa20, rdx = pointer from [rbp+0x28].
- e15: lea rsi,[rbp+0x1e20]
- e1c: movabs rcx,0x0
- e1e: R_X86_64_64 lr_lstm/bo
- e26: lea rdi,[rbp+0xa20]
- e2d: mov rdx,QWORD PTR [rbp+0x28]
- e31: xor rax,rax
# Loop head, rax = byte offset. ymm9 = 0 - ([rsi+rax] + bo[rax]), i.e. -a.
- e34: vmovaps ymm9,YMMWORD PTR [rsi+rax*1]
- e39: vaddps ymm10,ymm9,YMMWORD PTR [rcx+rax*1]
- e3e: vsubps ymm9,ymm1,ymm10
# Clamp -a (min with ymm3, then max with ymm2) -> ymm11.
- e43: vminps ymm10,ymm9,ymm3
- e47: vmaxps ymm11,ymm10,ymm2
# ymm10 = round-down(ymm11 * ymm5 + ymm4)  (vroundps mode 1 rounds toward -inf);
# ymm13 = ymm10 * ymm6 + ymm11, the reduced argument.
- e4b: vmulps ymm10,ymm11,ymm5
- e4f: vaddps ymm12,ymm10,ymm4
- e53: vroundps ymm10,ymm12,0x1
- e59: vmulps ymm12,ymm10,ymm6
- e5d: vaddps ymm13,ymm12,ymm11
# Polynomial in ymm13 by Horner's rule (six constant coefficients), then
# ymm12 = poly * ymm13^2 + ymm13 + ymm0.
- e62: vmulps ymm11,ymm13,ymm13
- e67: vmulps ymm12,ymm7,ymm13
- e6c: vaddps ymm14,ymm12,ymm8
- e71: vmulps ymm12,ymm14,ymm13
- e76: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2a2] # 1120 <lr_lstm_data+0x215>
- e7e: vmulps ymm12,ymm14,ymm13
- e83: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2b5] # 1140 <lr_lstm_data+0x235>
- e8b: vmulps ymm12,ymm14,ymm13
- e90: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2c8] # 1160 <lr_lstm_data+0x255>
- e98: vmulps ymm12,ymm14,ymm13
- e9d: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x1fb] # 10a0 <lr_lstm_data+0x195>
- ea5: vmulps ymm12,ymm14,ymm11
- eaa: vaddps ymm11,ymm12,ymm13
- eaf: vaddps ymm12,ymm11,ymm0
# Scale factor: (rounded value + const) truncated to int32 and shifted left by 23 bits,
# which places it in the float exponent field; done per 128-bit half and re-joined.
- eb3: vaddps ymm11,ymm10,YMMWORD PTR [rip+0x2c5] # 1180 <lr_lstm_data+0x275>
- ebb: vcvttps2dq ymm10,ymm11
- ec0: vextractf128 xmm15,ymm10,0x1
- ec6: vpslld xmm15,xmm15,0x17
- ecc: vpslld xmm11,xmm10,0x17
- ed2: vinsertf128 ymm11,ymm11,xmm15,0x1
# E = max(poly * scale, -a);  g = ymm0 / (ymm0 + E)  -> ymm10.
- ed8: vmulps ymm10,ymm12,ymm11
- edd: vmaxps ymm11,ymm10,ymm9
- ee2: vaddps ymm9,ymm0,ymm11
- ee7: vdivps ymm10,ymm0,ymm9
# out = g * [rdi+rax]; store to the buffer pointed to by [rbp+0x28].
- eec: vmulps ymm9,ymm10,YMMWORD PTR [rdi+rax*1]
- ef1: vmovaps YMMWORD PTR [rdx+rax*1],ymm9
- ef6: add rax,0x20
- efa: cmp rax,0x400
- f00: jl e34 <lr_lstm/add_7+0x63>
# Epilogue of lr_lstm: restore the caller's rbp, clear upper ymm state, return.
- f06: pop rbp
- f07: vzeroupper
- f0a: ret
- 0000000000000f0b <lr_lstm_data>:
- ...
- f1f: ....?...?...?...
- f2f: ?...?...?...?...
- f3f: ?...............
- f4f: ................
- f5f: ....A...A...A...
- f6f: A...A...A...A...
- f7f: A.%...%...%...%.
- f8f: ..%...%...%...%.
- f9f: .~3a*~3a*~3a*~3a
- faf: *~3a*~3a*~3a*~3a
- fbf: *.7...7...7...7.
- fcf: ..7...7...7...7.
- fdf: .A.\3A.\3A.\3A.\
- fef: 3A.\3A.\3A.\3A.\
- fff: 3JCy7JCy7JCy7JCy
- 100f: 7JCy7JCy7JCy7JCy
- 101f: 7..':..':..':..'
- 102f: :..':..':..':..'
- 103f: :...B...B...B...
- 104f: B...B...B...B...
- 105f: B...............
- 106f: ................
- 107f: .;..?;..?;..?;..
- 108f: ?;..?;..?;..?;..
- 109f: ?...?...?...?...
- 10af: ?...?...?...?...
- 10bf: ?.r1..r1..r1..r1
- 10cf: ..r1..r1..r1..r1
- 10df: .giP9giP9giP9giP
- 10ef: 9giP9giP9giP9giP
- 10ff: 9.C.:.C.:.C.:.C.
- 110f: :.C.:.C.:.C.:.C.
- 111f: :...<...<...<...
- 112f: <...<...<...<...
- 113f: <..*=..*=..*=..*
- 114f: =..*=..*=..*=..*
- 115f: =..*>..*>..*>..*
- 116f: >..*>..*>..*>..*
- 117f: >...B...B...B...
- 118f: B...B...B...B...
- 119f: B.Y.;.Y.;.Y.;.Y.
- 11af: ;.Y.;.Y.;.Y.;.Y.
- 11bf: ;...5...5...5...
- 11cf: 5...5...5...5...
- 11df: 5...8...8...8...
- 11ef: 8...8...8...8...
- 11ff: 8...;...;...;...
- 120f: ;...;...;...;...
- 121f: ;.Y.;.Y.;.Y.;.Y.
- 122f: ;.Y.;.Y.;.Y.;.Y.
- 123f: ;
# rl_lstm: entry point. Saves rbp and then uses rbp as the base pointer for every [rbp+...] access
# in the labelled sections that follow; rbp is set to the value passed in rdi.
# The matching pop rbp / vzeroupper / ret is at the end of rl_lstm/add_7.
- 0000000000001240 <rl_lstm>:
- 1240: vzeroupper
- 1243: push rbp
- 1244: mov rbp,rdi
# rl_lstm/hyphen/Lookup: reads a signed 32-bit index from [rbp+0x0], replaces any negative index
# with 1, and stores the address of the selected 32-byte row of rl_lstm/fixed_embedding_matrix_3
# into [rbp+0x48]. No upper bound check is performed on the index here.
- 0000000000001247 <rl_lstm/hyphen/Lookup>:
- 1247: movsxd rax,DWORD PTR [rbp+0x0]
# rcx = fallback row index used when the index is negative.
- 124b: mov rcx,0x1
- 1252: test rax,rax
- 1255: cmovs rax,rcx
# Row index -> byte offset (rows are 1 << 5 = 32 bytes).
- 1259: shl rax,0x5
# The movabs immediate is a placeholder; the relocation below supplies the matrix address.
- 125d: movabs rdx,0x0
- 125f: R_X86_64_64 rl_lstm/fixed_embedding_matrix_3
- 1267: add rax,rdx
# Only the row pointer is stored; nothing is copied in this section.
- 126a: mov QWORD PTR [rbp+0x48],rax
# rl_lstm/capitalization/Lookup: reads a signed 32-bit index from [rbp+0x4], replaces any negative
# index with 4, and stores the address of the selected 32-byte row of
# rl_lstm/fixed_embedding_matrix_2 into [rbp+0x50]. No upper bound check is performed here.
- 000000000000126e <rl_lstm/capitalization/Lookup>:
- 126e: movsxd rax,DWORD PTR [rbp+0x4]
# rcx = fallback row index used when the index is negative.
- 1272: mov rcx,0x4
- 1279: test rax,rax
- 127c: cmovs rax,rcx
# Row index -> byte offset (32-byte rows).
- 1280: shl rax,0x5
# Placeholder immediate; the relocation below supplies the matrix address.
- 1284: movabs rdx,0x0
- 1286: R_X86_64_64 rl_lstm/fixed_embedding_matrix_2
- 128e: add rax,rdx
- 1291: mov QWORD PTR [rbp+0x50],rax
# rl_lstm/quote/Lookup: reads a signed 32-bit index from [rbp+0x40], replaces any negative index
# with 3, and stores the address of the selected 32-byte row of rl_lstm/fixed_embedding_matrix_5
# into [rbp+0x58]. No upper bound check is performed here.
- 0000000000001295 <rl_lstm/quote/Lookup>:
- 1295: movsxd rax,DWORD PTR [rbp+0x40]
# rcx = fallback row index used when the index is negative.
- 1299: mov rcx,0x3
- 12a0: test rax,rax
- 12a3: cmovs rax,rcx
# Row index -> byte offset (32-byte rows).
- 12a7: shl rax,0x5
# Placeholder immediate; the relocation below supplies the matrix address.
- 12ab: movabs rdx,0x0
- 12ad: R_X86_64_64 rl_lstm/fixed_embedding_matrix_5
- 12b5: add rax,rdx
- 12b8: mov QWORD PTR [rbp+0x58],rax
# rl_lstm/words/Lookup: reads a signed 32-bit index from [rbp+0x1c], replaces any negative index
# with 0xd008, and stores the address of the selected 128-byte row of
# rl_lstm/fixed_embedding_matrix_0 into [rbp+0x60]. No upper bound check is performed here.
- 00000000000012bc <rl_lstm/words/Lookup>:
- 12bc: movsxd rax,DWORD PTR [rbp+0x1c]
# rcx = fallback row index used when the index is negative.
- 12c0: mov rcx,0xd008
- 12c7: test rax,rax
- 12ca: cmovs rax,rcx
# Row index -> byte offset (rows are 1 << 7 = 128 bytes, unlike the 32-byte rows of the sibling lookups).
- 12ce: shl rax,0x7
# Placeholder immediate; the relocation below supplies the matrix address.
- 12d2: movabs rdx,0x0
- 12d4: R_X86_64_64 rl_lstm/fixed_embedding_matrix_0
- 12dc: add rax,rdx
- 12df: mov QWORD PTR [rbp+0x60],rax
# rl_lstm/digit/Lookup: reads a signed 32-bit index from [rbp+0x18], replaces any negative index
# with 2, and stores the address of the selected 32-byte row of rl_lstm/fixed_embedding_matrix_6
# into [rbp+0x68]. No upper bound check is performed here.
- 00000000000012e3 <rl_lstm/digit/Lookup>:
- 12e3: movsxd rax,DWORD PTR [rbp+0x18]
# rcx = fallback row index used when the index is negative.
- 12e7: mov rcx,0x2
- 12ee: test rax,rax
- 12f1: cmovs rax,rcx
# Row index -> byte offset (32-byte rows).
- 12f5: shl rax,0x5
# Placeholder immediate; the relocation below supplies the matrix address.
- 12f9: movabs rdx,0x0
- 12fb: R_X86_64_64 rl_lstm/fixed_embedding_matrix_6
- 1303: add rax,rdx
- 1306: mov QWORD PTR [rbp+0x68],rax
# rl_lstm/punctuation/Lookup: reads a signed 32-bit index from [rbp+0x30], replaces any negative
# index with 2, and stores the address of the selected 32-byte row of
# rl_lstm/fixed_embedding_matrix_4 into [rbp+0x70]. No upper bound check is performed here.
- 000000000000130a <rl_lstm/punctuation/Lookup>:
- 130a: movsxd rax,DWORD PTR [rbp+0x30]
# rcx = fallback row index used when the index is negative.
- 130e: mov rcx,0x2
- 1315: test rax,rax
- 1318: cmovs rax,rcx
# Row index -> byte offset (32-byte rows).
- 131c: shl rax,0x5
# Placeholder immediate; the relocation below supplies the matrix address.
- 1320: movabs rdx,0x0
- 1322: R_X86_64_64 rl_lstm/fixed_embedding_matrix_4
- 132a: add rax,rdx
- 132d: mov QWORD PTR [rbp+0x70],rax
# rl_lstm/suffix/Lookup: sums up to three 64-byte rows of rl_lstm/fixed_embedding_matrix_1 and
# writes the 64-byte sum to [rbp+0x80]. The three signed 32-bit row indices are read from
# [rbp+0x34]. Per index: >= 0 selects that row, -1 selects row 0x208d, any other negative value
# contributes nothing. No upper bound check is performed on the indices here.
- 0000000000001331 <rl_lstm/suffix/Lookup>:
# rcx = index array, rdx = matrix base (relocated), rsi = output, ymm0/ymm1 = running sum.
- 1331: lea rcx,[rbp+0x34]
- 1335: movabs rdx,0x0
- 1337: R_X86_64_64 rl_lstm/fixed_embedding_matrix_1
- 133f: lea rsi,[rbp+0x80]
- 1346: vxorps ymm0,ymm0,ymm0
- 134a: vxorps ymm1,ymm1,ymm1
# r8 = row substituted for an index of exactly -1.
- 134e: mov r8,0x208d
# rdi = loop counter over the index array.
- 1355: xor rdi,rdi
# ---- loop body (target of the jne at 1389) ----
- 1358: movsxd rax,DWORD PTR [rcx+rdi*4]
- 135c: test rax,rax
# Non-negative index: go straight to the accumulate step.
- 135f: jns 1372 <rl_lstm/suffix/Lookup+0x41>
# Negative but not -1: skip this slot entirely.
- 1365: cmp rax,0xffffffffffffffff
- 1369: jne 1382 <rl_lstm/suffix/Lookup+0x51>
- 136f: mov rax,r8
# Row index -> byte offset (rows are 1 << 6 = 64 bytes), then add both halves of the row.
- 1372: shl rax,0x6
- 1376: add rax,rdx
- 1379: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 137d: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 1382: inc rdi
- 1385: cmp rdi,0x3
- 1389: jne 1358 <rl_lstm/suffix/Lookup+0x27>
# Store the accumulated sum; it is all zeros if every slot was skipped.
- 138b: vmovaps YMMWORD PTR [rsi],ymm0
- 138f: vmovaps YMMWORD PTR [rsi+0x20],ymm1
# rl_lstm/concat: packs seven pieces back to back into the 0x160-byte buffer at [rbp+0xc0] using
# byte-wise rep movs. Six sources are the row pointers stored at [rbp+0x48..0x70] by the Lookup
# sections above; the seventh is the in-place 64-byte sum at [rbp+0x80].
# Layout of the destination (offset: size <- source):
#   0x000: 0x80 <- pointer at [rbp+0x60]
#   0x080: 0x40 <- buffer at rbp+0x80
#   0x0c0: 0x20 <- pointer at [rbp+0x50]
#   0x0e0: 0x20 <- pointer at [rbp+0x48]
#   0x100: 0x20 <- pointer at [rbp+0x70]
#   0x120: 0x20 <- pointer at [rbp+0x58]
#   0x140: 0x20 <- pointer at [rbp+0x68]
- 0000000000001394 <rl_lstm/concat>:
# r8 = destination base; rdi is re-derived from it for every copy because rep movs advances rdi.
- 1394: lea r8,[rbp+0xc0]
- 139b: mov rsi,QWORD PTR [rbp+0x60]
- 139f: lea rdi,[r8]
- 13a2: mov rcx,0x80
- 13a9: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
# Source here is an address inside the rbp block (lea), not a loaded pointer.
- 13ab: lea rsi,[rbp+0x80]
- 13b2: lea rdi,[r8+0x80]
- 13b9: mov rcx,0x40
- 13c0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 13c2: mov rsi,QWORD PTR [rbp+0x50]
- 13c6: lea rdi,[r8+0xc0]
- 13cd: mov rcx,0x20
- 13d4: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 13d6: mov rsi,QWORD PTR [rbp+0x48]
- 13da: lea rdi,[r8+0xe0]
- 13e1: mov rcx,0x20
- 13e8: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 13ea: mov rsi,QWORD PTR [rbp+0x70]
- 13ee: lea rdi,[r8+0x100]
- 13f5: mov rcx,0x20
- 13fc: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 13fe: mov rsi,QWORD PTR [rbp+0x58]
- 1402: lea rdi,[r8+0x120]
- 1409: mov rcx,0x20
- 1410: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 1412: mov rsi,QWORD PTR [rbp+0x68]
- 1416: lea rdi,[r8+0x140]
- 141d: mov rcx,0x20
- 1424: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
# rl_lstm/MatMul: vector-matrix product, out = x * W, in single precision.
#   x   = 0x160 bytes (88 floats) at [rbp+0xc0]
#   W   = rl_lstm/x2i, read with a row stride of 0x400 bytes (one row per element of x)
#   out = 0x400 bytes (256 floats) at [rbp+0x220]
# The outer loop handles 0x100 bytes (64 floats, 8 ymm accumulators) of output at a time;
# the inner loop walks x and the rows of W. Accumulators start at zero.
- 0000000000001426 <rl_lstm/MatMul>:
- 1426: lea rdi,[rbp+0xc0]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 142d: movabs rsi,0x0
- 142f: R_X86_64_64 rl_lstm/x2i
- 1437: lea r8,[rbp+0x220]
# rcx = byte offset of the current 64-float output block.
- 143e: xor rcx,rcx
# ---- outer loop (target of the jl at 1533): clear the 8 accumulators ----
- 1441: vxorps ymm0,ymm0,ymm0
- 1445: vxorps ymm1,ymm1,ymm1
- 1449: vxorps ymm2,ymm2,ymm2
- 144d: vxorps ymm3,ymm3,ymm3
- 1451: vxorps ymm4,ymm4,ymm4
- 1455: vxorps ymm5,ymm5,ymm5
- 1459: vxorps ymm6,ymm6,ymm6
- 145d: vxorps ymm7,ymm7,ymm7
# rdx = walking row pointer for this column block, rax = byte offset into x.
- 1461: mov rdx,rsi
- 1464: xor rax,rax
# ---- inner loop (target of the jl at 14d9): acc[k] += x[j] * W[j][block + k] ----
- 1467: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 146d: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1471: vaddps ymm0,ymm0,ymm8
- 1476: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 147b: vaddps ymm1,ymm1,ymm9
- 1480: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1485: vaddps ymm2,ymm2,ymm10
- 148a: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 148f: vaddps ymm3,ymm3,ymm11
- 1494: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 149c: vaddps ymm4,ymm4,ymm8
- 14a1: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 14a9: vaddps ymm5,ymm5,ymm9
- 14ae: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 14b6: vaddps ymm6,ymm6,ymm10
- 14bb: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 14c3: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of x.
- 14c8: add rdx,0x400
- 14cf: add rax,0x4
- 14d3: cmp rax,0x160
- 14d9: jl 1467 <rl_lstm/MatMul+0x41>
# Write the finished 64-float block to the output.
- 14db: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 14e1: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 14e8: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 14ef: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 14f6: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1500: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 150a: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1514: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W and of the output.
- 151e: add rsi,0x100
- 1525: add rcx,0x100
- 152c: cmp rcx,0x400
- 1533: jl 1441 <rl_lstm/MatMul+0x1b>
# rl_lstm/MatMul_3: vector-matrix product, out = x * W, in single precision.
#   x   = 0x160 bytes (88 floats) at [rbp+0xc0]
#   W   = rl_lstm/x2c, read with a row stride of 0x400 bytes (one row per element of x)
#   out = 0x400 bytes (256 floats) at [rbp+0x620]
# Same loop structure as rl_lstm/MatMul: 64 output floats per outer iteration, accumulators start at zero.
- 0000000000001539 <rl_lstm/MatMul_3>:
- 1539: lea rdi,[rbp+0xc0]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 1540: movabs rsi,0x0
- 1542: R_X86_64_64 rl_lstm/x2c
- 154a: lea r8,[rbp+0x620]
# rcx = byte offset of the current 64-float output block.
- 1551: xor rcx,rcx
# ---- outer loop (target of the jl at 1646): clear the 8 accumulators ----
- 1554: vxorps ymm0,ymm0,ymm0
- 1558: vxorps ymm1,ymm1,ymm1
- 155c: vxorps ymm2,ymm2,ymm2
- 1560: vxorps ymm3,ymm3,ymm3
- 1564: vxorps ymm4,ymm4,ymm4
- 1568: vxorps ymm5,ymm5,ymm5
- 156c: vxorps ymm6,ymm6,ymm6
- 1570: vxorps ymm7,ymm7,ymm7
# rdx = walking row pointer for this column block, rax = byte offset into x.
- 1574: mov rdx,rsi
- 1577: xor rax,rax
# ---- inner loop (target of the jl at 15ec): acc[k] += x[j] * W[j][block + k] ----
- 157a: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 1580: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1584: vaddps ymm0,ymm0,ymm8
- 1589: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 158e: vaddps ymm1,ymm1,ymm9
- 1593: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1598: vaddps ymm2,ymm2,ymm10
- 159d: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 15a2: vaddps ymm3,ymm3,ymm11
- 15a7: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 15af: vaddps ymm4,ymm4,ymm8
- 15b4: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 15bc: vaddps ymm5,ymm5,ymm9
- 15c1: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 15c9: vaddps ymm6,ymm6,ymm10
- 15ce: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 15d6: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of x.
- 15db: add rdx,0x400
- 15e2: add rax,0x4
- 15e6: cmp rax,0x160
- 15ec: jl 157a <rl_lstm/MatMul_3+0x41>
# Write the finished 64-float block to the output.
- 15ee: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 15f4: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 15fb: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 1602: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 1609: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1613: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 161d: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1627: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W and of the output.
- 1631: add rsi,0x100
- 1638: add rcx,0x100
- 163f: cmp rcx,0x400
- 1646: jl 1554 <rl_lstm/MatMul_3+0x1b>
# rl_lstm/MatMul_1: fused multiply-accumulate, out = init + v * W, in single precision.
#   v    = 0x400 bytes (256 floats) at the pointer loaded from [rbp+0x8]
#   W    = rl_lstm/h2i, read with a row stride of 0x400 bytes (one row per element of v)
#   init = 0x400 bytes at [rbp+0x220] (the buffer written by rl_lstm/MatMul)
#   out  = 0x400 bytes at [rbp+0xa20]
# The outer loop handles 64 output floats at a time; accumulators are preloaded from init instead of zeroed.
- 000000000000164c <rl_lstm/MatMul_1>:
- 164c: mov rdi,QWORD PTR [rbp+0x8]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 1650: movabs rsi,0x0
- 1652: R_X86_64_64 rl_lstm/h2i
- 165a: lea r9,[rbp+0x220]
- 1661: lea r8,[rbp+0xa20]
# rcx = byte offset of the current 64-float block in both init and out.
- 1668: xor rcx,rcx
# ---- outer loop (target of the jl at 1780): preload the 8 accumulators from init ----
- 166b: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 1671: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 1678: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 167f: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 1686: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 1690: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 169a: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 16a4: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = walking row pointer for this column block, rax = byte offset into v.
- 16ae: mov rdx,rsi
- 16b1: xor rax,rax
# ---- inner loop (target of the jl at 1726): acc[k] += v[j] * W[j][block + k] ----
- 16b4: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 16ba: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 16be: vaddps ymm0,ymm0,ymm8
- 16c3: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 16c8: vaddps ymm1,ymm1,ymm9
- 16cd: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 16d2: vaddps ymm2,ymm2,ymm10
- 16d7: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 16dc: vaddps ymm3,ymm3,ymm11
- 16e1: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 16e9: vaddps ymm4,ymm4,ymm8
- 16ee: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 16f6: vaddps ymm5,ymm5,ymm9
- 16fb: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 1703: vaddps ymm6,ymm6,ymm10
- 1708: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 1710: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of v.
- 1715: add rdx,0x400
- 171c: add rax,0x4
- 1720: cmp rax,0x400
- 1726: jl 16b4 <rl_lstm/MatMul_1+0x68>
# Write the finished 64-float block to the output.
- 1728: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 172e: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 1735: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 173c: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 1743: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 174d: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 1757: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1761: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W, init and the output.
- 176b: add rsi,0x100
- 1772: add rcx,0x100
- 1779: cmp rcx,0x400
- 1780: jl 166b <rl_lstm/MatMul_1+0x1f>
# rl_lstm/MatMul_4: fused multiply-accumulate, out = init + v * W, in single precision.
#   v    = 0x400 bytes (256 floats) at the pointer loaded from [rbp+0x8]
#   W    = rl_lstm/h2c, read with a row stride of 0x400 bytes (one row per element of v)
#   init = 0x400 bytes at [rbp+0x620] (the buffer written by rl_lstm/MatMul_3)
#   out  = 0x400 bytes at [rbp+0xe20]
# The outer loop handles 64 output floats at a time; accumulators are preloaded from init instead of zeroed.
- 0000000000001786 <rl_lstm/MatMul_4>:
- 1786: mov rdi,QWORD PTR [rbp+0x8]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 178a: movabs rsi,0x0
- 178c: R_X86_64_64 rl_lstm/h2c
- 1794: lea r9,[rbp+0x620]
- 179b: lea r8,[rbp+0xe20]
# rcx = byte offset of the current 64-float block in both init and out.
- 17a2: xor rcx,rcx
# ---- outer loop (target of the jl at 18ba): preload the 8 accumulators from init ----
- 17a5: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 17ab: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 17b2: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 17b9: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 17c0: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 17ca: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 17d4: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 17de: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = walking row pointer for this column block, rax = byte offset into v.
- 17e8: mov rdx,rsi
- 17eb: xor rax,rax
# ---- inner loop (target of the jl at 1860): acc[k] += v[j] * W[j][block + k] ----
- 17ee: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 17f4: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 17f8: vaddps ymm0,ymm0,ymm8
- 17fd: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 1802: vaddps ymm1,ymm1,ymm9
- 1807: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 180c: vaddps ymm2,ymm2,ymm10
- 1811: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 1816: vaddps ymm3,ymm3,ymm11
- 181b: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 1823: vaddps ymm4,ymm4,ymm8
- 1828: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 1830: vaddps ymm5,ymm5,ymm9
- 1835: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 183d: vaddps ymm6,ymm6,ymm10
- 1842: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 184a: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of v.
- 184f: add rdx,0x400
- 1856: add rax,0x4
- 185a: cmp rax,0x400
- 1860: jl 17ee <rl_lstm/MatMul_4+0x68>
# Write the finished 64-float block to the output.
- 1862: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 1868: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 186f: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 1876: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 187d: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1887: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 1891: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 189b: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W, init and the output.
- 18a5: add rsi,0x100
- 18ac: add rcx,0x100
- 18b3: cmp rcx,0x400
- 18ba: jl 17a5 <rl_lstm/MatMul_4+0x1f>
# rl_lstm/MatMul_2: fused multiply-accumulate, out = init + v * W, in single precision.
#   v    = 0x400 bytes (256 floats) at the pointer loaded from [rbp+0x10]
#   W    = rl_lstm/c2i, read with a row stride of 0x400 bytes (one row per element of v)
#   init = 0x400 bytes at [rbp+0xa20] (the buffer written by rl_lstm/MatMul_1)
#   out  = 0x400 bytes at [rbp+0x1220]
# The outer loop handles 64 output floats at a time; accumulators are preloaded from init instead of zeroed.
- 00000000000018c0 <rl_lstm/MatMul_2>:
- 18c0: mov rdi,QWORD PTR [rbp+0x10]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 18c4: movabs rsi,0x0
- 18c6: R_X86_64_64 rl_lstm/c2i
- 18ce: lea r9,[rbp+0xa20]
- 18d5: lea r8,[rbp+0x1220]
# rcx = byte offset of the current 64-float block in both init and out.
- 18dc: xor rcx,rcx
# ---- outer loop (target of the jl at 19f4): preload the 8 accumulators from init ----
- 18df: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 18e5: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 18ec: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 18f3: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 18fa: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 1904: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 190e: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 1918: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = walking row pointer for this column block, rax = byte offset into v.
- 1922: mov rdx,rsi
- 1925: xor rax,rax
# ---- inner loop (target of the jl at 199a): acc[k] += v[j] * W[j][block + k] ----
- 1928: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 192e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1932: vaddps ymm0,ymm0,ymm8
- 1937: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 193c: vaddps ymm1,ymm1,ymm9
- 1941: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1946: vaddps ymm2,ymm2,ymm10
- 194b: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 1950: vaddps ymm3,ymm3,ymm11
- 1955: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 195d: vaddps ymm4,ymm4,ymm8
- 1962: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 196a: vaddps ymm5,ymm5,ymm9
- 196f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 1977: vaddps ymm6,ymm6,ymm10
- 197c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 1984: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of v.
- 1989: add rdx,0x400
- 1990: add rax,0x4
- 1994: cmp rax,0x400
- 199a: jl 1928 <rl_lstm/MatMul_2+0x68>
# Write the finished 64-float block to the output.
- 199c: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 19a2: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 19a9: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 19b0: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 19b7: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 19c1: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 19cb: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 19d5: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W, init and the output.
- 19df: add rsi,0x100
- 19e6: add rcx,0x100
- 19ed: cmp rcx,0x400
- 19f4: jl 18df <rl_lstm/MatMul_2+0x1f>
# rl_lstm/add_4: element-wise pass over 0x400 bytes (256 packed singles, 32 bytes per iteration).
# Per vector, with A = [rbp+0x1220+i], B = [rbp+0xe20+i], P = vector at the pointer from [rbp+0x10]:
#   g   = ymm0 / (ymm0 + f(-(A + rl_lstm/bi[i])))
#   r   = R(B + rl_lstm/bc[i])              (R = clamped rational function, numerator / denominator)
#   out = g * r + (ymm0 - g) * P            -> stored to the buffer whose pointer is in [rbp+0x20]
#   R(out)                                  -> stored back over B at [rbp+0xe20+i]
# NOTE(review): f() has the shape of a polynomial exp() and R() the shape of a rational tanh
# approximation, with ymm0 looking like 1.0f - i.e. presumably an LSTM gate / cell-state update.
# The constants are only visible as an ASCII dump, so confirm against the generator.
- 00000000000019fa <rl_lstm/add_4>:
# Loop-invariant constants from the rl_lstm_data area: ymm0 is the "one" used by g,
# ymm1/ymm2 are the clamp bounds and ymm3..ymm8 the leading coefficients used by R().
- 19fa: vmovaps ymm0,YMMWORD PTR [rip+0x73e] # 2140 <rl_lstm_data+0x1>
- 1a02: vmovaps ymm1,YMMWORD PTR [rip+0x756] # 2160 <rl_lstm_data+0x21>
- 1a0a: vmovaps ymm2,YMMWORD PTR [rip+0x76e] # 2180 <rl_lstm_data+0x41>
- 1a12: vmovaps ymm3,YMMWORD PTR [rip+0x786] # 21a0 <rl_lstm_data+0x61>
- 1a1a: vmovaps ymm4,YMMWORD PTR [rip+0x79e] # 21c0 <rl_lstm_data+0x81>
- 1a22: vmovaps ymm5,YMMWORD PTR [rip+0x7b6] # 21e0 <rl_lstm_data+0xa1>
- 1a2a: vmovaps ymm6,YMMWORD PTR [rip+0x7ce] # 2200 <rl_lstm_data+0xc1>
- 1a32: vmovaps ymm7,YMMWORD PTR [rip+0x7e6] # 2220 <rl_lstm_data+0xe1>
- 1a3a: vmovaps ymm8,YMMWORD PTR [rip+0x7fe] # 2240 <rl_lstm_data+0x101>
# r8 = B input, rcx = bc bias, r9 = A input, rdx = bi bias (both biases filled in by relocations),
# rsi = P pointer, rdi = output pointer, r10 = second output (same address as r8, so B is updated in place).
- 1a42: lea r8,[rbp+0xe20]
- 1a49: movabs rcx,0x0
- 1a4b: R_X86_64_64 rl_lstm/bc
- 1a53: lea r9,[rbp+0x1220]
- 1a5a: movabs rdx,0x0
- 1a5c: R_X86_64_64 rl_lstm/bi
- 1a64: mov rsi,QWORD PTR [rbp+0x10]
- 1a68: mov rdi,QWORD PTR [rbp+0x20]
- 1a6c: lea r10,[rbp+0xe20]
# rax = byte offset into every buffer.
- 1a73: xor rax,rax
# ---- loop body (target of the jl at 1c78) ----
# Part 1: g. ymm11 = -(A + bi), then clamp, floor-based reduction and polynomial.
- 1a76: vmovaps ymm9,YMMWORD PTR [r9+rax*1]
- 1a7c: vaddps ymm10,ymm9,YMMWORD PTR [rdx+rax*1]
- 1a81: vxorps ymm9,ymm9,ymm9
- 1a86: vsubps ymm11,ymm9,ymm10
- 1a8b: vminps ymm9,ymm11,YMMWORD PTR [rip+0x7cd] # 2260 <rl_lstm_data+0x121>
- 1a93: vmaxps ymm10,ymm9,YMMWORD PTR [rip+0x7e5] # 2280 <rl_lstm_data+0x141>
- 1a9b: vmulps ymm9,ymm10,YMMWORD PTR [rip+0x7fd] # 22a0 <rl_lstm_data+0x161>
- 1aa3: vaddps ymm12,ymm9,YMMWORD PTR [rip+0x815] # 22c0 <rl_lstm_data+0x181>
# imm8 = 1 selects round-toward-negative-infinity (floor).
- 1aab: vroundps ymm9,ymm12,0x1
- 1ab1: vmulps ymm12,ymm9,YMMWORD PTR [rip+0x827] # 22e0 <rl_lstm_data+0x1a1>
- 1ab9: vaddps ymm13,ymm12,ymm10
- 1abe: vmulps ymm10,ymm13,ymm13
# Horner-style polynomial evaluation in ymm13.
- 1ac3: vmulps ymm12,ymm13,YMMWORD PTR [rip+0x835] # 2300 <rl_lstm_data+0x1c1>
- 1acb: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x84d] # 2320 <rl_lstm_data+0x1e1>
- 1ad3: vmulps ymm12,ymm14,ymm13
- 1ad8: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x860] # 2340 <rl_lstm_data+0x201>
- 1ae0: vmulps ymm12,ymm14,ymm13
- 1ae5: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x873] # 2360 <rl_lstm_data+0x221>
- 1aed: vmulps ymm12,ymm14,ymm13
- 1af2: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x886] # 2380 <rl_lstm_data+0x241>
- 1afa: vmulps ymm12,ymm14,ymm13
- 1aff: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x7b9] # 22c0 <rl_lstm_data+0x181>
- 1b07: vmulps ymm12,ymm14,ymm10
- 1b0c: vaddps ymm10,ymm12,ymm13
- 1b11: vaddps ymm12,ymm10,ymm0
# Scale factor: (floor + constant) truncated to int32 and shifted left 23 bits into the float
# exponent field, done per 128-bit half (extract / shift / insert).
- 1b15: vaddps ymm10,ymm9,YMMWORD PTR [rip+0x883] # 23a0 <rl_lstm_data+0x261>
- 1b1d: vcvttps2dq ymm9,ymm10
- 1b22: vextractf128 xmm15,ymm9,0x1
- 1b28: vpslld xmm15,xmm15,0x17
- 1b2e: vpslld xmm10,xmm9,0x17
- 1b34: vinsertf128 ymm10,ymm10,xmm15,0x1
- 1b3a: vmulps ymm9,ymm12,ymm10
- 1b3f: vmaxps ymm10,ymm9,ymm11
# ymm10 = g = ymm0 / (ymm0 + value); stays live until the blend at 1bd4-1bde.
- 1b44: vaddps ymm9,ymm0,ymm10
- 1b49: vdivps ymm10,ymm0,ymm9
# Part 2: r = R(B + bc). Input is clamped to [ymm1, ymm2]; ymm9 = x*x; ymm12 = x * P1(x*x)
# is the numerator and the second Horner chain P2(x*x) is the denominator.
- 1b4e: vmovaps ymm9,YMMWORD PTR [r8+rax*1]
- 1b54: vaddps ymm11,ymm9,YMMWORD PTR [rcx+rax*1]
- 1b59: vminps ymm9,ymm11,ymm2
- 1b5d: vmaxps ymm11,ymm9,ymm1
- 1b61: vmulps ymm9,ymm11,ymm11
- 1b66: vmulps ymm12,ymm9,ymm3
- 1b6a: vaddps ymm13,ymm12,ymm4
- 1b6e: vmulps ymm12,ymm9,ymm13
- 1b73: vaddps ymm13,ymm12,ymm5
- 1b77: vmulps ymm12,ymm9,ymm13
- 1b7c: vaddps ymm13,ymm12,ymm6
- 1b80: vmulps ymm12,ymm9,ymm13
- 1b85: vaddps ymm13,ymm12,ymm7
- 1b89: vmulps ymm12,ymm9,ymm13
- 1b8e: vaddps ymm13,ymm12,ymm8
- 1b93: vmulps ymm12,ymm9,ymm13
- 1b98: vaddps ymm13,ymm12,YMMWORD PTR [rip+0x820] # 23c0 <rl_lstm_data+0x281>
- 1ba0: vmulps ymm12,ymm11,ymm13
- 1ba5: vmulps ymm11,ymm9,YMMWORD PTR [rip+0x833] # 23e0 <rl_lstm_data+0x2a1>
- 1bad: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x84b] # 2400 <rl_lstm_data+0x2c1>
- 1bb5: vmulps ymm11,ymm9,ymm13
- 1bba: vaddps ymm13,ymm11,YMMWORD PTR [rip+0x85e] # 2420 <rl_lstm_data+0x2e1>
- 1bc2: vmulps ymm11,ymm9,ymm13
- 1bc7: vaddps ymm9,ymm11,YMMWORD PTR [rip+0x871] # 2440 <rl_lstm_data+0x301>
- 1bcf: vdivps ymm11,ymm12,ymm9
# Part 3: blend. out = g * r + (ymm0 - g) * P, stored through rdi.
- 1bd4: vmulps ymm9,ymm10,ymm11
- 1bd9: vsubps ymm11,ymm0,ymm10
- 1bde: vmulps ymm10,ymm11,YMMWORD PTR [rsi+rax*1]
- 1be3: vaddps ymm11,ymm9,ymm10
- 1be8: vmovaps YMMWORD PTR [rdi+rax*1],ymm11
# Part 4: apply the same clamped rational function R() to the value just stored.
- 1bed: vminps ymm9,ymm11,ymm2
- 1bf1: vmaxps ymm10,ymm9,ymm1
- 1bf5: vmulps ymm9,ymm10,ymm10
- 1bfa: vmulps ymm11,ymm9,ymm3
- 1bfe: vaddps ymm12,ymm11,ymm4
- 1c02: vmulps ymm11,ymm9,ymm12
- 1c07: vaddps ymm12,ymm11,ymm5
- 1c0b: vmulps ymm11,ymm9,ymm12
- 1c10: vaddps ymm12,ymm11,ymm6
- 1c14: vmulps ymm11,ymm9,ymm12
- 1c19: vaddps ymm12,ymm11,ymm7
- 1c1d: vmulps ymm11,ymm9,ymm12
- 1c22: vaddps ymm12,ymm11,ymm8
- 1c27: vmulps ymm11,ymm9,ymm12
- 1c2c: vaddps ymm12,ymm11,YMMWORD PTR [rip+0x78c] # 23c0 <rl_lstm_data+0x281>
- 1c34: vmulps ymm11,ymm10,ymm12
- 1c39: vmulps ymm10,ymm9,YMMWORD PTR [rip+0x79f] # 23e0 <rl_lstm_data+0x2a1>
- 1c41: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7b7] # 2400 <rl_lstm_data+0x2c1>
- 1c49: vmulps ymm10,ymm9,ymm12
- 1c4e: vaddps ymm12,ymm10,YMMWORD PTR [rip+0x7ca] # 2420 <rl_lstm_data+0x2e1>
- 1c56: vmulps ymm10,ymm9,ymm12
- 1c5b: vaddps ymm9,ymm10,YMMWORD PTR [rip+0x7dd] # 2440 <rl_lstm_data+0x301>
- 1c63: vdivps ymm10,ymm11,ymm9
# Overwrites B in place; safe within an iteration because B was already read at 1b4e.
- 1c68: vmovaps YMMWORD PTR [r10+rax*1],ymm10
- 1c6e: add rax,0x20
- 1c72: cmp rax,0x400
- 1c78: jl 1a76 <rl_lstm/add_4+0x7c>
# rl_lstm/MatMul_6: vector-matrix product, out = v * W, in single precision.
#   v   = 0x400 bytes (256 floats) at the pointer loaded from [rbp+0x20] (the buffer rl_lstm/add_4 stores to)
#   W   = rl_lstm/c2o, read with a row stride of 0x400 bytes (one row per element of v)
#   out = 0x400 bytes at [rbp+0x1620]
# The outer loop handles 64 output floats at a time; accumulators start at zero.
- 0000000000001c7e <rl_lstm/MatMul_6>:
- 1c7e: mov rdi,QWORD PTR [rbp+0x20]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 1c82: movabs rsi,0x0
- 1c84: R_X86_64_64 rl_lstm/c2o
- 1c8c: lea r8,[rbp+0x1620]
# rcx = byte offset of the current 64-float output block.
- 1c93: xor rcx,rcx
# ---- outer loop (target of the jl at 1d88): clear the 8 accumulators ----
- 1c96: vxorps ymm0,ymm0,ymm0
- 1c9a: vxorps ymm1,ymm1,ymm1
- 1c9e: vxorps ymm2,ymm2,ymm2
- 1ca2: vxorps ymm3,ymm3,ymm3
- 1ca6: vxorps ymm4,ymm4,ymm4
- 1caa: vxorps ymm5,ymm5,ymm5
- 1cae: vxorps ymm6,ymm6,ymm6
- 1cb2: vxorps ymm7,ymm7,ymm7
# rdx = walking row pointer for this column block, rax = byte offset into v.
- 1cb6: mov rdx,rsi
- 1cb9: xor rax,rax
# ---- inner loop (target of the jl at 1d2e): acc[k] += v[j] * W[j][block + k] ----
- 1cbc: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 1cc2: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1cc6: vaddps ymm0,ymm0,ymm8
- 1ccb: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 1cd0: vaddps ymm1,ymm1,ymm9
- 1cd5: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1cda: vaddps ymm2,ymm2,ymm10
- 1cdf: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 1ce4: vaddps ymm3,ymm3,ymm11
- 1ce9: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 1cf1: vaddps ymm4,ymm4,ymm8
- 1cf6: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 1cfe: vaddps ymm5,ymm5,ymm9
- 1d03: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 1d0b: vaddps ymm6,ymm6,ymm10
- 1d10: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 1d18: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of v.
- 1d1d: add rdx,0x400
- 1d24: add rax,0x4
- 1d28: cmp rax,0x400
- 1d2e: jl 1cbc <rl_lstm/MatMul_6+0x3e>
# Write the finished 64-float block to the output.
- 1d30: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 1d36: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 1d3d: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 1d44: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 1d4b: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1d55: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 1d5f: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1d69: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W and of the output.
- 1d73: add rsi,0x100
- 1d7a: add rcx,0x100
- 1d81: cmp rcx,0x400
- 1d88: jl 1c96 <rl_lstm/MatMul_6+0x18>
# rl_lstm/MatMul_5: fused multiply-accumulate, out = init + x * W, in single precision.
#   x    = 0x160 bytes (88 floats) at [rbp+0xc0]
#   W    = rl_lstm/x2o, read with a row stride of 0x400 bytes (one row per element of x)
#   init = 0x400 bytes at [rbp+0x1620] (the buffer written by rl_lstm/MatMul_6)
#   out  = 0x400 bytes at [rbp+0x1a20]
# The outer loop handles 64 output floats at a time; accumulators are preloaded from init instead of zeroed.
- 0000000000001d8e <rl_lstm/MatMul_5>:
- 1d8e: lea rdi,[rbp+0xc0]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 1d95: movabs rsi,0x0
- 1d97: R_X86_64_64 rl_lstm/x2o
- 1d9f: lea r9,[rbp+0x1620]
- 1da6: lea r8,[rbp+0x1a20]
# rcx = byte offset of the current 64-float block in both init and out.
- 1dad: xor rcx,rcx
# ---- outer loop (target of the jl at 1ec5): preload the 8 accumulators from init ----
- 1db0: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 1db6: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 1dbd: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 1dc4: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 1dcb: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 1dd5: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 1ddf: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 1de9: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = walking row pointer for this column block, rax = byte offset into x.
- 1df3: mov rdx,rsi
- 1df6: xor rax,rax
# ---- inner loop (target of the jl at 1e6b): acc[k] += x[j] * W[j][block + k] ----
- 1df9: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 1dff: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1e03: vaddps ymm0,ymm0,ymm8
- 1e08: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 1e0d: vaddps ymm1,ymm1,ymm9
- 1e12: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1e17: vaddps ymm2,ymm2,ymm10
- 1e1c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 1e21: vaddps ymm3,ymm3,ymm11
- 1e26: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 1e2e: vaddps ymm4,ymm4,ymm8
- 1e33: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 1e3b: vaddps ymm5,ymm5,ymm9
- 1e40: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 1e48: vaddps ymm6,ymm6,ymm10
- 1e4d: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 1e55: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of x.
- 1e5a: add rdx,0x400
- 1e61: add rax,0x4
- 1e65: cmp rax,0x160
- 1e6b: jl 1df9 <rl_lstm/MatMul_5+0x6b>
# Write the finished 64-float block to the output.
- 1e6d: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 1e73: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 1e7a: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 1e81: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 1e88: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1e92: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 1e9c: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1ea6: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W, init and the output.
- 1eb0: add rsi,0x100
- 1eb7: add rcx,0x100
- 1ebe: cmp rcx,0x400
- 1ec5: jl 1db0 <rl_lstm/MatMul_5+0x22>
# rl_lstm/MatMul_7: fused multiply-accumulate, out = init + v * W, in single precision.
#   v    = 0x400 bytes (256 floats) at the pointer loaded from [rbp+0x8]
#   W    = rl_lstm/h2o, read with a row stride of 0x400 bytes (one row per element of v)
#   init = 0x400 bytes at [rbp+0x1a20] (the buffer written by rl_lstm/MatMul_5)
#   out  = 0x400 bytes at [rbp+0x1e20]
# The outer loop handles 64 output floats at a time; accumulators are preloaded from init instead of zeroed.
- 0000000000001ecb <rl_lstm/MatMul_7>:
- 1ecb: mov rdi,QWORD PTR [rbp+0x8]
# Placeholder immediate; the relocation below supplies the weight matrix address.
- 1ecf: movabs rsi,0x0
- 1ed1: R_X86_64_64 rl_lstm/h2o
- 1ed9: lea r9,[rbp+0x1a20]
- 1ee0: lea r8,[rbp+0x1e20]
# rcx = byte offset of the current 64-float block in both init and out.
- 1ee7: xor rcx,rcx
# ---- outer loop (target of the jl at 1fff): preload the 8 accumulators from init ----
- 1eea: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 1ef0: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 1ef7: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 1efe: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 1f05: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 1f0f: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 1f19: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 1f23: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
# rdx = walking row pointer for this column block, rax = byte offset into v.
- 1f2d: mov rdx,rsi
- 1f30: xor rax,rax
# ---- inner loop (target of the jl at 1fa5): acc[k] += v[j] * W[j][block + k] ----
- 1f33: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 1f39: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 1f3d: vaddps ymm0,ymm0,ymm8
- 1f42: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 1f47: vaddps ymm1,ymm1,ymm9
- 1f4c: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 1f51: vaddps ymm2,ymm2,ymm10
- 1f56: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 1f5b: vaddps ymm3,ymm3,ymm11
- 1f60: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 1f68: vaddps ymm4,ymm4,ymm8
- 1f6d: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 1f75: vaddps ymm5,ymm5,ymm9
- 1f7a: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 1f82: vaddps ymm6,ymm6,ymm10
- 1f87: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 1f8f: vaddps ymm7,ymm7,ymm11
# Next row of W, next element of v.
- 1f94: add rdx,0x400
- 1f9b: add rax,0x4
- 1f9f: cmp rax,0x400
- 1fa5: jl 1f33 <rl_lstm/MatMul_7+0x68>
# Write the finished 64-float block to the output.
- 1fa7: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 1fad: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 1fb4: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 1fbb: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 1fc2: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 1fcc: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 1fd6: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 1fe0: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
# Advance to the next column block of W, init and the output.
- 1fea: add rsi,0x100
- 1ff1: add rcx,0x100
- 1ff8: cmp rcx,0x400
- 1fff: jl 1eea <rl_lstm/MatMul_7+0x1f>
# rl_lstm/add_7: element-wise pass over 0x400 bytes (256 packed singles, 32 bytes per iteration).
# For each vector: t = [rbp+0x1e20+i] + rl_lstm/bo[i]; g = ymm0 / (ymm0 + f(-t)); store g * [rbp+0xe20+i]
# to the buffer whose pointer is read from [rbp+0x28].
# NOTE(review): f() has the shape of a range-reduced polynomial exp() and ymm0 looks like 1.0f in the
# data dump, which would make g a logistic sigmoid - the constants are only visible as an ASCII dump, confirm.
# This block also contains the epilogue of rl_lstm (pop rbp / vzeroupper / ret).
- 0000000000002005 <rl_lstm/add_7>:
# Loop-invariant constants are loaded once from the rl_lstm_data area via RIP-relative addressing.
- 2005: vmovaps ymm0,YMMWORD PTR [rip+0x133] # 2140 <rl_lstm_data+0x1>
# ymm1 = 0.0, used below to negate the biased input (0 - t).
- 200d: vxorps ymm1,ymm1,ymm1
- 2011: vmovaps ymm2,YMMWORD PTR [rip+0x267] # 2280 <rl_lstm_data+0x141>
- 2019: vmovaps ymm3,YMMWORD PTR [rip+0x23f] # 2260 <rl_lstm_data+0x121>
- 2021: vmovaps ymm4,YMMWORD PTR [rip+0x297] # 22c0 <rl_lstm_data+0x181>
- 2029: vmovaps ymm5,YMMWORD PTR [rip+0x26f] # 22a0 <rl_lstm_data+0x161>
- 2031: vmovaps ymm6,YMMWORD PTR [rip+0x2a7] # 22e0 <rl_lstm_data+0x1a1>
- 2039: vmovaps ymm7,YMMWORD PTR [rip+0x2bf] # 2300 <rl_lstm_data+0x1c1>
- 2041: vmovaps ymm8,YMMWORD PTR [rip+0x2d7] # 2320 <rl_lstm_data+0x1e1>
# rsi = input buffer (written by rl_lstm/MatMul_7), rcx = bias vector (relocated below),
# rdi = multiplicand buffer (second output of rl_lstm/add_4), rdx = output pointer from [rbp+0x28].
- 2049: lea rsi,[rbp+0x1e20]
- 2050: movabs rcx,0x0
- 2052: R_X86_64_64 rl_lstm/bo
- 205a: lea rdi,[rbp+0xe20]
- 2061: mov rdx,QWORD PTR [rbp+0x28]
# rax = byte offset into all four buffers.
- 2065: xor rax,rax
# ---- loop body (target of the jl at 2134) ----
- 2068: vmovaps ymm9,YMMWORD PTR [rsi+rax*1]
- 206d: vaddps ymm10,ymm9,YMMWORD PTR [rcx+rax*1]
# ymm9 = -(input + bias); kept live until the vmaxps at 2111.
- 2072: vsubps ymm9,ymm1,ymm10
# Clamp to [ymm2, ymm3] before the approximation.
- 2077: vminps ymm10,ymm9,ymm3
- 207b: vmaxps ymm11,ymm10,ymm2
- 207f: vmulps ymm10,ymm11,ymm5
- 2083: vaddps ymm12,ymm10,ymm4
# imm8 = 1 selects round-toward-negative-infinity (floor).
- 2087: vroundps ymm10,ymm12,0x1
- 208d: vmulps ymm12,ymm10,ymm6
# ymm13 = clamped value plus floor * ymm6 (presumably the reduced argument - confirm constants).
- 2091: vaddps ymm13,ymm12,ymm11
- 2096: vmulps ymm11,ymm13,ymm13
# Horner-style polynomial evaluation in ymm13.
- 209b: vmulps ymm12,ymm7,ymm13
- 20a0: vaddps ymm14,ymm12,ymm8
- 20a5: vmulps ymm12,ymm14,ymm13
- 20aa: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x28e] # 2340 <rl_lstm_data+0x201>
- 20b2: vmulps ymm12,ymm14,ymm13
- 20b7: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2a1] # 2360 <rl_lstm_data+0x221>
- 20bf: vmulps ymm12,ymm14,ymm13
- 20c4: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x2b4] # 2380 <rl_lstm_data+0x241>
- 20cc: vmulps ymm12,ymm14,ymm13
- 20d1: vaddps ymm14,ymm12,YMMWORD PTR [rip+0x1e7] # 22c0 <rl_lstm_data+0x181>
- 20d9: vmulps ymm12,ymm14,ymm11
- 20de: vaddps ymm11,ymm12,ymm13
- 20e3: vaddps ymm12,ymm11,ymm0
# Build a scale factor: (floor + constant) truncated to int32, then shifted left by 23 bits so the
# integer lands in the exponent field of a single-precision float. The shift is done per 128-bit
# half (extract / shift / insert) rather than with a 256-bit integer shift.
- 20e7: vaddps ymm11,ymm10,YMMWORD PTR [rip+0x2b1] # 23a0 <rl_lstm_data+0x261>
- 20ef: vcvttps2dq ymm10,ymm11
- 20f4: vextractf128 xmm15,ymm10,0x1
- 20fa: vpslld xmm15,xmm15,0x17
- 2100: vpslld xmm11,xmm10,0x17
- 2106: vinsertf128 ymm11,ymm11,xmm15,0x1
- 210c: vmulps ymm10,ymm12,ymm11
# Take the larger of the approximation and the unclamped negated input.
- 2111: vmaxps ymm11,ymm10,ymm9
# ymm10 = ymm0 / (ymm0 + value)
- 2116: vaddps ymm9,ymm0,ymm11
- 211b: vdivps ymm10,ymm0,ymm9
# Scale by the matching vector from [rbp+0xe20+rax] and store to the output buffer.
- 2120: vmulps ymm9,ymm10,YMMWORD PTR [rdi+rax*1]
- 2125: vmovaps YMMWORD PTR [rdx+rax*1],ymm9
- 212a: add rax,0x20
- 212e: cmp rax,0x400
- 2134: jl 2068 <rl_lstm/add_7+0x63>
# Epilogue of rl_lstm: restore rbp, clear upper ymm halves, return.
- 213a: pop rbp
- 213b: vzeroupper
- 213e: ret
- 000000000000213f <rl_lstm_data>:
- 213f: ....?...?...?...
- 214f: ?...?...?...?...
- 215f: ?...............
- 216f: ................
- 217f: ....A...A...A...
- 218f: A...A...A...A...
- 219f: A.%...%...%...%.
- 21af: ..%...%...%...%.
- 21bf: .~3a*~3a*~3a*~3a
- 21cf: *~3a*~3a*~3a*~3a
- 21df: *.7...7...7...7.
- 21ef: ..7...7...7...7.
- 21ff: .A.\3A.\3A.\3A.\
- 220f: 3A.\3A.\3A.\3A.\
- 221f: 3JCy7JCy7JCy7JCy
- 222f: 7JCy7JCy7JCy7JCy
- 223f: 7..':..':..':..'
- 224f: :..':..':..':..'
- 225f: :...B...B...B...
- 226f: B...B...B...B...
- 227f: B...............
- 228f: ................
- 229f: .;..?;..?;..?;..
- 22af: ?;..?;..?;..?;..
- 22bf: ?...?...?...?...
- 22cf: ?...?...?...?...
- 22df: ?.r1..r1..r1..r1
- 22ef: ..r1..r1..r1..r1
- 22ff: .giP9giP9giP9giP
- 230f: 9giP9giP9giP9giP
- 231f: 9.C.:.C.:.C.:.C.
- 232f: :.C.:.C.:.C.:.C.
- 233f: :...<...<...<...
- 234f: <...<...<...<...
- 235f: <..*=..*=..*=..*
- 236f: =..*=..*=..*=..*
- 237f: =..*>..*>..*>..*
- 238f: >..*>..*>..*>..*
- 239f: >...B...B...B...
- 23af: B...B...B...B...
- 23bf: B.Y.;.Y.;.Y.;.Y.
- 23cf: ;.Y.;.Y.;.Y.;.Y.
- 23df: ;...5...5...5...
- 23ef: 5...5...5...5...
- 23ff: 5...8...8...8...
- 240f: 8...8...8...8...
- 241f: 8...;...;...;...
- 242f: ;...;...;...;...
- 243f: ;.Y.;.Y.;.Y.;.Y.
- 244f: ;.Y.;.Y.;.Y.;.Y.
- 245f: ;
# ff: entry point. Saves rbp and then uses rbp as the base pointer for every [rbp+...] access
# in the labelled sections that follow; rbp is set to the value passed in rdi.
- 0000000000002460 <ff>:
- 2460: vzeroupper
- 2463: push rbp
- 2464: mov rbp,rdi
# ff/rl/Collect: fills one slot at [rbp+0x2a0] from a table of 0x400-byte rows whose base pointer
# is read from [rbp+0x200]. The signed 32-bit row index is read from [rbp+0x12c]:
#   index >= 0 : copy the 0x400-byte row into the slot
#   index == -1: write 0x3f800000 (the bit pattern of 1.0f) at slot+0x400
#   other < 0  : leave the slot untouched
# Only one of the two regions (row data or the dword at +0x400) is written per call, so the other
# keeps whatever it held before. No upper bound check is performed on the index here.
- 0000000000002467 <ff/rl/Collect>:
# rdx = address of the index, r8 = table base, r9 = destination slot.
- 2467: lea rdx,[rbp+0x12c]
- 246e: mov r8,QWORD PTR [rbp+0x200]
- 2475: lea r9,[rbp+0x2a0]
- 247c: movsxd rax,DWORD PTR [rdx]
- 247f: test rax,rax
- 2482: js 24a3 <ff/rl/Collect+0x3c>
# Row index -> byte offset (rows are 1 << 10 = 0x400 bytes), then byte-wise copy into the slot.
- 2488: shl rax,0xa
- 248c: add rax,r8
- 248f: mov rdi,r9
- 2492: mov rsi,rax
- 2495: mov rcx,0x400
- 249c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 249e: jmp 24ba <ff/frame-end-lr/Collect>
# Negative index: only -1 gets the marker value; any other negative value falls through untouched.
- 24a3: cmp rax,0xffffffffffffffff
- 24a7: jne 24ba <ff/frame-end-lr/Collect>
- 24ad: mov r11d,0x3f800000
- 24b3: mov DWORD PTR [r9+0x400],r11d
# ff/frame-end-lr/Collect: fills five consecutive slots starting at [rbp+0x6c0], spaced 0x420 bytes
# apart, from a table of 0x400-byte rows whose base pointer is read from [rbp+0x138].
# The five signed 32-bit row indices are read from [rbp+0x164]. Per index:
#   index >= 0 : copy the 0x400-byte row into the slot
#   index == -1: write 0x3f800000 (the bit pattern of 1.0f) at slot+0x400
#   other < 0  : leave the slot untouched
# No upper bound check is performed on the indices here.
- 00000000000024ba <ff/frame-end-lr/Collect>:
# rdx = index array, r8 = table base, r9 = current slot, r10 = loop counter.
- 24ba: lea rdx,[rbp+0x164]
- 24c1: mov r8,QWORD PTR [rbp+0x138]
- 24c8: lea r9,[rbp+0x6c0]
- 24cf: xor r10,r10
# ---- loop body (target of the jne at 251f) ----
- 24d2: movsxd rax,DWORD PTR [rdx+r10*4]
- 24d6: test rax,rax
- 24d9: js 24fa <ff/frame-end-lr/Collect+0x40>
# Row index -> byte offset (0x400-byte rows), then byte-wise copy into the slot.
- 24df: shl rax,0xa
- 24e3: add rax,r8
- 24e6: mov rdi,r9
- 24e9: mov rsi,rax
- 24ec: mov rcx,0x400
- 24f3: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 24f5: jmp 2511 <ff/frame-end-lr/Collect+0x57>
# Negative index: only -1 gets the marker value.
- 24fa: cmp rax,0xffffffffffffffff
- 24fe: jne 2511 <ff/frame-end-lr/Collect+0x57>
- 2504: mov r11d,0x3f800000
- 250a: mov DWORD PTR [r9+0x400],r11d
# Advance to the next slot (0x400 bytes of row data plus 0x20 bytes holding the marker dword).
- 2511: add r9,0x420
- 2518: inc r10
- 251b: cmp r10,0x5
- 251f: jne 24d2 <ff/frame-end-lr/Collect+0x18>
# ff/frame-end-rl/Collect: fills five consecutive slots starting at [rbp+0x1b60], spaced 0x420 bytes
# apart, from a table of 0x400-byte rows whose base pointer is read from [rbp+0x200].
# The five signed 32-bit row indices are read from [rbp+0x140]. Per index:
#   index >= 0 : copy the 0x400-byte row into the slot
#   index == -1: write 0x3f800000 (the bit pattern of 1.0f) at slot+0x400
#   other < 0  : leave the slot untouched
# No upper bound check is performed on the indices here.
- 0000000000002521 <ff/frame-end-rl/Collect>:
# rdx = index array, r8 = table base, r9 = current slot, r10 = loop counter.
- 2521: lea rdx,[rbp+0x140]
- 2528: mov r8,QWORD PTR [rbp+0x200]
- 252f: lea r9,[rbp+0x1b60]
- 2536: xor r10,r10
# ---- loop body (target of the jne at 2586) ----
- 2539: movsxd rax,DWORD PTR [rdx+r10*4]
- 253d: test rax,rax
- 2540: js 2561 <ff/frame-end-rl/Collect+0x40>
# Row index -> byte offset (0x400-byte rows), then byte-wise copy into the slot.
- 2546: shl rax,0xa
- 254a: add rax,r8
- 254d: mov rdi,r9
- 2550: mov rsi,rax
- 2553: mov rcx,0x400
- 255a: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 255c: jmp 2578 <ff/frame-end-rl/Collect+0x57>
# Negative index: only -1 gets the marker value.
- 2561: cmp rax,0xffffffffffffffff
- 2565: jne 2578 <ff/frame-end-rl/Collect+0x57>
- 256b: mov r11d,0x3f800000
- 2571: mov DWORD PTR [r9+0x400],r11d
# Advance to the next slot (0x400 bytes of row data plus 0x20 bytes holding the marker dword).
- 2578: add r9,0x420
- 257f: inc r10
- 2582: cmp r10,0x5
- 2586: jne 2539 <ff/frame-end-rl/Collect+0x18>
; ff/in-roles/Lookup: sum up to 32 table rows of 16 single-precision floats.
;   rcx = rbp+0x208     array of 32 signed 32-bit row indices
;   rdx = address of ff/fixed_embedding_matrix_0 (movabs immediate filled in
;         by the R_X86_64_64 relocation)
;   rsi = rbp+0x3000    64-byte result buffer
;   ymm0/ymm1 = running sum of the low/high 8 floats of each selected row
;   r8  = 0x7c          row substituted when an index equals -1
;   rdi = loop counter, 0..31
; Per index: >= 0 adds row[index]; == -1 adds row[0x7c]; any other negative
; value is skipped. Rows are 64 bytes (index << 6).
- 0000000000002588 <ff/in-roles/Lookup>:
- 2588: lea rcx,[rbp+0x208]
- 258f: movabs rdx,0x0
- 2591: R_X86_64_64 ff/fixed_embedding_matrix_0
- 2599: lea rsi,[rbp+0x3000]
; Zero both accumulators before the loop.
- 25a0: vxorps ymm0,ymm0,ymm0
- 25a4: vxorps ymm1,ymm1,ymm1
- 25a8: mov r8,0x7c
- 25af: xor rdi,rdi
; Loop head: fetch index rdi and classify it by sign.
- 25b2: movsxd rax,DWORD PTR [rcx+rdi*4]
- 25b6: test rax,rax
- 25b9: jns 25cc <ff/in-roles/Lookup+0x44>
- 25bf: cmp rax,0xffffffffffffffff
- 25c3: jne 25dc <ff/in-roles/Lookup+0x54>
; Index was -1: fall back to the row number held in r8.
- 25c9: mov rax,r8
; Row address = table + row * 64; accumulate its 16 floats.
- 25cc: shl rax,0x6
- 25d0: add rax,rdx
- 25d3: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 25d7: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 25dc: inc rdi
- 25df: cmp rdi,0x20
- 25e3: jne 25b2 <ff/in-roles/Lookup+0x2a>
; Write the 16-float sum; vmovaps requires rsi to be 32-byte aligned.
- 25e5: vmovaps YMMWORD PTR [rsi],ymm0
- 25e9: vmovaps YMMWORD PTR [rsi+0x20],ymm1
; ff/unlabeled-roles/Lookup: sum up to 32 table rows of 16 single-precision
; floats.
;   rcx = rbp+0x180     array of 32 signed 32-bit row indices
;   rdx = address of ff/fixed_embedding_matrix_3 (movabs immediate filled in
;         by the R_X86_64_64 relocation)
;   rsi = rbp+0x3040    64-byte result buffer
;   ymm0/ymm1 = running sum of the low/high 8 floats of each selected row
;   r8  = 0x18          row substituted when an index equals -1
;   rdi = loop counter, 0..31
; Per index: >= 0 adds row[index]; == -1 adds row[0x18]; any other negative
; value is skipped. Rows are 64 bytes (index << 6).
- 00000000000025ee <ff/unlabeled-roles/Lookup>:
- 25ee: lea rcx,[rbp+0x180]
- 25f5: movabs rdx,0x0
- 25f7: R_X86_64_64 ff/fixed_embedding_matrix_3
- 25ff: lea rsi,[rbp+0x3040]
; Zero both accumulators before the loop.
- 2606: vxorps ymm0,ymm0,ymm0
- 260a: vxorps ymm1,ymm1,ymm1
- 260e: mov r8,0x18
- 2615: xor rdi,rdi
; Loop head: fetch index rdi and classify it by sign.
- 2618: movsxd rax,DWORD PTR [rcx+rdi*4]
- 261c: test rax,rax
- 261f: jns 2632 <ff/unlabeled-roles/Lookup+0x44>
- 2625: cmp rax,0xffffffffffffffff
- 2629: jne 2642 <ff/unlabeled-roles/Lookup+0x54>
; Index was -1: fall back to the row number held in r8.
- 262f: mov rax,r8
; Row address = table + row * 64; accumulate its 16 floats.
- 2632: shl rax,0x6
- 2636: add rax,rdx
- 2639: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 263d: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 2642: inc rdi
- 2645: cmp rdi,0x20
- 2649: jne 2618 <ff/unlabeled-roles/Lookup+0x2a>
; Write the 16-float sum; vmovaps requires rsi to be 32-byte aligned.
- 264b: vmovaps YMMWORD PTR [rsi],ymm0
- 264f: vmovaps YMMWORD PTR [rsi+0x20],ymm1
; ff/labeled-roles/Lookup: sum up to 32 table rows of 16 single-precision
; floats.
;   rcx = rbp           array of 32 signed 32-bit row indices (offset 0 of the
;                       block addressed by rbp)
;   rdx = address of ff/fixed_embedding_matrix_2 (movabs immediate filled in
;         by the R_X86_64_64 relocation)
;   rsi = rbp+0x3080    64-byte result buffer
;   ymm0/ymm1 = running sum of the low/high 8 floats of each selected row
;   r8  = 0x270         row substituted when an index equals -1
;   rdi = loop counter, 0..31
; Per index: >= 0 adds row[index]; == -1 adds row[0x270]; any other negative
; value is skipped. Rows are 64 bytes (index << 6).
- 0000000000002654 <ff/labeled-roles/Lookup>:
- 2654: mov rcx,rbp
- 2657: movabs rdx,0x0
- 2659: R_X86_64_64 ff/fixed_embedding_matrix_2
- 2661: lea rsi,[rbp+0x3080]
; Zero both accumulators before the loop.
- 2668: vxorps ymm0,ymm0,ymm0
- 266c: vxorps ymm1,ymm1,ymm1
- 2670: mov r8,0x270
- 2677: xor rdi,rdi
; Loop head: fetch index rdi and classify it by sign.
- 267a: movsxd rax,DWORD PTR [rcx+rdi*4]
- 267e: test rax,rax
- 2681: jns 2694 <ff/labeled-roles/Lookup+0x40>
- 2687: cmp rax,0xffffffffffffffff
- 268b: jne 26a4 <ff/labeled-roles/Lookup+0x50>
; Index was -1: fall back to the row number held in r8.
- 2691: mov rax,r8
; Row address = table + row * 64; accumulate its 16 floats.
- 2694: shl rax,0x6
- 2698: add rax,rdx
- 269b: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 269f: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 26a4: inc rdi
- 26a7: cmp rdi,0x20
- 26ab: jne 267a <ff/labeled-roles/Lookup+0x26>
; Write the 16-float sum; vmovaps requires rsi to be 32-byte aligned.
- 26ad: vmovaps YMMWORD PTR [rsi],ymm0
- 26b1: vmovaps YMMWORD PTR [rsi+0x20],ymm1
; ff/out-roles/Lookup: sum up to 32 table rows of 16 single-precision floats.
;   rcx = rbp+0xac      array of 32 signed 32-bit row indices
;   rdx = address of ff/fixed_embedding_matrix_1 (movabs immediate filled in
;         by the R_X86_64_64 relocation)
;   rsi = rbp+0x30c0    64-byte result buffer
;   ymm0/ymm1 = running sum of the low/high 8 floats of each selected row
;   r8  = 0x7c          row substituted when an index equals -1
;   rdi = loop counter, 0..31
; Per index: >= 0 adds row[index]; == -1 adds row[0x7c]; any other negative
; value is skipped. Rows are 64 bytes (index << 6).
- 00000000000026b6 <ff/out-roles/Lookup>:
- 26b6: lea rcx,[rbp+0xac]
- 26bd: movabs rdx,0x0
- 26bf: R_X86_64_64 ff/fixed_embedding_matrix_1
- 26c7: lea rsi,[rbp+0x30c0]
; Zero both accumulators before the loop.
- 26ce: vxorps ymm0,ymm0,ymm0
- 26d2: vxorps ymm1,ymm1,ymm1
- 26d6: mov r8,0x7c
- 26dd: xor rdi,rdi
; Loop head: fetch index rdi and classify it by sign.
- 26e0: movsxd rax,DWORD PTR [rcx+rdi*4]
- 26e4: test rax,rax
- 26e7: jns 26fa <ff/out-roles/Lookup+0x44>
- 26ed: cmp rax,0xffffffffffffffff
- 26f1: jne 270a <ff/out-roles/Lookup+0x54>
; Index was -1: fall back to the row number held in r8.
- 26f7: mov rax,r8
; Row address = table + row * 64; accumulate its 16 floats.
- 26fa: shl rax,0x6
- 26fe: add rax,rdx
- 2701: vaddps ymm0,ymm0,YMMWORD PTR [rax]
- 2705: vaddps ymm1,ymm1,YMMWORD PTR [rax+0x20]
- 270a: inc rdi
- 270d: cmp rdi,0x20
- 2711: jne 26e0 <ff/out-roles/Lookup+0x2a>
; Write the 16-float sum; vmovaps requires rsi to be 32-byte aligned.
- 2713: vmovaps YMMWORD PTR [rsi],ymm0
- 2717: vmovaps YMMWORD PTR [rsi+0x20],ymm1
; ff/frame-focus-steps/Collect: gather five 0x200-byte rows, one per index.
;   rdx = rbp+0x98      array of five signed 32-bit indices
;   r8  = [rbp+0x178]   base pointer of the source rows
;   r9  = rbp+0x3100    first destination slot; slots are 0x220 bytes apart
;   r10 = loop counter, 0..4
; Per index: >= 0 copies 0x200 bytes from r8 + index*0x200 into the slot;
; == -1 stores 0x3f800000 (float 1.0 bits) at slot+0x200; any other negative
; value leaves the slot untouched.
- 000000000000271c <ff/frame-focus-steps/Collect>:
- 271c: lea rdx,[rbp+0x98]
- 2723: mov r8,QWORD PTR [rbp+0x178]
- 272a: lea r9,[rbp+0x3100]
- 2731: xor r10,r10
; Loop head: fetch and sign-extend index r10.
- 2734: movsxd rax,DWORD PTR [rdx+r10*4]
- 2738: test rax,rax
- 273b: js 275c <ff/frame-focus-steps/Collect+0x40>
; Non-negative index: byte-wise copy of the selected row into the slot.
- 2741: shl rax,0x9
- 2745: add rax,r8
- 2748: mov rdi,r9
- 274b: mov rsi,rax
- 274e: mov rcx,0x200
- 2755: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2757: jmp 2773 <ff/frame-focus-steps/Collect+0x57>
; Negative index: only -1 writes the 1.0 marker after the 0x200 data bytes.
- 275c: cmp rax,0xffffffffffffffff
- 2760: jne 2773 <ff/frame-focus-steps/Collect+0x57>
- 2766: mov r11d,0x3f800000
- 276c: mov DWORD PTR [r9+0x200],r11d
; Advance to the next 0x220-byte slot and repeat until five are done.
- 2773: add r9,0x220
- 277a: inc r10
- 277d: cmp r10,0x5
- 2781: jne 2734 <ff/frame-focus-steps/Collect+0x18>
; ff/frame-creation-steps/Collect: gather five 0x200-byte rows, one per index.
;   rdx = rbp+0x84      array of five signed 32-bit indices
;   r8  = [rbp+0x178]   base pointer of the source rows
;   r9  = rbp+0x3ba0    first destination slot; slots are 0x220 bytes apart
;   r10 = loop counter, 0..4
; Per index: >= 0 copies 0x200 bytes from r8 + index*0x200 into the slot;
; == -1 stores 0x3f800000 (float 1.0 bits) at slot+0x200; any other negative
; value leaves the slot untouched.
- 0000000000002783 <ff/frame-creation-steps/Collect>:
- 2783: lea rdx,[rbp+0x84]
- 278a: mov r8,QWORD PTR [rbp+0x178]
- 2791: lea r9,[rbp+0x3ba0]
- 2798: xor r10,r10
; Loop head: fetch and sign-extend index r10.
- 279b: movsxd rax,DWORD PTR [rdx+r10*4]
- 279f: test rax,rax
- 27a2: js 27c3 <ff/frame-creation-steps/Collect+0x40>
; Non-negative index: byte-wise copy of the selected row into the slot.
- 27a8: shl rax,0x9
- 27ac: add rax,r8
- 27af: mov rdi,r9
- 27b2: mov rsi,rax
- 27b5: mov rcx,0x200
- 27bc: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 27be: jmp 27da <ff/frame-creation-steps/Collect+0x57>
; Negative index: only -1 writes the 1.0 marker after the 0x200 data bytes.
- 27c3: cmp rax,0xffffffffffffffff
- 27c7: jne 27da <ff/frame-creation-steps/Collect+0x57>
- 27cd: mov r11d,0x3f800000
- 27d3: mov DWORD PTR [r9+0x200],r11d
; Advance to the next 0x220-byte slot and repeat until five are done.
- 27da: add r9,0x220
- 27e1: inc r10
- 27e4: cmp r10,0x5
- 27e8: jne 279b <ff/frame-creation-steps/Collect+0x18>
; ff/lr/Collect: gather one 0x400-byte row selected by a 32-bit index.
;   rdx = rbp+0x80      address of the signed 32-bit index
;   r8  = [rbp+0x138]   base pointer of the source rows
;   r9  = rbp+0x4640    destination buffer
; index >= 0  : copy 0x400 bytes from r8 + index*0x400 to r9 (rep movsb).
; index == -1 : copy nothing; store 0x3f800000 (bit pattern of float 1.0)
;               into the extra dword at r9+0x400.
; index < -1  : leave the destination untouched.
- 00000000000027ea <ff/lr/Collect>:
- 27ea: lea rdx,[rbp+0x80]
- 27f1: mov r8,QWORD PTR [rbp+0x138]
- 27f8: lea r9,[rbp+0x4640]
; Sign-extend the index so that negative sentinel values are detectable.
- 27ff: movsxd rax,DWORD PTR [rdx]
- 2802: test rax,rax
- 2805: js 2826 <ff/lr/Collect+0x3c>
; Non-negative index: source = r8 + index * 0x400, then byte-wise copy.
- 280b: shl rax,0xa
- 280f: add rax,r8
- 2812: mov rdi,r9
- 2815: mov rsi,rax
- 2818: mov rcx,0x400
- 281f: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2821: jmp 283d <ff/history/Collect>
; Negative index: only the value -1 gets the 1.0 marker written.
- 2826: cmp rax,0xffffffffffffffff
- 282a: jne 283d <ff/history/Collect>
- 2830: mov r11d,0x3f800000
- 2836: mov DWORD PTR [r9+0x400],r11d
; ff/history/Collect: gather four 0x200-byte rows, one per index.
;   rdx = rbp+0x154     array of four signed 32-bit indices
;   r8  = [rbp+0x178]   base pointer of the source rows
;   r9  = rbp+0x4a60    first destination slot; slots are 0x220 bytes apart
;   r10 = loop counter, 0..3
; Per index: >= 0 copies 0x200 bytes from r8 + index*0x200 into the slot;
; == -1 stores 0x3f800000 (float 1.0 bits) at slot+0x200; any other negative
; value leaves the slot untouched.
- 000000000000283d <ff/history/Collect>:
- 283d: lea rdx,[rbp+0x154]
- 2844: mov r8,QWORD PTR [rbp+0x178]
- 284b: lea r9,[rbp+0x4a60]
- 2852: xor r10,r10
; Loop head: fetch and sign-extend index r10.
- 2855: movsxd rax,DWORD PTR [rdx+r10*4]
- 2859: test rax,rax
- 285c: js 287d <ff/history/Collect+0x40>
; Non-negative index: byte-wise copy of the selected row into the slot.
- 2862: shl rax,0x9
- 2866: add rax,r8
- 2869: mov rdi,r9
- 286c: mov rsi,rax
- 286f: mov rcx,0x200
- 2876: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2878: jmp 2894 <ff/history/Collect+0x57>
; Negative index: only -1 writes the 1.0 marker after the 0x200 data bytes.
- 287d: cmp rax,0xffffffffffffffff
- 2881: jne 2894 <ff/history/Collect+0x57>
- 2887: mov r11d,0x3f800000
- 288d: mov DWORD PTR [r9+0x200],r11d
; Advance to the next 0x220-byte slot and repeat until four are done.
- 2894: add r9,0x220
- 289b: inc r10
- 289e: cmp r10,0x4
- 28a2: jne 2855 <ff/history/Collect+0x18>
; ff/rl/MatMul: vector-by-matrix product giving 32 single-precision outputs.
;   rdi = rbp+0x2a0     input vector (the buffer written by ff/rl/Collect)
;   rsi = address of ff/linked_embedding_matrix_6 (via R_X86_64_64 relocation)
;   r8  = rbp+0x52e0    output buffer (0x80 bytes)
;   rcx = 0             output byte offset; never advanced in this section
;   ymm0..ymm3 = four accumulators of 8 floats each
;   rdx = current matrix row, 0x80 bytes (32 floats) per row
;   rax = byte offset of the current input element
; The loop reads input bytes 0..0x403, i.e. 257 floats: the 0x400 data bytes
; plus the extra dword at +0x400. Each input element is broadcast and
; multiplied into one matrix row; the products are summed per output column.
- 00000000000028a4 <ff/rl/MatMul>:
- 28a4: lea rdi,[rbp+0x2a0]
- 28ab: movabs rsi,0x0
- 28ad: R_X86_64_64 ff/linked_embedding_matrix_6
- 28b5: lea r8,[rbp+0x52e0]
- 28bc: xor rcx,rcx
; Zero the four accumulators.
- 28bf: vxorps ymm0,ymm0,ymm0
- 28c3: vxorps ymm1,ymm1,ymm1
- 28c7: vxorps ymm2,ymm2,ymm2
- 28cb: vxorps ymm3,ymm3,ymm3
- 28cf: mov rdx,rsi
- 28d2: xor rax,rax
; Loop body: acc[0..31] += input[rax/4] * row[0..31].
- 28d5: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]
- 28db: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
- 28df: vaddps ymm0,ymm0,ymm4
- 28e3: vmulps ymm5,ymm8,YMMWORD PTR [rdx+0x20]
- 28e8: vaddps ymm1,ymm1,ymm5
- 28ec: vmulps ymm6,ymm8,YMMWORD PTR [rdx+0x40]
- 28f1: vaddps ymm2,ymm2,ymm6
- 28f5: vmulps ymm7,ymm8,YMMWORD PTR [rdx+0x60]
- 28fa: vaddps ymm3,ymm3,ymm7
; Next matrix row and next input float.
- 28fe: add rdx,0x80
- 2905: add rax,0x4
- 2909: cmp rax,0x404
- 290f: jl 28d5 <ff/rl/MatMul+0x31>
; Store the 32 results; vmovaps requires r8 to be 32-byte aligned.
- 2911: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 2917: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 291e: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 2925: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
; ff/frame-end-lr/MatMul: dot product of every input row with every matrix
; row, one scalar float per pair.
;   rax = rbp+0x6c0     current input row (0x420-byte slots filled by
;                       ff/frame-end-lr/Collect)
;   rcx = address of ff/linked_embedding_matrix_2 (via R_X86_64_64 relocation)
;   rdi = rbp+0x5360    output cursor, advanced 4 bytes per result
;   rsi = rcx+0x8400    end of the matrix (0x8400 / 0x420 = 32 rows)
;   r8  = rdi+0x280     end of the output (0x280 bytes = 160 floats = 5 * 32)
;   rdx = current matrix row, 0x420 bytes per row
;   r9  = float index within the row, stepped by 0x18 (24 floats)
; The inner loop runs for r9 = 0, 24, ..., 240 and so covers 264 floats, the
; full 0x420-byte slot: 257 used floats plus padding.
; NOTE(review): the pointer bounds checks use signed jl, so they rely on both
; addresses lying in the same half of the address space.
- 000000000000292c <ff/frame-end-lr/MatMul>:
- 292c: lea rax,[rbp+0x6c0]
- 2933: movabs rcx,0x0
- 2935: R_X86_64_64 ff/linked_embedding_matrix_2
- 293d: lea rdi,[rbp+0x5360]
- 2944: mov rsi,rcx
- 2947: add rsi,0x8400
- 294e: mov r8,rdi
- 2951: add r8,0x280
; Outer loop (per input row): restart at the first matrix row.
- 2958: mov rdx,rcx
; Middle loop (per matrix row): reset the index and three partial sums.
- 295b: xor r9,r9
- 295e: vxorps ymm3,ymm3,ymm3
- 2962: vxorps ymm4,ymm4,ymm4
- 2966: vxorps ymm5,ymm5,ymm5
; Inner loop: multiply 24 input floats by 24 matrix floats and accumulate.
- 296a: vmovaps ymm0,YMMWORD PTR [rax+r9*4]
- 2970: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20]
- 2977: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40]
- 297e: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
- 2984: vaddps ymm3,ymm3,ymm0
- 2988: vmulps ymm1,ymm1,YMMWORD PTR [rdx+r9*4+0x20]
- 298f: vaddps ymm4,ymm4,ymm1
- 2993: vmulps ymm2,ymm2,YMMWORD PTR [rdx+r9*4+0x40]
- 299a: vaddps ymm5,ymm5,ymm2
- 299e: add r9,0x18
- 29a2: cmp r9,0x101
- 29a9: jl 296a <ff/frame-end-lr/MatMul+0x3e>
; Merge the three partial sums, then reduce the 8 lanes to a scalar:
; swap the 128-bit halves and apply three horizontal adds, which leaves the
; total in element 0.
- 29ab: vaddps ymm3,ymm3,ymm4
- 29af: vaddps ymm3,ymm3,ymm5
- 29b3: vperm2f128 ymm6,ymm3,ymm3,0x1
- 29b9: vhaddps ymm3,ymm3,ymm6
- 29bd: vhaddps ymm3,ymm3,ymm3
- 29c1: vhaddps ymm3,ymm3,ymm3
; Emit the dot product and move on to the next matrix row.
- 29c5: vmovss DWORD PTR [rdi],xmm3
- 29c9: add rdi,0x4
- 29cd: add rdx,0x420
- 29d4: cmp rdx,rsi
- 29d7: jl 295b <ff/frame-end-lr/MatMul+0x2f>
; All matrix rows done: next input row, until the output buffer is full.
- 29d9: add rax,0x420
- 29df: cmp rdi,r8
- 29e2: jl 2958 <ff/frame-end-lr/MatMul+0x2c>
; ff/frame-end-rl/MatMul: dot product of every input row with every matrix
; row, one scalar float per pair.
;   rax = rbp+0x1b60    current input row (0x420-byte slots filled by
;                       ff/frame-end-rl/Collect)
;   rcx = address of ff/linked_embedding_matrix_3 (via R_X86_64_64 relocation)
;   rdi = rbp+0x55e0    output cursor, advanced 4 bytes per result
;   rsi = rcx+0x8400    end of the matrix (0x8400 / 0x420 = 32 rows)
;   r8  = rdi+0x280     end of the output (0x280 bytes = 160 floats = 5 * 32)
;   rdx = current matrix row, 0x420 bytes per row
;   r9  = float index within the row, stepped by 0x18 (24 floats)
; The inner loop runs for r9 = 0, 24, ..., 240 and so covers 264 floats, the
; full 0x420-byte slot: 257 used floats plus padding.
; NOTE(review): the pointer bounds checks use signed jl, so they rely on both
; addresses lying in the same half of the address space.
- 00000000000029e8 <ff/frame-end-rl/MatMul>:
- 29e8: lea rax,[rbp+0x1b60]
- 29ef: movabs rcx,0x0
- 29f1: R_X86_64_64 ff/linked_embedding_matrix_3
- 29f9: lea rdi,[rbp+0x55e0]
- 2a00: mov rsi,rcx
- 2a03: add rsi,0x8400
- 2a0a: mov r8,rdi
- 2a0d: add r8,0x280
; Outer loop (per input row): restart at the first matrix row.
- 2a14: mov rdx,rcx
; Middle loop (per matrix row): reset the index and three partial sums.
- 2a17: xor r9,r9
- 2a1a: vxorps ymm3,ymm3,ymm3
- 2a1e: vxorps ymm4,ymm4,ymm4
- 2a22: vxorps ymm5,ymm5,ymm5
; Inner loop: multiply 24 input floats by 24 matrix floats and accumulate.
- 2a26: vmovaps ymm0,YMMWORD PTR [rax+r9*4]
- 2a2c: vmovaps ymm1,YMMWORD PTR [rax+r9*4+0x20]
- 2a33: vmovaps ymm2,YMMWORD PTR [rax+r9*4+0x40]
- 2a3a: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
- 2a40: vaddps ymm3,ymm3,ymm0
- 2a44: vmulps ymm1,ymm1,YMMWORD PTR [rdx+r9*4+0x20]
- 2a4b: vaddps ymm4,ymm4,ymm1
- 2a4f: vmulps ymm2,ymm2,YMMWORD PTR [rdx+r9*4+0x40]
- 2a56: vaddps ymm5,ymm5,ymm2
- 2a5a: add r9,0x18
- 2a5e: cmp r9,0x101
- 2a65: jl 2a26 <ff/frame-end-rl/MatMul+0x3e>
; Merge the three partial sums, then reduce the 8 lanes to a scalar:
; swap the 128-bit halves and apply three horizontal adds, which leaves the
; total in element 0.
- 2a67: vaddps ymm3,ymm3,ymm4
- 2a6b: vaddps ymm3,ymm3,ymm5
- 2a6f: vperm2f128 ymm6,ymm3,ymm3,0x1
- 2a75: vhaddps ymm3,ymm3,ymm6
- 2a79: vhaddps ymm3,ymm3,ymm3
- 2a7d: vhaddps ymm3,ymm3,ymm3
; Emit the dot product and move on to the next matrix row.
- 2a81: vmovss DWORD PTR [rdi],xmm3
- 2a85: add rdi,0x4
- 2a89: add rdx,0x420
- 2a90: cmp rdx,rsi
- 2a93: jl 2a17 <ff/frame-end-rl/MatMul+0x2f>
; All matrix rows done: next input row, until the output buffer is full.
- 2a95: add rax,0x420
- 2a9b: cmp rdi,r8
- 2a9e: jl 2a14 <ff/frame-end-rl/MatMul+0x2c>
; ff/frame-focus-steps/MatMul: dot product of every input row with every
; matrix row, one scalar float per pair.
;   rax = rbp+0x3100    current input row (0x220-byte slots filled by
;                       ff/frame-focus-steps/Collect)
;   rcx = address of ff/linked_embedding_matrix_1 (via R_X86_64_64 relocation)
;   rdi = rbp+0x5860    output cursor, advanced 4 bytes per result
;   rsi = rcx+0x8800    end of the matrix (0x8800 / 0x220 = 64 rows)
;   r8  = rdi+0x500     end of the output (0x500 bytes = 320 floats = 5 * 64)
;   rdx = current matrix row, 0x220 bytes per row
;   r9  = float index within the row, stepped by 8
; The inner loop runs for r9 = 0, 8, ..., 128 and so covers 136 floats, the
; full 0x220-byte slot: 129 used floats plus padding.
; NOTE(review): the pointer bounds checks use signed jl, so they rely on both
; addresses lying in the same half of the address space.
- 0000000000002aa4 <ff/frame-focus-steps/MatMul>:
- 2aa4: lea rax,[rbp+0x3100]
- 2aab: movabs rcx,0x0
- 2aad: R_X86_64_64 ff/linked_embedding_matrix_1
- 2ab5: lea rdi,[rbp+0x5860]
- 2abc: mov rsi,rcx
- 2abf: add rsi,0x8800
- 2ac6: mov r8,rdi
- 2ac9: add r8,0x500
; Outer loop (per input row): restart at the first matrix row.
- 2ad0: mov rdx,rcx
; Middle loop (per matrix row): reset the index and the accumulator.
- 2ad3: xor r9,r9
- 2ad6: vxorps ymm1,ymm1,ymm1
; Inner loop: multiply 8 input floats by 8 matrix floats and accumulate.
- 2ada: vmovaps ymm0,YMMWORD PTR [rax+r9*4]
- 2ae0: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
- 2ae6: vaddps ymm1,ymm1,ymm0
- 2aea: add r9,0x8
- 2aee: cmp r9,0x81
- 2af5: jl 2ada <ff/frame-focus-steps/MatMul+0x36>
; Reduce the 8 lanes to a scalar: swap the 128-bit halves and apply three
; horizontal adds, which leaves the total in element 0.
- 2af7: vperm2f128 ymm2,ymm1,ymm1,0x1
- 2afd: vhaddps ymm1,ymm1,ymm2
- 2b01: vhaddps ymm1,ymm1,ymm1
- 2b05: vhaddps ymm1,ymm1,ymm1
; Emit the dot product and move on to the next matrix row.
- 2b09: vmovss DWORD PTR [rdi],xmm1
- 2b0d: add rdi,0x4
- 2b11: add rdx,0x220
- 2b18: cmp rdx,rsi
- 2b1b: jl 2ad3 <ff/frame-focus-steps/MatMul+0x2f>
; All matrix rows done: next input row, until the output buffer is full.
- 2b1d: add rax,0x220
- 2b23: cmp rdi,r8
- 2b26: jl 2ad0 <ff/frame-focus-steps/MatMul+0x2c>
; ff/frame-creation-steps/MatMul: dot product of every input row with every
; matrix row, one scalar float per pair.
;   rax = rbp+0x3ba0    current input row (0x220-byte slots filled by
;                       ff/frame-creation-steps/Collect)
;   rcx = address of ff/linked_embedding_matrix_0 (via R_X86_64_64 relocation)
;   rdi = rbp+0x5d60    output cursor, advanced 4 bytes per result
;   rsi = rcx+0x8800    end of the matrix (0x8800 / 0x220 = 64 rows)
;   r8  = rdi+0x500     end of the output (0x500 bytes = 320 floats = 5 * 64)
;   rdx = current matrix row, 0x220 bytes per row
;   r9  = float index within the row, stepped by 8
; The inner loop runs for r9 = 0, 8, ..., 128 and so covers 136 floats, the
; full 0x220-byte slot: 129 used floats plus padding.
; NOTE(review): the pointer bounds checks use signed jl, so they rely on both
; addresses lying in the same half of the address space.
- 0000000000002b28 <ff/frame-creation-steps/MatMul>:
- 2b28: lea rax,[rbp+0x3ba0]
- 2b2f: movabs rcx,0x0
- 2b31: R_X86_64_64 ff/linked_embedding_matrix_0
- 2b39: lea rdi,[rbp+0x5d60]
- 2b40: mov rsi,rcx
- 2b43: add rsi,0x8800
- 2b4a: mov r8,rdi
- 2b4d: add r8,0x500
; Outer loop (per input row): restart at the first matrix row.
- 2b54: mov rdx,rcx
; Middle loop (per matrix row): reset the index and the accumulator.
- 2b57: xor r9,r9
- 2b5a: vxorps ymm1,ymm1,ymm1
; Inner loop: multiply 8 input floats by 8 matrix floats and accumulate.
- 2b5e: vmovaps ymm0,YMMWORD PTR [rax+r9*4]
- 2b64: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
- 2b6a: vaddps ymm1,ymm1,ymm0
- 2b6e: add r9,0x8
- 2b72: cmp r9,0x81
- 2b79: jl 2b5e <ff/frame-creation-steps/MatMul+0x36>
; Reduce the 8 lanes to a scalar: swap the 128-bit halves and apply three
; horizontal adds, which leaves the total in element 0.
- 2b7b: vperm2f128 ymm2,ymm1,ymm1,0x1
- 2b81: vhaddps ymm1,ymm1,ymm2
- 2b85: vhaddps ymm1,ymm1,ymm1
- 2b89: vhaddps ymm1,ymm1,ymm1
; Emit the dot product and move on to the next matrix row.
- 2b8d: vmovss DWORD PTR [rdi],xmm1
- 2b91: add rdi,0x4
- 2b95: add rdx,0x220
- 2b9c: cmp rdx,rsi
- 2b9f: jl 2b57 <ff/frame-creation-steps/MatMul+0x2f>
; All matrix rows done: next input row, until the output buffer is full.
- 2ba1: add rax,0x220
- 2ba7: cmp rdi,r8
- 2baa: jl 2b54 <ff/frame-creation-steps/MatMul+0x2c>
; ff/lr/MatMul: vector-by-matrix product giving 32 single-precision outputs.
;   rdi = rbp+0x4640    input vector (the buffer written by ff/lr/Collect)
;   rsi = address of ff/linked_embedding_matrix_5 (via R_X86_64_64 relocation)
;   r8  = rbp+0x6260    output buffer (0x80 bytes)
;   rcx = 0             output byte offset; never advanced in this section
;   ymm0..ymm3 = four accumulators of 8 floats each
;   rdx = current matrix row, 0x80 bytes (32 floats) per row
;   rax = byte offset of the current input element
; The loop reads input bytes 0..0x403, i.e. 257 floats: the 0x400 data bytes
; plus the extra dword at +0x400. Each input element is broadcast and
; multiplied into one matrix row; the products are summed per output column.
- 0000000000002bac <ff/lr/MatMul>:
- 2bac: lea rdi,[rbp+0x4640]
- 2bb3: movabs rsi,0x0
- 2bb5: R_X86_64_64 ff/linked_embedding_matrix_5
- 2bbd: lea r8,[rbp+0x6260]
- 2bc4: xor rcx,rcx
; Zero the four accumulators.
- 2bc7: vxorps ymm0,ymm0,ymm0
- 2bcb: vxorps ymm1,ymm1,ymm1
- 2bcf: vxorps ymm2,ymm2,ymm2
- 2bd3: vxorps ymm3,ymm3,ymm3
- 2bd7: mov rdx,rsi
- 2bda: xor rax,rax
; Loop body: acc[0..31] += input[rax/4] * row[0..31].
- 2bdd: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]
- 2be3: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
- 2be7: vaddps ymm0,ymm0,ymm4
- 2beb: vmulps ymm5,ymm8,YMMWORD PTR [rdx+0x20]
- 2bf0: vaddps ymm1,ymm1,ymm5
- 2bf4: vmulps ymm6,ymm8,YMMWORD PTR [rdx+0x40]
- 2bf9: vaddps ymm2,ymm2,ymm6
- 2bfd: vmulps ymm7,ymm8,YMMWORD PTR [rdx+0x60]
- 2c02: vaddps ymm3,ymm3,ymm7
; Next matrix row and next input float.
- 2c06: add rdx,0x80
- 2c0d: add rax,0x4
- 2c11: cmp rax,0x404
- 2c17: jl 2bdd <ff/lr/MatMul+0x31>
; Store the 32 results; vmovaps requires r8 to be 32-byte aligned.
- 2c19: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 2c1f: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 2c26: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 2c2d: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
; ff/history/MatMul: dot product of every input row with every matrix row,
; one scalar float per pair.
;   rax = rbp+0x4a60    current input row (0x220-byte slots filled by
;                       ff/history/Collect)
;   rcx = address of ff/linked_embedding_matrix_4 (via R_X86_64_64 relocation)
;   rdi = rbp+0x62e0    output cursor, advanced 4 bytes per result
;   rsi = rcx+0x8800    end of the matrix (0x8800 / 0x220 = 64 rows)
;   r8  = rdi+0x400     end of the output (0x400 bytes = 256 floats = 4 * 64)
;   rdx = current matrix row, 0x220 bytes per row
;   r9  = float index within the row, stepped by 8
; The inner loop runs for r9 = 0, 8, ..., 128 and so covers 136 floats, the
; full 0x220-byte slot: 129 used floats plus padding.
; NOTE(review): the pointer bounds checks use signed jl, so they rely on both
; addresses lying in the same half of the address space.
- 0000000000002c34 <ff/history/MatMul>:
- 2c34: lea rax,[rbp+0x4a60]
- 2c3b: movabs rcx,0x0
- 2c3d: R_X86_64_64 ff/linked_embedding_matrix_4
- 2c45: lea rdi,[rbp+0x62e0]
- 2c4c: mov rsi,rcx
- 2c4f: add rsi,0x8800
- 2c56: mov r8,rdi
- 2c59: add r8,0x400
; Outer loop (per input row): restart at the first matrix row.
- 2c60: mov rdx,rcx
; Middle loop (per matrix row): reset the index and the accumulator.
- 2c63: xor r9,r9
- 2c66: vxorps ymm1,ymm1,ymm1
; Inner loop: multiply 8 input floats by 8 matrix floats and accumulate.
- 2c6a: vmovaps ymm0,YMMWORD PTR [rax+r9*4]
- 2c70: vmulps ymm0,ymm0,YMMWORD PTR [rdx+r9*4]
- 2c76: vaddps ymm1,ymm1,ymm0
- 2c7a: add r9,0x8
- 2c7e: cmp r9,0x81
- 2c85: jl 2c6a <ff/history/MatMul+0x36>
; Reduce the 8 lanes to a scalar: swap the 128-bit halves and apply three
; horizontal adds, which leaves the total in element 0.
- 2c87: vperm2f128 ymm2,ymm1,ymm1,0x1
- 2c8d: vhaddps ymm1,ymm1,ymm2
- 2c91: vhaddps ymm1,ymm1,ymm1
- 2c95: vhaddps ymm1,ymm1,ymm1
; Emit the dot product and move on to the next matrix row.
- 2c99: vmovss DWORD PTR [rdi],xmm1
- 2c9d: add rdi,0x4
- 2ca1: add rdx,0x220
- 2ca8: cmp rdx,rsi
- 2cab: jl 2c63 <ff/history/MatMul+0x2f>
; All matrix rows done: next input row, until the output buffer is full.
- 2cad: add rax,0x220
- 2cb3: cmp rdi,r8
- 2cb6: jl 2c60 <ff/history/MatMul+0x2c>
; ff/concat: pack the results of the preceding sections back-to-back into one
; 0x1500-byte buffer at rbp+0x66e0 (r8), using eleven rep movsb copies.
; Layout of the destination (offset: source, length):
;   0x0000: rbp+0x3000  0x40    (ff/in-roles/Lookup output)
;   0x0040: rbp+0x30c0  0x40    (ff/out-roles/Lookup output)
;   0x0080: rbp+0x3080  0x40    (ff/labeled-roles/Lookup output)
;   0x00c0: rbp+0x3040  0x40    (ff/unlabeled-roles/Lookup output)
;   0x0100: rbp+0x5d60  0x500   (ff/frame-creation-steps/MatMul output)
;   0x0600: rbp+0x5860  0x500   (ff/frame-focus-steps/MatMul output)
;   0x0b00: rbp+0x5360  0x280   (ff/frame-end-lr/MatMul output)
;   0x0d80: rbp+0x55e0  0x280   (ff/frame-end-rl/MatMul output)
;   0x1000: rbp+0x62e0  0x400   (ff/history/MatMul output)
;   0x1400: rbp+0x6260  0x80    (ff/lr/MatMul output)
;   0x1480: rbp+0x52e0  0x80    (ff/rl/MatMul output)
; Each copy sets rsi = source, rdi = destination, rcx = byte count.
- 0000000000002cb8 <ff/concat>:
- 2cb8: lea r8,[rbp+0x66e0]
- 2cbf: lea rsi,[rbp+0x3000]
- 2cc6: lea rdi,[r8]
- 2cc9: mov rcx,0x40
- 2cd0: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2cd2: lea rsi,[rbp+0x30c0]
- 2cd9: lea rdi,[r8+0x40]
- 2cdd: mov rcx,0x40
- 2ce4: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2ce6: lea rsi,[rbp+0x3080]
- 2ced: lea rdi,[r8+0x80]
- 2cf4: mov rcx,0x40
- 2cfb: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2cfd: lea rsi,[rbp+0x3040]
- 2d04: lea rdi,[r8+0xc0]
- 2d0b: mov rcx,0x40
- 2d12: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d14: lea rsi,[rbp+0x5d60]
- 2d1b: lea rdi,[r8+0x100]
- 2d22: mov rcx,0x500
- 2d29: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d2b: lea rsi,[rbp+0x5860]
- 2d32: lea rdi,[r8+0x600]
- 2d39: mov rcx,0x500
- 2d40: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d42: lea rsi,[rbp+0x5360]
- 2d49: lea rdi,[r8+0xb00]
- 2d50: mov rcx,0x280
- 2d57: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d59: lea rsi,[rbp+0x55e0]
- 2d60: lea rdi,[r8+0xd80]
- 2d67: mov rcx,0x280
- 2d6e: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d70: lea rsi,[rbp+0x62e0]
- 2d77: lea rdi,[r8+0x1000]
- 2d7e: mov rcx,0x400
- 2d85: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d87: lea rsi,[rbp+0x6260]
- 2d8e: lea rdi,[r8+0x1400]
- 2d95: mov rcx,0x80
- 2d9c: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
- 2d9e: lea rsi,[rbp+0x52e0]
- 2da5: lea rdi,[r8+0x1480]
- 2dac: mov rcx,0x80
- 2db3: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
; ff/MatMul: out = max(0, bias + input * weights), 128 single-precision
; outputs computed as two blocks of 64 columns.
;   rdi = rbp+0x66e0    input vector, 0x1500 bytes (the ff/concat buffer)
;   rsi = address of ff/weights_0 (relocated); advanced 0x100 bytes per block
;   r9  = address of ff/bias_0 (relocated)
;   r8  = [rbp+0x130]   output pointer loaded from the block
;   ymm13 = 0           lower bound used by the final vmaxps
;   rcx = byte offset of the current 64-column block (0x000, then 0x100)
;   rdx = current weight row; rows are 0x200 bytes (128 floats) apart
;   rax = byte offset of the current input element
;   ymm0..ymm7 = accumulators for the 64 columns of the block
- 0000000000002db5 <ff/MatMul>:
- 2db5: lea rdi,[rbp+0x66e0]
- 2dbc: movabs rsi,0x0
- 2dbe: R_X86_64_64 ff/weights_0
- 2dc6: movabs r9,0x0
- 2dc8: R_X86_64_64 ff/bias_0
- 2dd0: mov r8,QWORD PTR [rbp+0x130]
- 2dd7: vxorps ymm13,ymm13,ymm13
- 2ddc: xor rcx,rcx
; Outer loop (per 64-column block): start the accumulators from the bias.
- 2ddf: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 2de5: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20]
- 2dec: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40]
- 2df3: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60]
- 2dfa: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80]
- 2e04: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0]
- 2e0e: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0]
- 2e18: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0]
- 2e22: mov rdx,rsi
- 2e25: xor rax,rax
; Inner loop: broadcast one input float and multiply-accumulate it against
; 64 weights of the current row (ymm8..ymm11 are reused as temporaries).
- 2e28: vbroadcastss ymm12,DWORD PTR [rdi+rax*1]
- 2e2e: vmulps ymm8,ymm12,YMMWORD PTR [rdx]
- 2e32: vaddps ymm0,ymm0,ymm8
- 2e37: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0x20]
- 2e3c: vaddps ymm1,ymm1,ymm9
- 2e41: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0x40]
- 2e46: vaddps ymm2,ymm2,ymm10
- 2e4b: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0x60]
- 2e50: vaddps ymm3,ymm3,ymm11
- 2e55: vmulps ymm8,ymm12,YMMWORD PTR [rdx+0x80]
- 2e5d: vaddps ymm4,ymm4,ymm8
- 2e62: vmulps ymm9,ymm12,YMMWORD PTR [rdx+0xa0]
- 2e6a: vaddps ymm5,ymm5,ymm9
- 2e6f: vmulps ymm10,ymm12,YMMWORD PTR [rdx+0xc0]
- 2e77: vaddps ymm6,ymm6,ymm10
- 2e7c: vmulps ymm11,ymm12,YMMWORD PTR [rdx+0xe0]
- 2e84: vaddps ymm7,ymm7,ymm11
; Next weight row and next input float, until all 0x1500 input bytes are used.
- 2e89: add rdx,0x200
- 2e90: add rax,0x4
- 2e94: cmp rax,0x1500
- 2e9a: jl 2e28 <ff/MatMul+0x73>
; Clamp negative results to zero (max with ymm13 = 0) and store the block.
- 2e9c: vmaxps ymm0,ymm0,ymm13
- 2ea1: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 2ea7: vmaxps ymm1,ymm1,ymm13
- 2eac: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1
- 2eb3: vmaxps ymm2,ymm2,ymm13
- 2eb8: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2
- 2ebf: vmaxps ymm3,ymm3,ymm13
- 2ec4: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3
- 2ecb: vmaxps ymm4,ymm4,ymm13
- 2ed0: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4
- 2eda: vmaxps ymm5,ymm5,ymm13
- 2edf: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5
- 2ee9: vmaxps ymm6,ymm6,ymm13
- 2eee: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6
- 2ef8: vmaxps ymm7,ymm7,ymm13
- 2efd: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7
; Move to the next 64 columns (0x100 bytes) of weights, bias and output.
- 2f07: add rsi,0x100
- 2f0e: add rcx,0x100
- 2f15: cmp rcx,0x200
- 2f1c: jl 2ddf <ff/MatMul+0x2a>
; ff/MatMul_1: out = bias + input * weights, computed 8 columns at a time,
; followed by the epilogue of the whole ff function.
;   rdi = [rbp+0x130]   input vector, 0x200 bytes (the buffer ff/MatMul wrote)
;   rsi = address of ff/weights_softmax (relocated); advanced 0x20 per block
;   r9  = address of ff/bias_softmax (relocated)
;   r8  = rbp+0x7be0    output buffer, 0x6ce0 bytes
;   rcx = byte offset of the current 8-column block
;   rdx = current weight row; rows are 0x6ce0 bytes apart
;   rax = byte offset of the current input element
; No activation is applied in this section: the accumulated values are stored
; as they are.
- 0000000000002f22 <ff/MatMul_1>:
- 2f22: mov rdi,QWORD PTR [rbp+0x130]
- 2f29: movabs rsi,0x0
- 2f2b: R_X86_64_64 ff/weights_softmax
- 2f33: movabs r9,0x0
- 2f35: R_X86_64_64 ff/bias_softmax
- 2f3d: lea r8,[rbp+0x7be0]
- 2f44: xor rcx,rcx
; Outer loop (per 8-column block): start the accumulator from the bias.
- 2f47: vmovaps ymm0,YMMWORD PTR [r9+rcx*1]
- 2f4d: mov rdx,rsi
- 2f50: xor rax,rax
; Inner loop: acc[0..7] += input[rax/4] * row[0..7] over 0x200 input bytes.
- 2f53: vbroadcastss ymm8,DWORD PTR [rdi+rax*1]
- 2f59: vmulps ymm4,ymm8,YMMWORD PTR [rdx]
- 2f5d: vaddps ymm0,ymm0,ymm4
- 2f61: add rdx,0x6ce0
- 2f68: add rax,0x4
- 2f6c: cmp rax,0x200
- 2f72: jl 2f53 <ff/MatMul_1+0x31>
; Store the 8 results and advance to the next 8 columns.
- 2f74: vmovaps YMMWORD PTR [r8+rcx*1],ymm0
- 2f7a: add rsi,0x20
- 2f7e: add rcx,0x20
- 2f82: cmp rcx,0x6ce0
- 2f89: jl 2f47 <ff/MatMul_1+0x25>
; Epilogue of ff: restore the caller's rbp, clear the upper YMM halves, return.
- 2f8b: pop rbp
- 2f8c: vzeroupper
- 2f8f: ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement