Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ----------------------------------------------------------------------
# rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2
#
# Takes two 4x4 blocks of 16-bit samples, subtracts them element-wise,
# runs a two-pass 4-point add/subtract butterfly (unnormalized
# Hadamard-style transform) over the 4x4 differences, and returns the sum
# of the absolute values of the 16 resulting coefficients.
#
# Syntax: Intel operand order (dst, src) with GAS directives.
#
# In (as used by the code below):
#   rdi  address of row 0 of the first block
#   rsi  row stride of the first block, in 16-bit elements
#        (row r is read from rdi + 2*r*rsi)
#   rdx  address of row 0 of the second block
#   rcx  row stride of the second block, in 16-bit elements
#        (row r is read from rdx + 2*r*rcx)
# Out:
#   rax  sum of |coefficient| over all 16 coefficients; no scaling or
#        rounding is applied in this function
# Clobbers: rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm4 (ymm0/ymm1), flags.
#           rbx, rbp, r12-r15 are used but saved and restored.
# Stack:    six pushes plus 24 bytes of scratch (three 8-byte spill slots).
#
# NOTE(review): the argument/return registers are consistent with the
#   System V AMD64 calling convention -- confirm against the caller.
# NOTE(review): samples are sign-extended (vpmovsxwd), i.e. treated as
#   signed 16-bit values -- confirm this matches the caller's sample range.
# NOTE(review): the instruction order looks compiler-scheduled; steps that
#   belong to different lanes are interleaved, so the comments below mark
#   approximate phase boundaries rather than strict ones.
# NOTE(review): the leading "- " on every line is not assembler syntax and
#   looks like a paste artifact -- strip it before assembling.
# ----------------------------------------------------------------------
- .section .text.rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2,"ax",@progbits
- .p2align 4, 0x90
- .type rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2,@function
- rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2:
- .cfi_startproc
# Prologue: save every callee-saved register that is overwritten below,
# then reserve 24 bytes of scratch at [rsp], [rsp + 8] and [rsp + 16].
- push rbp
- .cfi_def_cfa_offset 16
- push r15
- .cfi_def_cfa_offset 24
- push r14
- .cfi_def_cfa_offset 32
- push r13
- .cfi_def_cfa_offset 40
- push r12
- .cfi_def_cfa_offset 48
- push rbx
- .cfi_def_cfa_offset 56
- sub rsp, 24
- .cfi_def_cfa_offset 80
- .cfi_offset rbx, -56
- .cfi_offset r12, -48
- .cfi_offset r13, -40
- .cfi_offset r14, -32
- .cfi_offset r15, -24
- .cfi_offset rbp, -16
# Load phase: each vpmovsxwd reads 8 bytes (four 16-bit samples) and
# sign-extends them to four 32-bit lanes. rax temporarily holds 3*stride
# so that row 3 can be addressed as base + 2*rax.
# Row differences (first block minus second block):
#   xmm1 = row 0, xmm2 = row 1, xmm0 = row 2, xmm3 = row 3
# (the row-3 subtract appears a little further down, after the first
# scalar extracts have already started).
- vpmovsxwd xmm0, qword ptr [rdi]
- vpmovsxwd xmm1, qword ptr [rdx]
- vpmovsxwd xmm2, qword ptr [rdx + 2*rcx]
- vpmovsxwd xmm3, qword ptr [rdx + 4*rcx]
- lea rax, [rsi + 2*rsi]
- vpsubd xmm1, xmm0, xmm1
- vpmovsxwd xmm0, qword ptr [rdi + 2*rsi]
- vmovd r9d, xmm1
- vpextrd r14d, xmm1, 1
- vpsubd xmm2, xmm0, xmm2
- vpmovsxwd xmm0, qword ptr [rdi + 4*rsi]
- vpsubd xmm0, xmm0, xmm3
- vpmovsxwd xmm3, qword ptr [rdi + 2*rax]
- lea rax, [rcx + 2*rcx]
- vpmovsxwd xmm4, qword ptr [rdx + 2*rax]
# First pass (scalar, one lane at a time). From here on rdi, rsi, rdx and
# rcx no longer hold the input arguments; they are reused as temporaries.
# For lane j, with d0..d3 = lane j of the row 0..3 differences:
#   s01 = d0 + d1, d01 = d0 - d1, s23 = d2 + d3, d23 = d2 - d3
# and the four outputs are s01+s23, d01+d23, s01-s23, d01-d23.
# `lea` performs each addition and `sub` the matching subtraction.
# Where the outputs end up (s01+s23, d01+d23, s01-s23, d01-d23):
#   lane 0: r11d,  r15d,      r12d,       r9d
#   lane 1: [rsp], [rsp + 8], [rsp + 16], r14d
#   lane 2: ebx,   edx,       r10d,       esi
#   lane 3: ecx,   r8d,       r13d,       edi
- vmovd eax, xmm2
- vpextrd edi, xmm0, 2
- lea r12d, [rax + r9]
- sub r9d, eax
- vmovd eax, xmm0
- vpsubd xmm3, xmm3, xmm4
- vmovd edx, xmm3
- vpextrd r8d, xmm3, 3
- lea esi, [rdx + rax]
- sub eax, edx
- vpextrd edx, xmm3, 1
- lea r15d, [rax + r9]
- sub r9d, eax
- vpextrd eax, xmm2, 1
- lea r11d, [rsi + r12]
- sub r12d, esi
- lea ecx, [rax + r14]
- sub r14d, eax
- vpextrd eax, xmm0, 1
- lea esi, [rdx + rax]
- sub eax, edx
- lea edx, [rsi + rcx]
- sub ecx, esi
- vpextrd esi, xmm1, 2
# Three of the lane-1 outputs are spilled to the stack scratch slots
# because the general-purpose registers are all in use.
- mov qword ptr [rsp + 16], rcx
- lea ecx, [rax + r14]
- sub r14d, eax
- vpextrd eax, xmm2, 2
- mov qword ptr [rsp], rdx
- mov qword ptr [rsp + 8], rcx
- lea r10d, [rax + rsi]
- sub esi, eax
- vpextrd eax, xmm3, 2
- lea ebp, [rax + rdi]
- sub edi, eax
- lea ebx, [rbp + r10]
- sub r10d, ebp
- lea edx, [rdi + rsi]
- sub esi, edi
- vpextrd edi, xmm1, 3
- vpextrd ebp, xmm2, 3
- lea r13d, [rbp + rdi]
- sub edi, ebp
- vpextrd ebp, xmm0, 3
- lea eax, [r8 + rbp]
- sub ebp, r8d
- lea ecx, [rax + r13]
- sub r13d, eax
# Second pass: the same butterfly, now applied across the four lanes for
# each kind of first-pass output. The four results of each group are
# packed into one xmm register with vmovd/vpinsrd:
#   s01+s23 group -> xmm1, d01+d23 group -> xmm0   (combined into ymm0)
#   s01-s23 group -> xmm1 (built in xmm3), d01-d23 group -> xmm2
#                                                  (combined into ymm1)
# The reload of [rsp] below fetches the spilled lane-1 value; the two
# instructions right after it still finish lane 3 of the first pass.
- mov rax, qword ptr [rsp]
- lea r8d, [rbp + rdi]
- sub edi, ebp
- lea ebp, [rax + r11]
- sub r11d, eax
- lea eax, [rcx + rbx]
- sub ebx, ecx
- lea ecx, [rax + rbp]
- sub ebp, eax
- mov rax, qword ptr [rsp + 8]
# [rsp] is reused here as a 4-byte temporary: ecx is stored and then
# loaded into lane 0 of xmm1 by the vmovd that follows shortly.
- mov dword ptr [rsp], ecx
- lea ecx, [rbx + r11]
- sub r11d, ebx
- vmovd xmm1, dword ptr [rsp]
- lea ebx, [rax + r15]
- sub r15d, eax
- lea eax, [r8 + rdx]
- sub edx, r8d
- lea r8d, [rax + rbx]
- sub ebx, eax
- lea eax, [rdx + r15]
- sub r15d, edx
- lea edx, [r14 + r9]
- sub r9d, r14d
- vpinsrd xmm1, xmm1, ecx, 1
- mov rcx, qword ptr [rsp + 16]
- vmovd xmm0, r8d
- vpinsrd xmm1, xmm1, ebp, 2
- vpinsrd xmm1, xmm1, r11d, 3
- vpinsrd xmm0, xmm0, eax, 1
- vpinsrd xmm0, xmm0, ebx, 2
- lea ebx, [rdi + rsi]
- sub esi, edi
- lea edi, [rbx + rdx]
- sub edx, ebx
- lea ebx, [rsi + r9]
- vpinsrd xmm0, xmm0, r15d, 3
- sub r9d, esi
- vmovd xmm2, edi
# ymm0 = first eight coefficients (xmm1 in the low half, xmm0 in the high
# half); their absolute values are taken a few instructions later.
- vinserti128 ymm0, ymm1, xmm0, 1
- lea eax, [rcx + r12]
- sub r12d, ecx
- lea ecx, [r13 + r10]
- sub r10d, r13d
- vpabsd ymm0, ymm0
- lea ebp, [rcx + rax]
- sub eax, ecx
- lea ecx, [r10 + r12]
- vpinsrd xmm2, xmm2, ebx, 1
- sub r12d, r10d
- vmovd xmm3, ebp
- vpinsrd xmm2, xmm2, edx, 2
- vpinsrd xmm2, xmm2, r9d, 3
- vpinsrd xmm3, xmm3, ecx, 1
- vpinsrd xmm1, xmm3, eax, 2
- vpinsrd xmm1, xmm1, r12d, 3
# ymm1 = remaining eight coefficients; take absolute values and add them
# lane-wise to ymm0, leaving eight 32-bit partial sums in ymm0.
- vinserti128 ymm1, ymm1, xmm2, 1
- vpabsd ymm1, ymm1
- vpaddd ymm0, ymm1, ymm0
# Horizontal reduction: each 32-bit lane is moved into a 32-bit register
# (which zero-extends into the full 64-bit register) and accumulated with
# 64-bit adds. The low 128 bits are consumed first, then the high 128
# bits after vextracti128. The final total is left in rax.
- vmovd eax, xmm0
- vpextrd ecx, xmm0, 1
- add rcx, rax
- vpextrd eax, xmm0, 2
- add rax, rcx
- vpextrd ecx, xmm0, 3
- vextracti128 xmm0, ymm0, 1
- add rcx, rax
- vmovd eax, xmm0
- vpextrd edx, xmm0, 2
- add rax, rcx
- vpextrd ecx, xmm0, 1
- add rcx, rax
- vpextrd eax, xmm0, 3
- add rdx, rcx
- add rax, rdx
# Epilogue: release the scratch area and restore the callee-saved
# registers in the reverse order of the prologue pushes.
- add rsp, 24
- .cfi_def_cfa_offset 56
- pop rbx
- .cfi_def_cfa_offset 48
- pop r12
- .cfi_def_cfa_offset 40
- pop r13
- .cfi_def_cfa_offset 32
- pop r14
- .cfi_def_cfa_offset 24
- pop r15
- .cfi_def_cfa_offset 16
- pop rbp
- .cfi_def_cfa_offset 8
# Clear the upper halves of the ymm registers before returning, since
# this function used 256-bit AVX instructions.
- vzeroupper
- ret
- .Lfunc_end666:
- .size rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2, .Lfunc_end666-rav1e::asm::x86::dist::hbd::satd_kernel_4x4_hbd_avx2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement