Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -----------------------------------------------------------------------------
# rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2
# Compiler-generated listing: GAS directives, Intel operand syntax.
#
# What the code below does, in order:
#   1. Copies an 8x8 block of 16-bit values from each of two inputs onto the
#      stack, zero-extending every value to 32 bits.
#   2. Subtracts the second block from the first, element by element.
#   3. Applies a three-stage add/subtract butterfly to every column, then to
#      every row, of the 8x8 difference block (in place).
#   4. Takes the absolute value of all 64 results and returns their sum.
# The butterfly network has the shape of an unnormalised 8-point Hadamard
# transform, which is presumably what "satd" in the symbol name refers to
# -- TODO confirm against the Rust source.
#
# Inputs (read before being written; the order matches the System V AMD64
# integer argument registers -- presumably that is the ABI in use):
#   rdi  address of the first block's top-left 16-bit value
#   rsi  row pitch of the first block; it is doubled before being used as a
#        byte step, so presumably it is counted in 16-bit elements
#   rdx  address of the second block's top-left 16-bit value
#   rcx  row pitch of the second block, doubled the same way
# Output:
#   rax  sum of the 64 absolute values (accumulated with 64-bit adds)
# Saved and restored here: rbp, rbx, r12, r14, r15.
# Also written: rcx, rdx, rsi, rdi, r8-r11, ymm0-ymm7, flags.
#
# Stack frame (rsp is rounded down to a multiple of 64, then 832 bytes are
# reserved):
#   [rsp +   0, rsp + 256)  difference / transform workspace, 8 rows x 8 dwords
#   [rsp + 256, rsp + 512)  first block widened to dwords
#   [rsp + 512, rsp + 768)  second block widened to dwords
#   the remaining bytes of the reservation are not accessed in this listing
# -----------------------------------------------------------------------------
- .section .text.rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2,"ax",@progbits
- .p2align 4, 0x90
- .type rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2,@function
- rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2:
- .cfi_startproc
# Prologue: rbp-based frame, so the original rsp can be recovered after the
# alignment below; then the callee-saved registers this function writes.
- push rbp
- .cfi_def_cfa_offset 16
- .cfi_offset rbp, -16
- mov rbp, rsp
- .cfi_def_cfa_register rbp
- push r15
- push r14
- push r12
- push rbx
# Align the frame to 64 bytes; every ymm slot below sits at a multiple of 32
# from rsp, which is what the aligned vmovdqa accesses require.
- and rsp, -64
- sub rsp, 832
- .cfi_offset rbx, -48
- .cfi_offset r12, -40
- .cfi_offset r14, -32
- .cfi_offset r15, -24
# Bias both pointers by 14 bytes so the eight words of a row are addressed as
# [p - 14] .. [p], and double both pitches to turn them into byte steps.
- add rdx, 14
- add rcx, rcx
- add rdi, 14
- add rsi, rsi
# rax = byte offset of the current row inside the widened buffers (0, 32, .. 224).
- xor eax, eax
- .p2align 4, 0x90
# --- Loop 1: widen one row of each block per iteration (8 iterations) ---------
# Each 16-bit value is zero-extended through ebx and stored as a dword:
# first block at [rsp + 256 + rax + 4*col], second at [rsp + 512 + rax + 4*col].
- .LBB666_1:
- movzx ebx, word ptr [rdi - 14]
- mov dword ptr [rsp + rax + 256], ebx
- movzx ebx, word ptr [rdx - 14]
- mov dword ptr [rsp + rax + 512], ebx
- movzx ebx, word ptr [rdi - 12]
- mov dword ptr [rsp + rax + 260], ebx
- movzx ebx, word ptr [rdx - 12]
- mov dword ptr [rsp + rax + 516], ebx
- movzx ebx, word ptr [rdi - 10]
- mov dword ptr [rsp + rax + 264], ebx
- movzx ebx, word ptr [rdx - 10]
- mov dword ptr [rsp + rax + 520], ebx
- movzx ebx, word ptr [rdi - 8]
- mov dword ptr [rsp + rax + 268], ebx
- movzx ebx, word ptr [rdx - 8]
- mov dword ptr [rsp + rax + 524], ebx
- movzx ebx, word ptr [rdi - 6]
- mov dword ptr [rsp + rax + 272], ebx
- movzx ebx, word ptr [rdx - 6]
- mov dword ptr [rsp + rax + 528], ebx
- movzx ebx, word ptr [rdi - 4]
- mov dword ptr [rsp + rax + 276], ebx
- movzx ebx, word ptr [rdx - 4]
- mov dword ptr [rsp + rax + 532], ebx
- movzx ebx, word ptr [rdi - 2]
- mov dword ptr [rsp + rax + 280], ebx
- movzx ebx, word ptr [rdx - 2]
- mov dword ptr [rsp + rax + 536], ebx
# Last column of the row; the pointer is stepped to the next row right after
# its final load.
- movzx ebx, word ptr [rdi]
- add rdi, rsi
- mov dword ptr [rsp + rax + 284], ebx
- movzx ebx, word ptr [rdx]
- add rdx, rcx
- mov dword ptr [rsp + rax + 540], ebx
- add rax, 32
- cmp rax, 256
- jne .LBB666_1
# --- Difference: ymm<i> = first row i - second row i, eight dword lanes -------
- vmovdqa ymm0, ymmword ptr [rsp + 256]
- vpsubd ymm0, ymm0, ymmword ptr [rsp + 512]
- vmovdqa ymm1, ymmword ptr [rsp + 288]
- vmovdqa ymm2, ymmword ptr [rsp + 320]
- vmovdqa ymm3, ymmword ptr [rsp + 352]
- vpsubd ymm1, ymm1, ymmword ptr [rsp + 544]
- vpsubd ymm2, ymm2, ymmword ptr [rsp + 576]
- vpsubd ymm3, ymm3, ymmword ptr [rsp + 608]
- vmovdqa ymm4, ymmword ptr [rsp + 384]
- vpsubd ymm4, ymm4, ymmword ptr [rsp + 640]
- vmovdqa ymm5, ymmword ptr [rsp + 416]
- vpsubd ymm5, ymm5, ymmword ptr [rsp + 672]
- vmovdqa ymm6, ymmword ptr [rsp + 448]
- vmovdqa ymm7, ymmword ptr [rsp + 480]
- vpsubd ymm6, ymm6, ymmword ptr [rsp + 704]
- vpsubd ymm7, ymm7, ymmword ptr [rsp + 736]
# rcx = column index - 7; it counts -7 .. 0 so the column loop can end on a
# plain zero test.
- mov rcx, -7
# Seed the column loop with column 0 taken straight from lane 0 of each row:
# rows 0..7 -> r12d, r9d, r8d, r10d, r15d, r11d, r14d, ebx.
- vmovd r12d, xmm0
- vmovd r9d, xmm1
- vmovd r8d, xmm2
- vmovd r10d, xmm3
- vmovd r15d, xmm4
- vmovd r11d, xmm5
- vmovd r14d, xmm6
- vmovd ebx, xmm7
# Spill the eight difference rows into the workspace at [rsp + 32*row].
- vmovdqa ymmword ptr [rsp], ymm0
- vmovdqa ymmword ptr [rsp + 32], ymm1
- vmovdqa ymmword ptr [rsp + 64], ymm2
- vmovdqa ymmword ptr [rsp + 96], ymm3
- vmovdqa ymmword ptr [rsp + 128], ymm4
- vmovdqa ymmword ptr [rsp + 160], ymm5
- vmovdqa ymmword ptr [rsp + 192], ymm6
- vmovdqa ymmword ptr [rsp + 224], ymm7
- .p2align 4, 0x90
# --- Loop 2: column pass, one column per iteration ----------------------------
# On entry the column's eight values a0..a7 (rows 0..7) are in
# r12d, r9d, r8d, r10d, r15d, r11d, r14d, ebx. lea forms each sum and the
# following sub overwrites one operand with the matching difference.
- .LBB666_3:
# Stage 1: sums and differences of (a0,a1), (a2,a3), (a4,a5), (a6,a7).
- lea esi, [r9 + r12]
- sub r12d, r9d
- lea edx, [r10 + r8]
- sub r8d, r10d
- lea edi, [r11 + r15]
- lea r9d, [rbx + r14]
- sub r15d, r11d
- sub r14d, ebx
# Stage 2: combine the stage-1 results pairwise within each half (rows 0-3
# and rows 4-7).
- lea r10d, [rdx + rsi]
- sub esi, edx
- lea edx, [r8 + r12]
- lea eax, [r9 + rdi]
- sub r12d, r8d
- lea ebx, [r14 + r15]
- sub edi, r9d
- sub r15d, r14d
# Stage 3: combine the rows 0-3 half with the rows 4-7 half.
- lea r8d, [rax + r10]
- sub r10d, eax
- lea eax, [rbx + rdx]
- sub edx, ebx
- lea ebx, [rdi + rsi]
- sub esi, edi
- lea edi, [r15 + r12]
- sub r12d, r15d
# Write the eight outputs back to this column. 4*rcx + 28 equals 4*column
# because rcx = column - 7; rows are 32 bytes apart.
- mov dword ptr [rsp + 4*rcx + 28], r8d
- mov dword ptr [rsp + 4*rcx + 60], eax
- mov dword ptr [rsp + 4*rcx + 92], ebx
- mov dword ptr [rsp + 4*rcx + 124], edi
- mov dword ptr [rsp + 4*rcx + 156], r10d
- mov dword ptr [rsp + 4*rcx + 188], edx
- mov dword ptr [rsp + 4*rcx + 220], esi
- mov dword ptr [rsp + 4*rcx + 252], r12d
# rcx == 0 means column 7 has just been written: leave the loop.
- test rcx, rcx
- je .LBB666_4
# Otherwise load the next column (4*rcx + 32 = 4*(column + 1)) into the same
# input registers and go round again.
- mov r12d, dword ptr [rsp + 4*rcx + 32]
- mov r9d, dword ptr [rsp + 4*rcx + 64]
- mov r8d, dword ptr [rsp + 4*rcx + 96]
- mov r10d, dword ptr [rsp + 4*rcx + 128]
- mov r15d, dword ptr [rsp + 4*rcx + 160]
- mov r11d, dword ptr [rsp + 4*rcx + 192]
- mov r14d, dword ptr [rsp + 4*rcx + 224]
- mov ebx, dword ptr [rsp + 4*rcx + 256]
- inc rcx
- jmp .LBB666_3
- .LBB666_4:
# rax = byte offset of the current row in the workspace (0, 32, .. 224).
- xor eax, eax
- .p2align 4, 0x90
# --- Loop 3: row pass, one row per iteration ----------------------------------
# Same three-stage butterfly as the column pass, applied to the eight dwords
# x0..x7 of a row; loads are interleaved with the arithmetic.
- .LBB666_5:
- mov ecx, dword ptr [rsp + rax]
- mov esi, dword ptr [rsp + rax + 4]
- mov ebx, dword ptr [rsp + rax + 8]
- mov edx, dword ptr [rsp + rax + 20]
- mov edi, dword ptr [rsp + rax + 28]
# Left half (x0..x3): stage 1 on (x0,x1) and (x2,x3), then stage 2.
- lea r10d, [rsi + rcx]
- sub ecx, esi
- mov esi, dword ptr [rsp + rax + 12]
- lea r9d, [rsi + rbx]
- sub ebx, esi
- mov esi, dword ptr [rsp + rax + 16]
- lea r8d, [r9 + r10]
- sub r10d, r9d
- lea r9d, [rbx + rcx]
- sub ecx, ebx
# Right half (x4..x7): stage 1 on (x4,x5) and (x6,x7), then stage 2.
- lea r11d, [rdx + rsi]
- sub esi, edx
- mov edx, dword ptr [rsp + rax + 24]
- lea r14d, [rdi + rdx]
- sub edx, edi
- lea edi, [r14 + r11]
- sub r11d, r14d
- lea ebx, [rdx + rsi]
- sub esi, edx
# Stage 3: combine the left-half and right-half results.
- lea r14d, [rdi + r8]
- sub r8d, edi
- lea edi, [rbx + r9]
- sub r9d, ebx
- lea ebx, [r11 + r10]
- lea edx, [rsi + rcx]
- sub r10d, r11d
- sub ecx, esi
# Store the transformed row back in place.
- mov dword ptr [rsp + rax], r14d
- mov dword ptr [rsp + rax + 4], edi
- mov dword ptr [rsp + rax + 8], ebx
- mov dword ptr [rsp + rax + 12], edx
- mov dword ptr [rsp + rax + 16], r8d
- mov dword ptr [rsp + rax + 20], r9d
- mov dword ptr [rsp + rax + 24], r10d
- mov dword ptr [rsp + rax + 28], ecx
- add rax, 32
- cmp rax, 256
- jne .LBB666_5
# --- Absolute values and vertical accumulation --------------------------------
# |row| for all eight rows, added lane-wise; ymm0 ends up holding eight 32-bit
# partial sums (one per column).
- vpabsd ymm0, ymmword ptr [rsp]
- vpabsd ymm1, ymmword ptr [rsp + 32]
- vpabsd ymm2, ymmword ptr [rsp + 96]
- vpaddd ymm0, ymm1, ymm0
- vpabsd ymm1, ymmword ptr [rsp + 64]
- vpaddd ymm1, ymm1, ymm2
- vpabsd ymm2, ymmword ptr [rsp + 160]
- vpaddd ymm0, ymm0, ymm1
- vpabsd ymm1, ymmword ptr [rsp + 128]
- vpaddd ymm1, ymm1, ymm2
- vpabsd ymm2, ymmword ptr [rsp + 192]
- vpaddd ymm1, ymm1, ymm2
- vpabsd ymm2, ymmword ptr [rsp + 224]
- vpaddd ymm0, ymm0, ymm1
- vpaddd ymm0, ymm0, ymm2
# --- Horizontal sum -----------------------------------------------------------
# Each lane is extracted into a 32-bit register (which clears the upper half
# of the 64-bit register) and then added with 64-bit arithmetic, so the final
# reduction across lanes cannot wrap at 32 bits.
- vmovd eax, xmm0
- vpextrd ecx, xmm0, 1
- add rcx, rax
- vpextrd eax, xmm0, 2
- add rax, rcx
- vpextrd ecx, xmm0, 3
# Bring the upper 128 bits (lanes 4..7) down into xmm0.
- vextracti128 xmm0, ymm0, 1
- add rcx, rax
- vmovd eax, xmm0
- vpextrd edx, xmm0, 2
- add rax, rcx
- vpextrd ecx, xmm0, 1
- add rcx, rax
- vpextrd eax, xmm0, 3
- add rdx, rcx
# rax = total of all eight lanes = return value.
- add rax, rdx
# Epilogue: the four pushed registers occupy the 32 bytes just below rbp, so
# point rsp there (discarding the aligned frame) and pop in reverse order.
- lea rsp, [rbp - 32]
- pop rbx
- pop r12
- pop r14
- pop r15
- pop rbp
- .cfi_def_cfa rsp, 8
# Clear the upper halves of the ymm registers before returning to the caller.
- vzeroupper
- ret
- .Lfunc_end666:
- .size rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2, .Lfunc_end666-rav1e::asm::x86::dist::hbd::satd_kernel_8x8_hbd_avx2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement