Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
;-----------------------------------------------------------------------------
; HEVC_EPEL_HV16_8_H_ROW dst
;
; Horizontal 4-tap pass over one row of 16 8-bit pixels.
;
;   In:    rdx   = address of pixel 0 of the row
;          ymm14 = signed byte coefficients applied to the (p[i-1], p[i])   pair
;          ymm15 = signed byte coefficients applied to the (p[i+1], p[i+2]) pair
;   Out:   %1    = 16 signed 16-bit sums, pixel 0 in the lowest word
;   Clobb: ymm1, ymm2, ymm3, ymm10
;
; Each of the four unaligned loads fetches 32 bytes (rdx-1 .. rdx+33 overall),
; but only the low 16 bytes of every vector contribute to the result.
;-----------------------------------------------------------------------------
%macro HEVC_EPEL_HV16_8_H_ROW 1
        vmovdqu         %1,    [rdx - 1]        ; p[i-1]
        vmovdqu         ymm1,  [rdx]            ; p[i]
        vmovdqu         ymm2,  [rdx + 1]        ; p[i+1]
        vmovdqu         ymm3,  [rdx + 2]        ; p[i+2]

        ; Byte-interleave taps 0/1. The unpacks work per 128-bit lane, so the
        ; pairs for pixels 8..15 (low lane of the "high" unpack) are moved
        ; into the upper lane to get pixels 0..15 in order.
        vpunpckhbw      ymm10, %1, ymm1
        vpunpcklbw      %1, ymm1
        vinserti128     %1, %1, xmm10, 1

        ; Same arrangement for taps 2/3.
        vpunpckhbw      ymm10, ymm2, ymm3
        vpunpcklbw      ymm2, ymm3
        vinserti128     ymm2, ymm2, xmm10, 1

        vpmaddubsw      %1, ymm14               ; p[i-1]*c0 + p[i]*c1
        vpmaddubsw      ymm2, ymm15             ; p[i+1]*c2 + p[i+2]*c3
        vpaddw          %1, ymm2
%endmacro

;-----------------------------------------------------------------------------
; ff_hevc_put_hevc_epel_hv16_8_avx2
;
; Separable 4-tap horizontal + 4-tap vertical filter producing, per row,
; 16 signed 16-bit samples from 8-bit source pixels.
;
; Register usage follows the SysV AMD64 argument order:
;   rdi      = destination; 32 bytes are stored per row, then rdi += 2*rsi
;   rsi      = destination step per row, in 2-byte units
;   rdx      = source pointer to pixel 0 of the first output row
;   rcx      = source step per row, in bytes
;   r8d      = number of rows; must be >= 1 (the counter is only tested
;              after a row has been written)
;   r9       = 1-based index selecting the horizontal coefficients from
;              hevc_epel_filters_avx2_8  (64 bytes per entry)
;   [rsp+8]  = first stack argument: 1-based index selecting the vertical
;              coefficients from hevc_epel_filters_avx2_10 (64 bytes per entry)
;
; Source rows touched: one row above the first output row up to two rows
; below the last one.
;
; Clobb: rax, rdx, rdi, r8, r9, flags, ymm0-ymm7, ymm10, ymm12-ymm15
;
; The coefficient tables are read with vmovdqa, so every 32-byte half of an
; entry has to be 32-byte aligned.
; NOTE(review): the tables are addressed as absolute symbol + index register,
; which requires an absolute 32-bit relocation - confirm this object is never
; linked into position-independent code.
;-----------------------------------------------------------------------------
[global ff_hevc_put_hevc_epel_hv16_8_avx2]
%line 1371+0 libavcodec/x86/hevc_mc.asm
[align 16]
ff_hevc_put_hevc_epel_hv16_8_avx2:
[cpu amdnop]
        mov             rax, qword [rsp + 0 + 8] ; vertical filter index (stack arg)
        sub             rdx, rcx                ; vertical taps start one row above

        ; Table offset = (index - 1) * 64.
        sub             r9, 1
        sub             rax, 1
        shl             r9, 6
        shl             rax, 6
        vmovdqa         ymm14, [hevc_epel_filters_avx2_8 + r9]
        vmovdqa         ymm15, [hevc_epel_filters_avx2_8 + r9 + 32]
        vmovdqa         ymm12, [hevc_epel_filters_avx2_10 + rax]
        vmovdqa         ymm13, [hevc_epel_filters_avx2_10 + rax + 32]

        ; Prime the vertical window with three horizontally filtered rows:
        ; ymm0 = row -1, ymm4 = row 0, ymm5 = row +1.
        HEVC_EPEL_HV16_8_H_ROW ymm0
        lea             rdx, [rdx + rcx]
        HEVC_EPEL_HV16_8_H_ROW ymm4
        lea             rdx, [rdx + rcx]
        HEVC_EPEL_HV16_8_H_ROW ymm5
        lea             rdx, [rdx + rcx]

.loop:
        ; Invariant: ymm0/ymm4/ymm5 hold the three previous filtered rows and
        ; rdx points at the fourth row needed for the current output row.
        HEVC_EPEL_HV16_8_H_ROW ymm6

        ; Word-interleave rows 0/1 and regroup the lanes:
        ;   ymm7 = (row0,row1) pairs for pixels 0..7
        ;   ymm1 = (row0,row1) pairs for pixels 8..15
        vpunpckhwd      ymm10, ymm0, ymm4
        vpunpcklwd      ymm7, ymm0, ymm4
        vextracti128    xmm1, ymm7, 1
        vinserti128     ymm1, ymm10, xmm1, 0
        vinserti128     ymm7, ymm7, xmm10, 1

        ; Same for rows 2/3:
        ;   ymm2 = pairs for pixels 0..7, ymm3 = pairs for pixels 8..15
        vpunpckhwd      ymm10, ymm5, ymm6
        vpunpcklwd      ymm2, ymm5, ymm6
        vextracti128    xmm3, ymm2, 1
        vinserti128     ymm3, ymm10, xmm3, 0
        vinserti128     ymm2, ymm2, xmm10, 1

        ; Vertical taps: 32-bit sums of the four weighted rows.
        vpmaddwd        ymm7, ymm12
        vpmaddwd        ymm2, ymm13
        vpaddd          ymm7, ymm2              ; pixels 0..7
        vpmaddwd        ymm1, ymm12
        vpmaddwd        ymm3, ymm13
        vpaddd          ymm1, ymm3              ; pixels 8..15

        vpsrad          ymm7, 14-8              ; arithmetic shift right by 6
        vpsrad          ymm1, 14-8
        vpackssdw       ymm7, ymm1              ; saturate to int16; lanes hold 0..3,8..11 | 4..7,12..15
        vpermq          ymm7, ymm7, 216         ; 0xD8: qword order 0,2,1,3 -> pixels 0..15
        vmovdqu         [rdi], ymm7

        ; Slide the three-row window down by one row.
        vmovdqa         ymm0, ymm4
        vmovdqa         ymm4, ymm5
        vmovdqa         ymm5, ymm6

        lea             rdi, [rdi + 2*rsi]
        lea             rdx, [rdx + rcx]
        dec             r8d
        jnz             .loop
..@1363.branch_instr:
        vzeroupper                              ; clear upper YMM halves before returning to the caller
        ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement