Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- commit c57a2562ce648e054ee00c8990ad627fe8888d9c (HEAD -> hevc-12b-x86-fix-overflow)
- Author: Ronald S. Bultje <[email protected]>
- Date: Sun Feb 25 10:49:35 2024 -0500
- hevc/x86/deblock: fix 12bit overflow.
- diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
- index 85ee4800bb..15fecaed96 100644
- --- a/libavcodec/x86/hevc_deblock.asm
- +++ b/libavcodec/x86/hevc_deblock.asm
- @@ -31,6 +31,7 @@ cextern pw_1023
- pw_pixel_max_12: times 8 dw ((1 << 12)-1)
- pw_m2: times 8 dw -2
- pd_1 : times 4 dd 1
- +pd_8 : times 8 dd 8
- cextern pw_4
- cextern pw_8
- @@ -541,19 +542,46 @@ ALIGN 16
- add betaq, r13
- shr betaq, 3; ((beta + (beta >> 1)) >> 3))
- - mova m13, [pw_8]
- psubw m12, m4, m3 ; q0 - p0
- - psllw m10, m12, 3; 8 * (q0 - p0)
- - paddw m12, m10 ; 9 * (q0 - p0)
- -
- + paddw m10, m12, m12
- + paddw m12, m10 ; 3 * (q0 - p0)
- psubw m10, m5, m2 ; q1 - p1
- - psllw m8, m10, 1; 2 * ( q1 - p1 )
- - paddw m10, m8; 3 * ( q1 - p1 )
- - psubw m12, m10; 9 * (q0 - p0) - 3 * ( q1 - p1 )
- + psubw m12, m10 ; 3 * (q0 - p0) - (q1 - p1)
- +%if %1 < 12
- + mova m13, [pw_8]
- + paddw m10, m12, m12
- + paddw m12, m10 ; 9 * (q0 - p0) - 3 * ( q1 - p1 )
- paddw m12, m13; + 8
- psraw m12, 4; >> 4 , delta0
- PABSW m13, m12; abs(delta0)
- -
- +%elif cpuflag(ssse3)
- + pabsw m13, m12
- + paddw m10, m13, m13
- + paddw m13, m10 ; abs(9 * (q0 - p0) - 3 * ( q1 - p1 ))
- + paddw m13, [pw_8]
- + pxor m10, m10
- + pcmpgtw m10, m12
- + paddw m13, m10
- + psrlw m13, 4; >> 4, abs(delta0)
- + psignw m10, m13, m12
- + SWAP 10, 12
- +%else
- + pxor m13, m13
- + pcmpgtw m13, m12
- + punpckhwd m10, m12, m13
- + punpcklwd m12, m13
- + paddd m13, m10, m10
- + paddd m10, m13
- + paddd m13, m12, m12
- + paddd m12, m13
- + mova m13, [pd_8]
- + paddd m10, m13
- + paddd m12, m13
- + psrad m10, 4
- + psrad m12, 4
- + packssdw m12, m10
- + PABSW m13, m12
- +%endif
- psllw m10, m9, 2; 8 * tc
- paddw m10, m9; 10 * tc
Advertisement
Add Comment
Please, Sign In to add comment