Guest User

Untitled

a guest
Feb 25th, 2024
149
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.48 KB | None | 0 0
  1. commit c57a2562ce648e054ee00c8990ad627fe8888d9c (HEAD -> hevc-12b-x86-fix-overflow)
  2. Author: Ronald S. Bultje <[email protected]>
  3. Date: Sun Feb 25 10:49:35 2024 -0500
  4.  
  5. hevc/x86/deblock: fix 12bit overflow.
  6.  
  7. diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
  8. index 85ee4800bb..15fecaed96 100644
  9. --- a/libavcodec/x86/hevc_deblock.asm
  10. +++ b/libavcodec/x86/hevc_deblock.asm
  11. @@ -31,6 +31,7 @@ cextern pw_1023
  12. pw_pixel_max_12: times 8 dw ((1 << 12)-1)
  13. pw_m2: times 8 dw -2
  14. pd_1 : times 4 dd 1
  15. +pd_8 : times 8 dd 8
  16.  
  17. cextern pw_4
  18. cextern pw_8
  19. @@ -541,19 +542,46 @@ ALIGN 16
  20. add betaq, r13
  21. shr betaq, 3; ((beta + (beta >> 1)) >> 3))
  22.  
  23. - mova m13, [pw_8]
  24. psubw m12, m4, m3 ; q0 - p0
  25. - psllw m10, m12, 3; 8 * (q0 - p0)
  26. - paddw m12, m10 ; 9 * (q0 - p0)
  27. -
  28. + paddw m10, m12, m12
  29. + paddw m12, m10 ; 3 * (q0 - p0)
  30. psubw m10, m5, m2 ; q1 - p1
  31. - psllw m8, m10, 1; 2 * ( q1 - p1 )
  32. - paddw m10, m8; 3 * ( q1 - p1 )
  33. - psubw m12, m10; 9 * (q0 - p0) - 3 * ( q1 - p1 )
  34. + psubw m12, m10 ; 3 * (q0 - p0) - (q1 - p1)
  35. +%if %1 < 12
  36. + mova m13, [pw_8]
  37. + paddw m10, m12, m12
  38. + paddw m12, m10 ; 9 * (q0 - p0) - 3 * ( q1 - p1 )
  39. paddw m12, m13; + 8
  40. psraw m12, 4; >> 4 , delta0
  41. PABSW m13, m12; abs(delta0)
  42. -
  43. +%elif cpuflag(ssse3)
  44. + pabsw m13, m12
  45. + paddw m10, m13, m13
  46. + paddw m13, m10 ; abs(9 * (q0 - p0) - 3 * ( q1 - p1 ))
  47. + paddw m13, [pw_8]
  48. + pxor m10, m10
  49. + pcmpgtw m10, m12
  50. + paddw m13, m10
  51. + psrlw m13, 4; >> 4, abs(delta0)
  52. + psignw m10, m13, m12
  53. + SWAP 10, 12
  54. +%else
  55. + pxor m13, m13
  56. + pcmpgtw m13, m12
  57. + punpckhwd m10, m12, m13
  58. + punpcklwd m12, m13
  59. + paddd m13, m10, m10
  60. + paddd m10, m13
  61. + paddd m13, m12, m12
  62. + paddd m12, m13
  63. + mova m13, [pd_8]
  64. + paddd m10, m13
  65. + paddd m12, m13
  66. + psrad m10, 4
  67. + psrad m12, 4
  68. + packssdw m12, m10
  69. + PABSW m13, m12
  70. +%endif
  71.  
  72. psllw m10, m9, 2; 8 * tc
  73. paddw m10, m9; 10 * tc
  74.  
Advertisement
Add Comment
Please, Sign In to add comment