Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 7d462192dfb66c6b2b3bdbaa841a6a69e5b7848e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:32:11 -0500
- Subject: [PATCH 1/5] pred8x8l_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 78 ++++++++++++++++++++++++++++-----
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 68 insertions(+), 12 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 14a6038..b21516b 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -20,6 +20,7 @@
- ;******************************************************************************
- %include "x86inc.asm"
- +%include "x86util.asm"
- SECTION_RODATA
- @@ -37,6 +38,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_4
- cextern pw_5
- cextern pw_16
- cextern pw_17
- @@ -827,6 +829,70 @@ PRED8x8_H mmx
- PRED8x8_H mmxext
- PRED8x8_H ssse3
- +; args: dest, left, right, src, tmp  (left, right and tmp are overwritten)
- +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- +%macro PRED4x4_LOWPASS 5
- + mova %5, %2 ; tmp = left, kept for the rounding correction below
- + pavgb %2, %3 ; left = (left + right + 1) >> 1 per byte (rounds up)
- + pxor %3, %5 ; right = left ^ right
- + mova %1, %4 ; dest = src
- + pand %3, [pb_1] ; keep bit 0 only (pb_1 is presumably 0x01 in every byte): set where left+right is odd
- + psubusb %2, %3 ; undo the round-up: left = (left + right) >> 1
- + pavgb %1, %2 ; dest = (src + ((left + right) >> 1) + 1) >> 1, i.e. the formula above
- +%endmacro
- +
- +;-----------------------------------------------------------------------------
- +; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_top_dc_mmxext, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
- + sub r0, r3 ; r0 = src - stride: the row above the block
- + movq mm0, [r0-8] ; 8 bytes left of the top row; its last byte is the top-left pixel
- + movq mm3, [r0] ; top row t0..t7
- + movq mm1, [r0+8] ; 8 bytes right of the top row; its first byte is the first top-right pixel
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = topleft,t0..t6 (left neighbours; assumes PALIGNR_MMX follows palignr semantics)
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = t1..t7,topright0 (right neighbours)
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; no top-left pixel: substitute t0 first
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; no top-right pixel: substitute t7 first
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row
- + jmp .body
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; top row ^ left neighbours
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only the lowest byte: t0 ^ topleft
- + pxor mm2, mm5 ; lowest byte of mm2 becomes t0
- + test r2, r2 ; top_right
- + jnz .do_top ; top-right present: nothing more to patch
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; top row ^ right neighbours
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only the highest byte: t7 ^ topright0
- + pxor mm1, mm5 ; highest byte of mm1 becomes t7
- + jmp .do_top
- +.body:
- + pxor mm1, mm1
- + psadbw mm1, mm0 ; sum of the 8 filtered top pixels
- + paddw mm1, [pw_4] ; rounding term (pw_4 is presumably 4 in every word)
- + psrlw mm1, 3 ; dc = (sum + 4) >> 3
- + pshufw mm1, mm1, 0 ; broadcast dc to all four words
- + packuswb mm1, mm1 ; and down to all eight bytes
- +%rep 3
- + movq [r0+r3*1], mm1 ; r0 starts one row above the block, so this is the next even row
- + movq [r0+r3*2], mm1 ; and the odd row after it
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm1 ; rows 6 and 7
- + movq [r0+r3*2], mm1
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- @@ -1073,18 +1139,6 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
- movd [r1+r2*2], mm5
- RET
- -; dest, left, right, src, tmp
- -; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- -%macro PRED4x4_LOWPASS 5
- - mova %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mova %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- -%endmacro
- -
- ;-----------------------------------------------------------------------------
- ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 10a6dd6..aba02ce 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -57,6 +57,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -94,6 +95,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 30e18a52fb0409fd02de4f25e26285084fbb304f Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:49:27 -0500
- Subject: [PATCH 2/5] pred8x8l_vertical_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 46 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 48 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index b21516b..62a16ff 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -842,6 +842,52 @@ PRED8x8_H ssse3
- %endmacro
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_mmxext, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride
- + sub r0, r3 ; r0 = src - stride: the row above the block
- + movq mm0, [r0-8] ; 8 bytes left of the top row; its last byte is the top-left pixel
- + movq mm3, [r0] ; top row t0..t7
- + movq mm1, [r0+8] ; 8 bytes right of the top row; its first byte is the first top-right pixel
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = topleft,t0..t6 (left neighbours; assumes PALIGNR_MMX follows palignr semantics)
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = t1..t7,topright0 (right neighbours)
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; no top-left pixel: substitute t0 first
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; no top-right pixel: substitute t7 first
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row
- + jmp .body
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; top row ^ left neighbours
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only the lowest byte: t0 ^ topleft
- + pxor mm2, mm5 ; lowest byte of mm2 becomes t0
- + test r2, r2 ; top_right
- + jnz .do_top ; top-right present: nothing more to patch
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; top row ^ right neighbours
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only the highest byte: t7 ^ topright0
- + pxor mm1, mm5 ; highest byte of mm1 becomes t7
- + jmp .do_top
- +.body:
- +%rep 3
- + movq [r0+r3*1], mm0 ; copy the filtered top row into the next even row
- + movq [r0+r3*2], mm0 ; and the odd row after it
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm0 ; rows 6 and 7
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index aba02ce..79bdaec 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -96,6 +97,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- + h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 5fda00845e1c491ea8706782885737cc0ad15d7e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 16:21:59 -0500
- Subject: [PATCH 3/5] pred8x8_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 28 ++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 3 +++
- 2 files changed, 31 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 62a16ff..3c39b71 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -940,6 +940,34 @@ cglobal pred8x8l_top_dc_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +cglobal pred8x8_top_dc_mmxext, 2,2 ; r0 = src, r1 = stride
- + sub r0, r1 ; r0 = src - stride: the row above the block
- + movq mm0, [r0] ; top row t0..t7
- + pxor mm1, mm1
- + pxor mm2, mm2 ; mm2 = 0, used as zero operand below
- + punpckhbw mm1, mm0 ; t4..t7 interleaved with zero bytes
- + punpcklbw mm0, mm2 ; t0..t3 interleaved with zero bytes
- + psadbw mm1, mm2 ; s1 = t4 + t5 + t6 + t7
- + psadbw mm0, mm2 ; s0 = t0 + t1 + t2 + t3
- + psrlw mm1, 1 ; s1 >> 1
- + psrlw mm0, 1 ; s0 >> 1
- + pavgw mm1, mm2 ; ((s1 >> 1) + 1) >> 1 == (s1 + 2) >> 2
- + pavgw mm0, mm2 ; ((s0 >> 1) + 1) >> 1 == (s0 + 2) >> 2
- + pshufw mm1, mm1, 0 ; dc1 broadcast to all words
- + pshufw mm0, mm0, 0 ; dc0 broadcast to all words
- + packuswb mm0, mm1 ; bytes 0-3 = dc0 (left half), bytes 4-7 = dc1 (right half)
- +%rep 3
- + movq [r0+r1*1], mm0 ; r0 starts one row above the block, so this is the next even row
- + movq [r0+r1*2], mm0 ; and the odd row after it
- + lea r0, [r0+r1*2] ; advance two rows
- +%endrep
- + movq [r0+r1*1], mm0 ; rows 6 and 7
- + movq [r0+r1*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 79bdaec..dfa3b3f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -49,6 +49,7 @@ void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx2 (uint8_t *src, int stride);
- void ff_pred8x8_plane_sse2 (uint8_t *src, int stride);
- @@ -101,6 +102,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
- + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- --
- 1.7.2.2
- From 8d6b2365fa48e6dab2957e81702297d56ee67429 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 17:42:43 -0500
- Subject: [PATCH 4/5] pred8x8l_horizontal_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 75 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 77 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 3c39b71..5c5af03 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -968,6 +968,81 @@ cglobal pred8x8_top_dc_mmxext, 2,2
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_mmxext, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright (unused, reused as scratch), r3 = stride
- + sub r0, r3 ; r0 = src - stride: the row above the block
- + lea r2, [r0+r3*2] ; r2 = src + stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes left of row 0; last byte is l0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the row above; top word = l0,topleft
- + movq mm1, [r2+r3*1-8] ; 8 bytes left of row 2; last byte is l2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with row 1; top word = l2,l1
- + mov r2, r0 ; remember src - stride
- + punpckhwd mm1, mm0 ; top dword = l2,l1,l0,topleft
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8] ; 8 bytes left of row 4; last byte is l4
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with row 3; top word = l4,l3
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8] ; 8 bytes left of row 6; last byte is l6
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with row 5; top word = l6,l5
- + punpckhwd mm3, mm2 ; top dword = l6,l5,l4,l3
- + punpckhdq mm3, mm1 ; mm3 = l6,l5,l4,l3,l2,l1,l0,topleft (low to high byte)
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; 8 bytes left of row 7; last byte is l7
- + movq mm1, [r2] ; top row; first byte is t0
- + mov r0, r2 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = l7,l6,..,l0 (assumes PALIGNR_MMX follows palignr semantics)
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = l5,l4,..,l0,topleft,t0
- + test r1, r1 ; top_left
- + jz .fix_lt_1 ; no top-left pixel: substitute l0 first
- +.do_left:
- + movq mm0, mm4 ; save l7..l0, the macro overwrites its left/right operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = filtered l6..l0 (plus filtered top-left in the top byte)
- + movq mm4, mm0 ; mm4 = l7..l0 again
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; lowest byte of mm1 = (l6 + 3*l7 + 2) >> 2, the filtered l7
- + psllq mm1, 56 ; move that byte to the top, clearing the rest
- + PALIGNR mm7, mm1, 7, mm3 ; mm7 = filtered l7,l6,..,l0
- + movq mm3, mm7
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4 ; top byte = topleft ^ l0
- + psrlq mm5, 56
- + psllq mm5, 48 ; place it at byte 6, where mm1 holds topleft
- + pxor mm1, mm5 ; byte 6 of mm1 becomes l0
- + jmp .do_left
- +.body:
- + movq mm7, mm3
- + punpckhbw mm3, mm3 ; l3,l3,l2,l2,l1,l1,l0,l0
- + punpcklbw mm7, mm7 ; l7,l7,l6,l6,l5,l5,l4,l4
- + pshufw mm0, mm3, 0xff ; mm0 = l0 in every byte
- + pshufw mm1, mm3, 0xaa ; mm1 = l1 in every byte
- + pshufw mm2, mm3, 0x55 ; mm2 = l2 in every byte
- + pshufw mm3, mm3, 0x00 ; mm3 = l3 in every byte
- + pshufw mm4, mm7, 0xff ; mm4 = l4 in every byte
- + pshufw mm5, mm7, 0xaa ; mm5 = l5 in every byte
- + pshufw mm6, mm7, 0x55 ; mm6 = l6 in every byte
- + pshufw mm7, mm7, 0x00 ; mm7 = l7 in every byte
- + lea r1, [r0+r3*2] ; r1 = src + stride (has_topleft no longer needed)
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + movq [r0+r3*1], mm0 ; row 0
- + movq [r0+r3*2], mm1 ; row 1
- + movq [r1+r3*1], mm2 ; row 2
- + movq [r1+r3*2], mm3 ; row 3
- + movq [r2+r3*1], mm4 ; row 4
- + movq [r2+r3*2], mm5 ; row 5
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- + movq [r0+r3*1], mm6 ; row 6
- + movq [r0+r3*2], mm7 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index dfa3b3f..98a906f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -60,6 +60,7 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -99,6 +100,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 1713c97a216d9127e6e29d1edddc62dacde6608b Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 19:45:23 -0500
- Subject: [PATCH 5/5] pred8x8l_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 98 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 100 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 5c5af03..dfe381c 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -40,6 +40,7 @@ cextern pb_1
- cextern pb_3
- cextern pw_4
- cextern pw_5
- +cextern pw_8
- cextern pw_16
- cextern pw_17
- cextern pw_32
- @@ -888,6 +889,103 @@ cglobal pred8x8l_vertical_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_dc_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_dc_mmxext, 4,5 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride; r4 = scratch
- + sub r0, r3 ; r0 = src - stride: the row above the block
- + lea r4, [r0+r3*2] ; r4 = src + stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes left of row 0; last byte is l0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the row above; top word = l0,topleft
- + movq mm1, [r4+r3*1-8] ; 8 bytes left of row 2; last byte is l2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with row 1; top word = l2,l1
- + mov r4, r0 ; remember src - stride
- + punpckhwd mm1, mm0 ; top dword = l2,l1,l0,topleft
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8] ; 8 bytes left of row 4; last byte is l4
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with row 3; top word = l4,l3
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8] ; 8 bytes left of row 6; last byte is l6
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with row 5; top word = l6,l5
- + punpckhwd mm3, mm2 ; top dword = l6,l5,l4,l3
- + punpckhdq mm3, mm1 ; mm3 = l6,l5,l4,l3,l2,l1,l0,topleft (low to high byte)
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; 8 bytes left of row 7; last byte is l7
- + movq mm1, [r4] ; top row; first byte is t0
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = l7,l6,..,l0 (assumes PALIGNR_MMX follows palignr semantics)
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = l5,l4,..,l0,topleft,t0
- + test r1, r1 ; top_left
- + jz .fix_lt_1 ; no top-left pixel: substitute l0 first
- +.do_left:
- + movq mm0, mm4 ; save l7..l0, the macro overwrites its left/right operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = filtered l6..l0 (plus filtered top-left in the top byte)
- + movq mm4, mm0 ; mm4 = l7..l0 again
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; lowest byte of mm1 = (l6 + 3*l7 + 2) >> 2, the filtered l7
- + psllq mm1, 56 ; move that byte to the top, clearing the rest
- + PALIGNR mm7, mm1, 7, mm3 ; mm7 = filtered l7,l6,..,l0 (kept until .body)
- +.check_top:
- + movq mm0, [r0-8] ; 8 bytes left of the top row; its last byte is the top-left pixel
- + movq mm3, [r0] ; top row t0..t7
- + movq mm1, [r0+8] ; 8 bytes right of the top row; its first byte is the first top-right pixel
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = topleft,t0..t6 (left neighbours)
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = t1..t7,topright0 (right neighbours)
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; no top-left pixel: substitute t0 first
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; no top-right pixel: substitute t7 first
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = low-pass filtered top row
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4 ; top byte = topleft ^ l0
- + psrlq mm5, 56
- + psllq mm5, 48 ; place it at byte 6, where mm1 holds topleft
- + pxor mm1, mm5 ; byte 6 of mm1 becomes l0
- + jmp .do_left
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; top row ^ left neighbours
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only the lowest byte: t0 ^ topleft
- + pxor mm2, mm5 ; lowest byte of mm2 becomes t0
- + test r2, r2 ; top_right
- + jnz .do_top ; top-right present: nothing more to patch
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; top row ^ right neighbours
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only the highest byte: t7 ^ topright0
- + pxor mm1, mm5 ; highest byte of mm1 becomes t7
- + jmp .do_top
- +.body:
- + pxor mm0, mm0
- + pxor mm1, mm1
- + psadbw mm0, mm7 ; sum of the 8 filtered left pixels
- + psadbw mm1, mm6 ; sum of the 8 filtered top pixels
- + paddw mm0, [pw_8] ; rounding term (pw_8 is presumably 8 in every word)
- + paddw mm0, mm1
- + psrlw mm0, 4 ; dc = (left sum + top sum + 8) >> 4
- + pshufw mm0, mm0, 0 ; broadcast dc to all four words
- + packuswb mm0, mm0 ; and down to all eight bytes
- +%rep 3
- + movq [r0+r3*1], mm0 ; r0 starts one row above the block, so this is the next even row
- + movq [r0+r3*2], mm0 ; and the odd row after it
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm0 ; rows 6 and 7
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 98a906f..b5e6c02 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- @@ -98,6 +99,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- --
- 1.7.2.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement