Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 1b9409b5264ee17651a4467529a841b8de7545f7 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 17:11:09 -0500
- Subject: [PATCH 1/5] pred8x8_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 33 +++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 ++++
- 2 files changed, 37 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 14a6038..1cb22f5 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -828,6 +828,39 @@ PRED8x8_H mmxext
- PRED8x8_H ssse3
- ;-----------------------------------------------------------------------------
- +; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +cglobal pred8x8_top_dc_mmxext, 2,5 ; r0 = src, r1 = stride (see prototype above); fills the 8x8 block from the row above it
- + sub r0, r1 ; r0 -> row directly above the block
- + movq mm0, [r0] ; mm0 = the 8 top neighbour bytes t[0..7]
- + pxor mm1, mm1 ; mm1 = 0
- + pxor mm2, mm2 ; mm2 = 0 (zero operand for the unpack/psadbw/pavgw below)
- + lea r2, [r0+r1*2] ; r2 = r0 + 2*stride (base for rows 2 and 3)
- + punpckhbw mm1, mm0 ; t[4..7] placed in the high byte of each word, low bytes stay 0
- + punpcklbw mm0, mm2 ; t[0..3] zero-extended to words
- + psadbw mm1, mm2 ; s1 = t[4]+t[5]+t[6]+t[7]
- + lea r3, [r2+r1*2] ; r3 = r0 + 4*stride (base for rows 4 and 5)
- + psadbw mm0, mm2 ; s0 = t[0]+t[1]+t[2]+t[3]
- + psrlw mm1, 1 ; s1 >> 1
- + psrlw mm0, 1 ; s0 >> 1
- + pavgw mm1, mm2 ; ((s1>>1) + 1) >> 1, i.e. (s1 + 2) >> 2
- + lea r4, [r3+r1*2] ; NOTE(review): r4 is not read again in this function; L43's lea recomputes the same address into r0
- + pavgw mm0, mm2 ; ((s0>>1) + 1) >> 1, i.e. (s0 + 2) >> 2
- + pshufw mm1, mm1, 0 ; broadcast word 0 (dc1) to all four words
- + pshufw mm0, mm0, 0 ; dc0 (w)
- + packuswb mm0, mm1 ; dc0,dc1 (b): low 4 bytes = dc0, high 4 bytes = dc1
- + movq [r0+r1*1], mm0 ; row 0
- + movq [r0+r1*2], mm0 ; row 1
- + lea r0, [r3+r1*2] ; r0 = original r0 + 6*stride (base for rows 6 and 7)
- + movq [r2+r1*1], mm0 ; row 2
- + movq [r2+r1*2], mm0 ; row 3
- + movq [r3+r1*1], mm0 ; row 4
- + movq [r3+r1*2], mm0 ; row 5
- + movq [r0+r1*1], mm0 ; row 6
- + movq [r0+r1*2], mm0 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 10a6dd6..57a93f7 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -44,6 +44,7 @@ void ff_pred16x16_plane_svq3_ssse3 (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
- +void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- @@ -97,6 +98,9 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
- + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- + }
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- --
- 1.7.2.2
- From fbafcfba182db3d9539a62cefb028a087e0c473e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 17:42:14 -0500
- Subject: [PATCH 2/5] pred8x8_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 61 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 63 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 1cb22f5..61a587d 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -861,6 +861,67 @@ cglobal pred8x8_top_dc_mmxext, 2,5
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +cglobal pred8x8_dc_mmxext, 2,5 ; r0 = src, r1 = stride (see prototype above); DC prediction from the top row and left column
- + sub r0, r1 ; r0 -> row directly above the block
- + pxor m7, m7 ; m7 = 0
- + movd m0, [r0+0] ; top neighbours t[0..3]
- + movd m1, [r0+4] ; top neighbours t[4..7]
- + psadbw m0, m7 ; s0
- + mov r4, r0 ; keep the row-above pointer; r0 is advanced while reading the left column
- + psadbw m1, m7 ; s1
- +
- + movzx r2d, byte [r0+r1*1-1] ; left neighbour of row 0
- + movzx r3d, byte [r0+r1*2-1] ; left neighbour of row 1
- + lea r0, [r0+r1*2] ; advance two rows
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*1-1] ; left neighbour of row 2
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*2-1] ; left neighbour of row 3
- + add r2d, r3d
- + lea r0, [r0+r1*2] ; advance two rows
- + movd m2, r2d ; s2
- + movzx r2d, byte [r0+r1*1-1] ; left neighbour of row 4
- + movzx r3d, byte [r0+r1*2-1] ; left neighbour of row 5
- + lea r0, [r0+r1*2] ; advance two rows
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*1-1] ; left neighbour of row 6
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*2-1] ; left neighbour of row 7
- + add r2d, r3d
- + movd m3, r2d ; s3
- +
- + punpcklwd m0, m1 ; words: s0, s1
- + mov r0, r4 ; restore r0 -> row above the block
- + punpcklwd m2, m3 ; words: s2, s3
- + punpckldq m0, m2 ; s0, s1, s2, s3
- + pshufw m3, m0, 11110110b ; s2, s1, s3, s3
- + lea r2, [r0+r1*2] ; base for rows 2 and 3
- + pshufw m0, m0, 01110100b ; s0, s1, s3, s1
- + paddw m0, m3 ; s0+s2, 2*s1, 2*s3, s1+s3
- + lea r3, [r2+r1*2] ; base for rows 4 and 5
- + psrlw m0, 2 ; with the pavgw below: ((x >> 2) + 1) >> 1, i.e. (x + 4) >> 3
- + pavgw m0, m7 ; s0+s2, s1, s3, s1+s3
- + lea r4, [r3+r1*2] ; base for rows 6 and 7
- + packuswb m0, m0 ; four DC values as bytes (repeated in both halves)
- + punpcklbw m0, m0 ; each DC byte duplicated: dc0 dc0 dc1 dc1 dc2 dc2 dc3 dc3
- + movq m1, m0
- + punpcklbw m0, m0 ; m0 = dc0 x4, dc1 x4 (rows 0-3)
- + punpckhbw m1, m1 ; m1 = dc2 x4, dc3 x4 (rows 4-7)
- + movq [r0+r1*1], m0 ; row 0
- + movq [r0+r1*2], m0 ; row 1
- + movq [r2+r1*1], m0 ; row 2
- + movq [r2+r1*2], m0 ; row 3
- + movq [r3+r1*1], m1 ; row 4
- + movq [r3+r1*2], m1 ; row 5
- + movq [r4+r1*1], m1 ; row 6
- + movq [r4+r1*2], m1 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 57a93f7..8f6fd2f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -46,6 +46,7 @@ void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
- +void ff_pred8x8_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
- @@ -100,6 +101,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
- h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext;
- }
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- --
- 1.7.2.2
- From 8196e167180f448d637a19e32fb79c9a05c2b71d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 17:56:00 -0500
- Subject: [PATCH 3/5] pred8x8l_top_dc_(mmxext|ssse3)
- ---
- libavcodec/x86/h264_intrapred.asm | 83 +++++++++++++++++++++++++++++-----
- libavcodec/x86/h264_intrapred_init.c | 4 ++
- 2 files changed, 75 insertions(+), 12 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 61a587d..c1b7886 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -20,6 +20,7 @@
- ;******************************************************************************
- %include "x86inc.asm"
- +%include "x86util.asm"
- SECTION_RODATA
- @@ -37,6 +38,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_4
- cextern pw_5
- cextern pw_16
- cextern pw_17
- @@ -1060,6 +1062,75 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
- jg .loop
- REP_RET
- +; dest, left, right, src, tmp
- +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- +%macro PRED4x4_LOWPASS 5 ; 3-tap smoothing filter; overwrites %2, %3 and %5 in addition to writing %1
- + mova %5, %2 ; tmp = left
- + pavgb %2, %3 ; %2 = (left + right + 1) >> 1
- + pxor %3, %5 ; %3 = left ^ right
- + mova %1, %4 ; dest = src
- + pand %3, [pb_1] ; keep the rounding bit of left+right (pb_1 is external; presumably 0x01 in every byte)
- + psubusb %2, %3 ; %2 = (left + right) >> 1, i.e. the average rounded down
- + pavgb %1, %2 ; dest = (src + %2 + 1) >> 1 = (left + 2*src + right + 2) >> 2
- +%endmacro
- +
- +;-----------------------------------------------------------------------------
- +; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +%macro PRED8x8L_TOP_DC 1 ; %1 = suffix appended to the function name (instantiated below with mmxext and ssse3)
- +cglobal pred8x8l_top_dc_%1, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (see prototype above)
- + sub r0, r3 ; r0 -> row directly above the block
- + pxor mm7, mm7 ; mm7 = 0, used as the psadbw accumulator in .body
- + movq mm0, [r0-8] ; 8 bytes ending at the top-left neighbour
- + movq mm3, [r0] ; top row t[0..7]
- + movq mm1, [r0+8] ; top-right bytes t[8..15]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = "left" operand for the lowpass: t[-1..6]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = "right" operand for the lowpass: t[1..8]
- + test r1d, r1d ; top_left (int argument: only the low 32 bits are defined on x86-64)
- + jz .fix_lt_2
- + test r2d, r2d ; top_right (int argument: only the low 32 bits are defined on x86-64)
- + jz .fix_tr_1
- + jmp .body
- +.fix_lt_2: ; no top-left: replace t[-1] (byte 0 of mm2) with t[0]
- + movq mm5, mm3
- + pxor mm5, mm2 ; mm5 = top ^ left-shifted top
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5 ; byte 0 of mm2 now equals byte 0 of mm3
- + test r2d, r2d ; top_right
- + jnz .body
- +.fix_tr_1: ; no top-right: replace t[8] (byte 7 of mm1) with t[7]
- + movq mm5, mm3
- + pxor mm5, mm1 ; mm5 = top ^ right-shifted top
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5 ; byte 7 of mm1 now equals byte 7 of mm3
- +.body:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = filtered top row
- + psadbw mm7, mm0 ; mm7 = sum of the 8 filtered bytes
- + paddw mm7, [pw_4] ; rounding term (pw_4 is external; presumably 4 in every word)
- + psrlw mm7, 3 ; divide the sum by 8
- + pshufw mm7, mm7, 0 ; broadcast the DC word
- + packuswb mm7, mm7 ; DC value in all 8 bytes
- +%rep 3 ; rows 0-5, two rows per iteration
- + movq [r0+r3*1], mm7
- + movq [r0+r3*2], mm7
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm7 ; row 6
- + movq [r0+r3*2], mm7 ; row 7
- + RET
- +%endmacro
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +PRED8x8L_TOP_DC mmxext
- +%define PALIGNR PALIGNR_SSSE3
- +PRED8x8L_TOP_DC ssse3
- +
- ;-----------------------------------------------------------------------------
- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- @@ -1167,18 +1238,6 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
- movd [r1+r2*2], mm5
- RET
- -; dest, left, right, src, tmp
- -; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- -%macro PRED4x4_LOWPASS 5
- - mova %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mova %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- -%endmacro
- -
- ;-----------------------------------------------------------------------------
- ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 8f6fd2f..1a5c6de 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -59,6 +59,8 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -96,6 +98,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- @@ -146,6 +149,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
- + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
- if (codec_id == CODEC_ID_VP8) {
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
- --
- 1.7.2.2
- From 26d0c78dc0149a60d128765e48660f99c5a978f0 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 18:35:22 -0500
- Subject: [PATCH 4/5] pred8x8l_dc_(mmxext|ssse3)
- ---
- libavcodec/x86/h264_intrapred.asm | 104 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 +
- 2 files changed, 108 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index c1b7886..dcc8d67 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -40,6 +40,7 @@ cextern pb_1
- cextern pb_3
- cextern pw_4
- cextern pw_5
- +cextern pw_8
- cextern pw_16
- cextern pw_17
- cextern pw_32
- @@ -1132,6 +1133,109 @@ PRED8x8L_TOP_DC mmxext
- PRED8x8L_TOP_DC ssse3
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +%macro PRED8x8L_DC 1 ; %1 = suffix appended to the function name (instantiated below with mmxext and ssse3)
- +cglobal pred8x8l_dc_%1, 4,5 ; r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride (see prototype above)
- + sub r0, r3 ; r0 -> row directly above the block
- + lea r4, [r0+r3*2] ; r4 = r0 + 2*stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending at the left neighbour of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the bytes ending at the top-left neighbour
- + movq mm1, [r4+r3*1-8] ; 8 bytes ending at the left neighbour of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with the bytes ending at the left neighbour of row 1
- + mov r4, r0 ; keep the row-above pointer; r0 is advanced below
- + punpckhwd mm1, mm0 ; gather the top words of both registers
- + lea r0, [r0+r3*4] ; advance four rows
- + movq mm2, [r0+r3*1-8] ; left neighbour of row 4 in the top byte
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with the left neighbour of row 3
- + lea r0, [r0+r3*2] ; advance two rows
- + movq mm3, [r0+r3*1-8] ; left neighbour of row 6 in the top byte
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with the left neighbour of row 5
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = left neighbours of rows 6..0 followed by the top-left pixel
- + lea r0, [r0+r3*2] ; advance two rows
- + movq mm0, [r0+r3*0-8] ; 8 bytes ending at the left neighbour of row 7
- + movq mm1, [r4] ; top row t[0..7]
- + mov r0, r4 ; restore r0 -> row above the block
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; shift the row-7 left neighbour into the bottom of the column
- + PALIGNR mm1, mm2, 1, mm2 ; column shifted the other way, t[0] entering at the top
- + test r1d, r1d ; has_topleft (int argument: only the low 32 bits are defined on x86-64)
- + jnz .do_left
- +.fix_lt_1: ; no top-left: patch byte 6 of mm1 using byte 7 of (mm3 ^ mm4)
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; isolate the top byte of the difference
- + psllq mm5, 48 ; move it to byte 6
- + pxor mm1, mm5
- + jmp .do_left
- +.fix_lt_2: ; no top-left: replace byte 0 of mm2 with byte 0 of the top row
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5
- + test r2d, r2d ; has_topright
- + jnz .body
- +.fix_tr_1: ; no top-right: replace byte 7 of mm1 with byte 7 of the top row
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5
- + jmp .body
- +.do_left: ; lowpass-filter the left column; the result ends up in mm7
- + movq mm0, mm4 ; save mm4, the macro below overwrites its operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
- + movq mm4, mm0
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
- + psllq mm1, 56 ; keep only byte 0 of the second result, moved to the top byte
- + PALIGNR mm7, mm1, 7, mm3 ; merge that byte into the bottom of mm7
- + movq mm0, [r0-8] ; 8 bytes ending at the top-left neighbour
- + movq mm3, [r0] ; top row t[0..7]
- + movq mm1, [r0+8] ; top-right bytes t[8..15]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = "left" operand for the lowpass: t[-1..6]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = "right" operand for the lowpass: t[1..8]
- + test r1d, r1d ; has_topleft
- + jz .fix_lt_2
- + test r2d, r2d ; has_topright
- + jz .fix_tr_1
- +.body:
- + lea r1, [r0+r3*2] ; r1 is free now: base for rows 2 and 3
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row
- + pxor mm0, mm0
- + pxor mm1, mm1
- + lea r2, [r1+r3*2] ; base for rows 4 and 5
- + psadbw mm0, mm7 ; sum of the filtered left column
- + psadbw mm1, mm6 ; sum of the filtered top row
- + paddw mm0, [pw_8] ; rounding term (pw_8 is external; presumably 8 in every word)
- + paddw mm0, mm1
- + lea r4, [r2+r3*2] ; base for rows 6 and 7
- + psrlw mm0, 4 ; divide the combined sum by 16
- + pshufw mm0, mm0, 0 ; broadcast the DC word
- + packuswb mm0, mm0 ; DC value in all 8 bytes
- + movq [r0+r3*1], mm0 ; row 0
- + movq [r0+r3*2], mm0 ; row 1
- + movq [r1+r3*1], mm0 ; row 2
- + movq [r1+r3*2], mm0 ; row 3
- + movq [r2+r3*1], mm0 ; row 4
- + movq [r2+r3*2], mm0 ; row 5
- + movq [r4+r3*1], mm0 ; row 6
- + movq [r4+r3*2], mm0 ; row 7
- + RET
- +%endmacro
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +PRED8x8L_DC mmxext
- +%define PALIGNR PALIGNR_SSSE3
- +PRED8x8L_DC ssse3
- +
- +;-----------------------------------------------------------------------------
- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 1a5c6de..b8c7ff1 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -61,6 +61,8 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -99,6 +101,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- @@ -150,6 +153,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
- + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_ssse3;
- if (codec_id == CODEC_ID_VP8) {
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
- --
- 1.7.2.2
- From 9535a1017f05a108370bfd9a4bf28a0dfaf89bab Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 21:05:24 -0500
- Subject: [PATCH 5/5] pred8x8l_horizontal_(mmxext|ssse3)
- ---
- libavcodec/x86/h264_intrapred.asm | 78 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 ++
- 2 files changed, 82 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index dcc8d67..ba27e0f 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1236,6 +1236,84 @@ PRED8x8L_DC mmxext
- PRED8x8L_DC ssse3
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +%macro PRED8x8L_HORIZONTAL 1 ; %1 = suffix appended to the function name (instantiated below with mmxext and ssse3)
- +cglobal pred8x8l_horizontal_%1, 4,4 ; r0 = src, r1 = has_topleft, r2 = has_topright (overwritten as scratch), r3 = stride
- + sub r0, r3 ; r0 -> row directly above the block
- + lea r2, [r0+r3*2] ; r2 = r0 + 2*stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending at the left neighbour of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the bytes ending at the top-left neighbour
- + movq mm1, [r2+r3*1-8] ; 8 bytes ending at the left neighbour of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with the bytes ending at the left neighbour of row 1
- + mov r2, r0 ; keep the row-above pointer; r0 is advanced below
- + punpckhwd mm1, mm0 ; gather the top words of both registers
- + lea r0, [r0+r3*4] ; advance four rows
- + movq mm2, [r0+r3*1-8] ; left neighbour of row 4 in the top byte
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with the left neighbour of row 3
- + lea r0, [r0+r3*2] ; advance two rows
- + movq mm3, [r0+r3*1-8] ; left neighbour of row 6 in the top byte
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with the left neighbour of row 5
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = left neighbours of rows 6..0 followed by the top-left pixel
- + lea r0, [r0+r3*2] ; advance two rows
- + movq mm0, [r0+r3*0-8] ; 8 bytes ending at the left neighbour of row 7
- + movq mm1, [r2] ; top row t[0..7]
- + mov r0, r2 ; restore r0 -> row above the block
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; shift the row-7 left neighbour into the bottom of the column
- + PALIGNR mm1, mm2, 1, mm2 ; column shifted the other way, t[0] entering at the top
- + test r1d, r1d ; top_left (int argument: only the low 32 bits are defined on x86-64)
- + jnz .do_left
- +.fix_lt_1: ; no top-left: patch byte 6 of mm1 using byte 7 of (mm3 ^ mm4)
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; isolate the top byte of the difference
- + psllq mm5, 48 ; move it to byte 6
- + pxor mm1, mm5
- +.do_left: ; lowpass-filter the left column; the result ends up in mm7
- + movq mm0, mm4 ; save mm4, the macro below overwrites its operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
- + movq mm4, mm0
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
- + psllq mm1, 56 ; keep only byte 0 of the second result, moved to the top byte
- + PALIGNR mm7, mm1, 7, mm3 ; merge that byte into the bottom of mm7
- + movq mm3, mm7 ; second copy of the filtered column
- + lea r1, [r0+r3*2] ; r1 is free now: base for rows 2 and 3
- + movq mm7, mm3 ; NOTE(review): mm7 already equals mm3 here, so this copy changes nothing
- + punpckhbw mm3, mm3 ; upper 4 column bytes, each doubled into a word
- + punpcklbw mm7, mm7 ; lower 4 column bytes, each doubled into a word
- + pshufw mm0, mm3, 0xff ; broadcast word 3 -> row 0
- + pshufw mm1, mm3, 0xaa ; broadcast word 2 -> row 1
- + lea r2, [r1+r3*2] ; base for rows 4 and 5
- + pshufw mm2, mm3, 0x55 ; broadcast word 1 -> row 2
- + pshufw mm3, mm3, 0x00 ; broadcast word 0 -> row 3
- + pshufw mm4, mm7, 0xff ; broadcast word 3 -> row 4
- + pshufw mm5, mm7, 0xaa ; broadcast word 2 -> row 5
- + pshufw mm6, mm7, 0x55 ; broadcast word 1 -> row 6
- + pshufw mm7, mm7, 0x00 ; broadcast word 0 -> row 7
- + movq [r0+r3*1], mm0 ; row 0
- + movq [r0+r3*2], mm1 ; row 1
- + movq [r1+r3*1], mm2 ; row 2
- + movq [r1+r3*2], mm3 ; row 3
- + movq [r2+r3*1], mm4 ; row 4
- + movq [r2+r3*2], mm5 ; row 5
- + lea r0, [r2+r3*2] ; base for rows 6 and 7
- + movq [r0+r3*1], mm6 ; row 6
- + movq [r0+r3*2], mm7 ; row 7
- + RET
- +%endmacro
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +PRED8x8L_HORIZONTAL mmxext
- +%define PALIGNR PALIGNR_SSSE3
- +PRED8x8L_HORIZONTAL ssse3
- +
- +;-----------------------------------------------------------------------------
- ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index b8c7ff1..a22c5cd 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -63,6 +63,8 @@ void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -102,6 +104,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_mmxext;
- + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- @@ -154,6 +157,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
- h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_ssse3;
- + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_ssse3;
- if (codec_id == CODEC_ID_VP8) {
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
- h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3;
- --
- 1.7.2.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement