Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 7fde0735a8676fc36efc054abb55b4a3c9580773 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Thu, 25 Nov 2010 19:44:56 -0500
- Subject: [PATCH 1/8] predict_4x4_ddl
- ---
- common/x86/predict-a.asm | 108 +++++++++++++++++++++++++--------------------
- common/x86/predict-c.c | 9 +++-
- 2 files changed, 67 insertions(+), 50 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index a05e91b..739ce35 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -40,6 +40,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_1
- cextern pw_2
- cextern pw_4
- cextern pw_8
- @@ -98,19 +99,16 @@ cextern pb_reverse
- ; dest, left, right, src, tmp
- ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- %macro PRED8x8_LOWPASS0 6
- - mov%6 %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mov%6 %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- + mova %6, %3 ; new operand order: %1 = size suffix (b/w), %2 = dest, %3 = left, %4 = right, %5 = src, %6 = tmp; tmp keeps the original left
- + pavg%1 %3, %4 ; left = average of left and right, rounded up
- + pxor %4, %6 ; right = left ^ right (bit 0 set where the average was rounded up)
- + mova %2, %5 ; dest = src, i.e. t[n]
- + pand %4, [p%1_1] ; isolate that rounding bit; assumes pb_1 / pw_1 hold 1 in every lane (defined outside this file)
- + psubus%1 %3, %4 ; left = (left + right) >> 1, rounding removed
- + pavg%1 %2, %3 ; dest = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- %endmacro
- -%macro PRED8x8_LOWPASS 5
- - PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, q
- -%endmacro
- -%macro PRED8x8_LOWPASS_XMM 5
- - PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, dqa
- +%macro PRED8x8_LOWPASS 6 ; size suffix (b/w), dest, left, right, src, tmp; left and right are clobbered
- + PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, %6
- %endmacro
- %macro LOAD_PLANE_ARGS 0
- @@ -129,27 +127,37 @@ cextern pb_reverse
- %endmacro
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_ddl( uint8_t *src )
- +; void predict_4x4_ddl( pixel *src )
- ;-----------------------------------------------------------------------------
- -cglobal predict_4x4_ddl_mmxext, 1,1
- - movq mm1, [r0-FDEC_STRIDE]
- - movq mm2, mm1
- - movq mm3, mm1
- - movq mm4, mm1
- - psllq mm1, 8
- - pxor mm2, mm1
- - psrlq mm2, 8
- - pxor mm3, mm2
- - PRED8x8_LOWPASS mm0, mm1, mm3, mm4, mm5
- +%macro PREDICT_4x4_DDL 4 ; %1 = cpu suffix, %2 = shift suffix (q/dq), %3 = shift count for one pixel (8 bits / 2 bytes), %4 = pixel size suffix (b/w)
- +cglobal predict_4x4_ddl_%1, 1,1
- + movu m1, [r0-SIZEOF_PIXEL*FDEC_STRIDE] ; row above the block; movu, not mova: nothing here guarantees a 4x4 block row is 16-byte aligned in the XMM build
- + mova m2, m1
- + mova m3, m1
- + mova m4, m1 ; m4 = t[n], the filter centre
- + psll%2 m1, %3 ; m1 = t[n-1], lowest lane zero-filled
- + pxor m2, m1
- + psrl%2 m2, %3
- + pxor m3, m2 ; m3 = t[n+1], with the last pixel repeated in the top lane
- + PRED8x8_LOWPASS %4, m0, m1, m3, m4, m5 ; m0 = (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2
- %assign Y 0
- %rep 4
- - psrlq mm0, 8
- - movd [r0+Y*FDEC_STRIDE], mm0
- + psrl%2 m0, %3 ; each row starts one pixel further along the filtered vector
- + movh [r0+SIZEOF_PIXEL*Y*FDEC_STRIDE], m0
- %assign Y (Y+1)
- %endrep
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_4x4_DDL sse2 , dq, 2, w
- +%else
- +INIT_MMX
- +PREDICT_4x4_DDL mmxext, q , 8, b
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_ddr( uint8_t *src )
- @@ -166,7 +174,7 @@ cglobal predict_4x4_ddr_%1, 1,1
- PALIGNR mm3, [r0+2*FDEC_STRIDE-8], 7, mm4
- movq mm2, mm3
- PALIGNR mm3, [r0+3*FDEC_STRIDE-8], 7, mm4
- - PRED8x8_LOWPASS mm0, mm3, mm1, mm2, mm4
- + PRED8x8_LOWPASS b, mm0, mm3, mm1, mm2, mm4
- %assign Y 3
- movd [r0+Y*FDEC_STRIDE], mm0
- %rep 3
- @@ -186,7 +194,7 @@ cglobal predict_4x4_vr_%1, 1,1
- PALIGNR mm0, [r0+1*FDEC_STRIDE-8], 7, mm2 ; ..t3t2t1t0ltl0l1
- movq mm2, mm0
- PALIGNR mm0, [r0+2*FDEC_STRIDE-8], 7, mm3 ; t3t2t1t0ltl0l1l2
- - PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
- + PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- movq mm1, mm3
- psrlq mm3, 16
- psllq mm1, 48
- @@ -215,7 +223,7 @@ cglobal predict_4x4_hd_%1, 1,1
- psrlq mm0, 16 ; .. .. t2 t1 t0 lt l0 l1
- psrlq mm2, 8 ; .. t2 t1 t0 lt l0 l1 l2
- pavgb mm7, mm2
- - PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
- + PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- punpcklbw mm7, mm3
- psrlq mm3, 32
- PALIGNR mm3, mm7, 6, mm6
- @@ -230,6 +238,7 @@ cglobal predict_4x4_hd_%1, 1,1
- RET
- %endmacro
- +INIT_MMX
- %define PALIGNR PALIGNR_MMX
- PREDICT_4x4 mmxext
- %define PALIGNR PALIGNR_SSSE3
- @@ -254,7 +263,7 @@ cglobal predict_4x4_hu_mmxext, 1,1
- psrlq mm2, 16
- psrlq mm3, 8
- pavgb mm7, mm3
- - PRED8x8_LOWPASS mm4, mm0, mm2, mm3, mm5
- + PRED8x8_LOWPASS b, mm4, mm0, mm2, mm3, mm5
- punpcklbw mm7, mm4
- %assign Y 0
- movd [r0+Y*FDEC_STRIDE], mm7
- @@ -278,7 +287,7 @@ cglobal predict_4x4_vl_mmxext, 1,1
- movq mm4, mm3
- pavgb mm4, mm1
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm3, mm5
- movd [r0+0*FDEC_STRIDE], mm4
- movd [r0+1*FDEC_STRIDE], mm0
- @@ -353,10 +362,10 @@ cglobal predict_8x8_filter_%1, 4,5
- je .fix_lt_1
- .do_left:
- movq mm0, mm4
- - PRED8x8_LOWPASS mm2, mm1, mm4, mm3, mm5
- + PRED8x8_LOWPASS b, mm2, mm1, mm4, mm3, mm5
- movq [t1+8], mm2
- movq mm4, mm0
- - PRED8x8_LOWPASS mm1, mm3, mm0, mm4, mm5
- + PRED8x8_LOWPASS b, mm1, mm3, mm0, mm4, mm5
- movd t4, mm1
- mov [t1+7], t4b
- .check_top:
- @@ -374,7 +383,7 @@ cglobal predict_8x8_filter_%1, 4,5
- test r2b, 0x04
- je .fix_tr_1
- .do_top:
- - PRED8x8_LOWPASS mm4, mm2, mm1, mm3, mm5
- + PRED8x8_LOWPASS b, mm4, mm2, mm1, mm3, mm5
- movq [t1+16], mm4
- test r3b, 0x04
- je .done
- @@ -387,7 +396,7 @@ cglobal predict_8x8_filter_%1, 4,5
- psrlq mm5, 56
- PALIGNR mm2, mm3, 7, mm3
- PALIGNR mm5, mm4, 1, mm4
- - PRED8x8_LOWPASS mm1, mm2, mm5, mm0, mm4
- + PRED8x8_LOWPASS b, mm1, mm2, mm5, mm0, mm4
- jmp .do_topright
- .fix_tr_2:
- punpckhbw mm3, mm3
- @@ -424,6 +433,7 @@ cglobal predict_8x8_filter_%1, 4,5
- %endmacro
- %define PALIGNR PALIGNR_MMX
- +INIT_MMX
- PREDICT_FILTER mmxext
- %define PALIGNR PALIGNR_SSSE3
- PREDICT_FILTER ssse3
- @@ -511,8 +521,8 @@ cglobal predict_8x8_ddl_mmxext, 2,2
- movq mm1, mm5
- psllq mm1, 8
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm5, mm7
- - PRED8x8_LOWPASS mm1, mm3, mm4, [r1+24], mm6
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm5, mm7
- + PRED8x8_LOWPASS b, mm1, mm3, mm4, [r1+24], mm6
- %assign Y 3
- %rep 6
- movq [r0+Y*FDEC_STRIDE], mm1
- @@ -540,8 +550,8 @@ cglobal predict_8x8_ddr_mmxext, 2,2
- movq mm3, [r1+15]
- movq mm4, [r1+17]
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, [r1+8], mm7
- - PRED8x8_LOWPASS mm1, mm3, mm4, [r1+16], mm6
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, [r1+8], mm7
- + PRED8x8_LOWPASS b, mm1, mm3, mm4, [r1+16], mm6
- %assign Y 3
- %rep 6
- movq [r0+Y*FDEC_STRIDE], mm0
- @@ -582,7 +592,7 @@ cglobal predict_8x8_hu_mmxext, 2,2
- punpckhbw mm1, mm1
- por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- pavgb mm4, mm2
- - PRED8x8_LOWPASS mm1, mm3, mm5, mm2, mm6
- + PRED8x8_LOWPASS b, mm1, mm3, mm5, mm2, mm6
- movq mm5, mm4
- punpcklbw mm4, mm1 ; p4 p3 p2 p1
- punpckhbw mm5, mm1 ; p8 p7 p6 p5
- @@ -627,7 +637,7 @@ cglobal predict_8x8_vr_core_mmxext, 2,2
- movq mm4, mm3
- pavgb mm3, mm2
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm4, mm7
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm4, mm7
- %assign Y -4
- %rep 3
- @@ -714,6 +724,7 @@ ALIGN 4
- %endif ; !ARCH_X86_64
- +INIT_XMM
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_ddl( uint8_t *src, uint8_t *edge )
- ;-----------------------------------------------------------------------------
- @@ -723,7 +734,7 @@ cglobal predict_8x8_ddl_sse2, 2,2
- movdqa xmm1, xmm3
- pslldq xmm1, 1
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm4
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm4
- %assign Y -4
- %rep 8
- @@ -742,7 +753,7 @@ cglobal predict_8x8_ddr_sse2, 2,2
- movdqa xmm2, xmm3
- psrldq xmm2, 1
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm4
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm4
- movdqa xmm1, xmm0
- psrldq xmm1, 1
- @@ -771,7 +782,7 @@ cglobal predict_8x8_vl_sse2, 2,2
- pslldq xmm1, 1
- pavgb xmm3, xmm2
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm4, xmm5
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm4, xmm5
- ; xmm0: (t0 + 2*t1 + t2 + 2) >> 2
- ; xmm3: (t0 + t1 + 1) >> 1
- @@ -802,7 +813,7 @@ cglobal predict_8x8_vr_sse2, 2,2,7
- pslldq xmm0, 1
- pslldq xmm1, 2
- pavgb xmm2, xmm0
- - PRED8x8_LOWPASS_XMM xmm4, xmm3, xmm1, xmm0, xmm5
- + PRED8x8_LOWPASS b, xmm4, xmm3, xmm1, xmm0, xmm5
- pandn xmm6, xmm4
- movdqa xmm5, xmm4
- psrlw xmm4, 8
- @@ -824,6 +835,7 @@ cglobal predict_8x8_vr_sse2, 2,2,7
- %endrep
- RET
- +INIT_MMX
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_hd( uint8_t *src, uint8_t *edge )
- ;-----------------------------------------------------------------------------
- @@ -840,12 +852,12 @@ cglobal predict_8x8_hd_mmxext, 2,2
- PALIGNR mm4, mm3, 1, mm7 ; t0 lt l0 l1 l2 l3 l4 l5
- movq mm5, mm3
- pavgb mm3, mm1
- - PRED8x8_LOWPASS mm0, mm4, mm1, mm5, mm7
- + PRED8x8_LOWPASS b, mm0, mm4, mm1, mm5, mm7
- movq mm4, mm2
- movq mm1, mm2 ; t6 t5 t4 t3 t2 t1 t0 lt
- psrlq mm4, 16 ; .. .. t6 t5 t4 t3 t2 t1
- psrlq mm1, 8 ; .. t6 t5 t4 t3 t2 t1 t0
- - PRED8x8_LOWPASS mm6, mm4, mm2, mm1, mm5
- + PRED8x8_LOWPASS b, mm6, mm4, mm2, mm1, mm5
- ; .. p11 p10 p9
- movq mm7, mm3
- punpcklbw mm3, mm0 ; p4 p3 p2 p1
- @@ -886,7 +898,7 @@ cglobal predict_8x8_hd_%1, 2,2
- PALIGNR xmm3, xmm0, 8, xmm0
- movdqa xmm4, xmm1
- pavgb xmm4, xmm3
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm5
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm5
- punpcklbw xmm4, xmm0
- movhlps xmm0, xmm4
- @@ -946,7 +958,7 @@ cglobal predict_8x8_hu_%1, 2,2
- por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- %endif
- pavgb mm4, mm2
- - PRED8x8_LOWPASS mm1, mm3, mm5, mm2, mm6
- + PRED8x8_LOWPASS b, mm1, mm3, mm5, mm2, mm6
- movq2dq xmm0, mm4
- movq2dq xmm1, mm1
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 8eafcc0..077f2c7 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -64,6 +64,7 @@
- void x264_predict_8x8_filter_mmxext( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_ssse3( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_4x4_ddl_mmxext( uint8_t *src );
- + void x264_predict_4x4_ddl_sse2( uint16_t *src );
- void x264_predict_4x4_ddr_mmxext( uint8_t *src );
- void x264_predict_4x4_vl_mmxext( uint8_t *src );
- void x264_predict_4x4_vr_mmxext( uint8_t *src );
- @@ -474,7 +475,11 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- -#if !HIGH_BIT_DEPTH
- +#if HIGH_BIT_DEPTH
- + if( !(cpu&X264_CPU_SSE2) )
- + return;
- + pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- +#else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
- @@ -487,5 +492,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- -#endif // !HIGH_BIT_DEPTH
- +#endif // HIGH_BIT_DEPTH
- }
- --
- 1.7.2.3
- From ad274f6257b3205448a2dbb4ad77a6d0a51b722b Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Thu, 25 Nov 2010 19:57:30 -0500
- Subject: [PATCH 2/8] predict_4x4_hu
- ---
- common/x86/predict-a.asm | 36 +++++++++++++++++++++++++++++++++++-
- common/x86/predict-c.c | 2 ++
- 2 files changed, 37 insertions(+), 1 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index 739ce35..f2ab91e 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -245,8 +245,40 @@ PREDICT_4x4 mmxext
- PREDICT_4x4 ssse3
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_hu( uint8_t *src )
- +; void predict_4x4_hu( pixel *src )
- ;-----------------------------------------------------------------------------
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +cglobal predict_4x4_hu_sse2, 1,1,7
- + mova m0, [r0+2*0*FDEC_STRIDE-8*2] ; 8 words ending at l0, the pixel left of row 0 -- NOTE(review): full-register aligned access at r0-16 (here and in the memory operands below); confirm r0 is always 16-byte aligned
- + punpckhwd m0, [r0+2*1*FDEC_STRIDE-8*2] ; top two words: l0 l1
- + mova m1, [r0+2*2*FDEC_STRIDE-8*2]
- + punpckhwd m1, [r0+2*3*FDEC_STRIDE-8*2] ; top two words: l2 l3
- + punpckhdq m0, m1 ; high half: l0 l1 l2 l3
- + mova m1, m0
- + pshufhw m1, m1, 0xFF ; replicate l3 across the high half
- + punpckhqdq m1, m1 ; m1 = l3 in every lane
- + punpckhqdq m0, m1 ; m0 = l0 l1 l2 l3 l3 l3 l3 l3 (lowest lane first)
- + mova m2, m0
- + mova m3, m0
- + mova m6, m0
- + psrldq m2, 4 ; m2 = l[n+2]
- + psrldq m3, 2 ; m3 = l[n+1]
- + pavgw m6, m3 ; m6 = (l[n] + l[n+1] + 1) >> 1
- + PRED8x8_LOWPASS w, m4, m0, m2, m3, m5 ; m4 = (l[n] + 2*l[n+1] + l[n+2] + 2) >> 2
- + punpcklwd m6, m4 ; interleave the two-tap and three-tap results
- +%assign Y 0
- + movq [r0+Y*2*FDEC_STRIDE], m6
- +%rep 2
- +%assign Y (Y+1)
- + psrldq m6, 4 ; advance two pixels per row
- + movq [r0+2*Y*FDEC_STRIDE], m6
- +%endrep
- + movq [r0+2*3*FDEC_STRIDE], m1 ; last row is l3 repeated
- + RET
- +
- +%else
- +INIT_MMX
- cglobal predict_4x4_hu_mmxext, 1,1
- movq mm0, [r0+0*FDEC_STRIDE-8]
- punpckhbw mm0, [r0+1*FDEC_STRIDE-8]
- @@ -274,10 +306,12 @@ cglobal predict_4x4_hu_mmxext, 1,1
- %endrep
- movd [r0+3*FDEC_STRIDE], mm1
- RET
- +%endif ;HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_vl( uint8_t *src )
- ;-----------------------------------------------------------------------------
- +INIT_MMX
- cglobal predict_4x4_vl_mmxext, 1,1
- movq mm1, [r0-FDEC_STRIDE]
- movq mm3, mm1
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 077f2c7..8f15e4e 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -74,6 +74,7 @@
- void x264_predict_4x4_dc_mmxext( uint8_t *src );
- void x264_predict_4x4_ddr_ssse3( uint8_t *src );
- void x264_predict_4x4_hu_mmxext( uint8_t *src );
- + void x264_predict_4x4_hu_sse2( uint16_t *src );
- void x264_predict_16x16_dc_top_sse2( uint8_t *src );
- void x264_predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
- void x264_predict_16x16_dc_left_core_sse2( uint8_t *src, int i_dc_left );
- @@ -479,6 +480,7 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- if( !(cpu&X264_CPU_SSE2) )
- return;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- + pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- --
- 1.7.2.3
- From e5ff6fb0be3ac1a19b9414db5e83589d6c5a0b4a Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 00:24:28 -0500
- Subject: [PATCH 3/8] predict_4x4_vl
- ---
- common/x86/predict-a.asm | 43 ++++++++++++++++++++++++++-----------------
- common/x86/predict-c.c | 2 ++
- 2 files changed, 28 insertions(+), 17 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index f2ab91e..acdcca3 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -309,28 +309,37 @@ cglobal predict_4x4_hu_mmxext, 1,1
- %endif ;HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_vl( uint8_t *src )
- -;-----------------------------------------------------------------------------
- -INIT_MMX
- -cglobal predict_4x4_vl_mmxext, 1,1
- - movq mm1, [r0-FDEC_STRIDE]
- - movq mm3, mm1
- - movq mm2, mm1
- - psrlq mm3, 8
- - psrlq mm2, 16
- - movq mm4, mm3
- - pavgb mm4, mm1
- -
- - PRED8x8_LOWPASS b, mm0, mm1, mm2, mm3, mm5
- -
- - movd [r0+0*FDEC_STRIDE], mm4
- - movd [r0+1*FDEC_STRIDE], mm0
- - psrlq mm4, 8
- - psrlq mm0, 8
- - movd [r0+2*FDEC_STRIDE], mm4
- - movd [r0+3*FDEC_STRIDE], mm0
- +; void predict_4x4_vl( pixel *src )
- +;-----------------------------------------------------------------------------
- +%macro PREDICT_4x4_V1 4 ; %1 = cpu suffix, %2 = shift suffix (q/dq), %3 = shift count for one pixel (8 bits / 2 bytes), %4 = pixel size suffix (b/w)
- +cglobal predict_4x4_vl_%1, 1,1
- + movu m1, [r0-SIZEOF_PIXEL*FDEC_STRIDE] ; row above the block; movu, not mova: nothing here guarantees a 4x4 block row is 16-byte aligned in the XMM build
- + mova m3, m1
- + mova m2, m1
- + psrl%2 m3, %3 ; m3 = t[n+1]
- + psrl%2 m2, %3*2 ; m2 = t[n+2]
- + mova m4, m3
- + pavg%4 m4, m1 ; m4 = (t[n] + t[n+1] + 1) >> 1
- +
- + PRED8x8_LOWPASS %4, m0, m1, m2, m3, m5 ; m0 = (t[n] + 2*t[n+1] + t[n+2] + 2) >> 2
- +
- + movh [r0+SIZEOF_PIXEL*0*FDEC_STRIDE], m4
- + movh [r0+SIZEOF_PIXEL*1*FDEC_STRIDE], m0
- + psrl%2 m4, %3 ; rows 2 and 3 reuse rows 0 and 1 shifted by one pixel
- + psrl%2 m0, %3
- + movh [r0+SIZEOF_PIXEL*2*FDEC_STRIDE], m4
- + movh [r0+SIZEOF_PIXEL*3*FDEC_STRIDE], m0
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_4x4_V1 sse2 , dq, 2, w
- +%else
- +INIT_MMX
- +PREDICT_4x4_V1 mmxext, q , 8, b
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_dc( uint8_t *src )
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 8f15e4e..75843a0 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -67,6 +67,7 @@
- void x264_predict_4x4_ddl_sse2( uint16_t *src );
- void x264_predict_4x4_ddr_mmxext( uint8_t *src );
- void x264_predict_4x4_vl_mmxext( uint8_t *src );
- + void x264_predict_4x4_vl_sse2( uint16_t *src );
- void x264_predict_4x4_vr_mmxext( uint8_t *src );
- void x264_predict_4x4_vr_ssse3( uint8_t *src );
- void x264_predict_4x4_hd_mmxext( uint8_t *src );
- @@ -481,6 +482,7 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- return;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- + pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- --
- 1.7.2.3
- From d9df9a5c8ae6d55cf5bb814b99ab98f6ba517b0d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 02:29:40 -0500
- Subject: [PATCH 4/8] predict_8x8_v
- ---
- common/x86/predict-a.asm | 36 +++++++++++++++++++++++-------------
- common/x86/predict-c.c | 9 +++++++--
- 2 files changed, 30 insertions(+), 15 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index acdcca3..8d0ffd7 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -48,15 +48,15 @@ cextern pw_ff00
- cextern pb_reverse
- %macro STORE8x8 2
- - add r0, 4*FDEC_STRIDE
- - movq [r0 + -4*FDEC_STRIDE], %1
- - movq [r0 + -3*FDEC_STRIDE], %1
- - movq [r0 + -2*FDEC_STRIDE], %1
- - movq [r0 + -1*FDEC_STRIDE], %1
- - movq [r0 + 0*FDEC_STRIDE], %2
- - movq [r0 + 1*FDEC_STRIDE], %2
- - movq [r0 + 2*FDEC_STRIDE], %2
- - movq [r0 + 3*FDEC_STRIDE], %2
- + add r0, 4*SIZEOF_PIXEL*FDEC_STRIDE ; r0 is advanced four rows and left there: callers must not expect it preserved
- + mova [r0 + -4*SIZEOF_PIXEL*FDEC_STRIDE], %1 ; rows 0-3 of the block are filled with %1
- + mova [r0 + -3*SIZEOF_PIXEL*FDEC_STRIDE], %1
- + mova [r0 + -2*SIZEOF_PIXEL*FDEC_STRIDE], %1
- + mova [r0 + -1*SIZEOF_PIXEL*FDEC_STRIDE], %1
- + mova [r0 + 0*SIZEOF_PIXEL*FDEC_STRIDE], %2 ; rows 4-7 of the block are filled with %2
- + mova [r0 + 1*SIZEOF_PIXEL*FDEC_STRIDE], %2
- + mova [r0 + 2*SIZEOF_PIXEL*FDEC_STRIDE], %2
- + mova [r0 + 3*SIZEOF_PIXEL*FDEC_STRIDE], %2
- %endmacro
- %macro STORE16x16 2
- @@ -482,12 +482,22 @@ PREDICT_FILTER mmxext
- PREDICT_FILTER ssse3
- ;-----------------------------------------------------------------------------
- -; void predict_8x8_v( uint8_t *src, uint8_t *edge )
- +; void predict_8x8_v( pixel *src, pixel *edge )
- ;-----------------------------------------------------------------------------
- -cglobal predict_8x8_v_mmxext, 2,2
- - movq mm0, [r1+16]
- - STORE8x8 mm0, mm0
- +%macro PREDICT_8x8_V 1 ; %1 = cpu suffix appended to the function name
- +cglobal predict_8x8_v_%1, 2,2
- + mova m0, [r1+SIZEOF_PIXEL*16] ; one register of edge data starting at edge[16]
- + STORE8x8 m0, m0 ; copy that register into all eight rows of the block
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_8x8_V sse2
- +%else
- +INIT_MMX
- +PREDICT_8x8_V mmxext
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_h( uint8_t *src, uint8_t edge[33] )
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 75843a0..90fd0c7 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -44,6 +44,7 @@
- void x264_predict_8x8c_h_mmxext( uint8_t *src );
- void x264_predict_8x8c_h_ssse3( uint8_t *src );
- void x264_predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
- + void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
- @@ -443,7 +444,11 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- -#if !HIGH_BIT_DEPTH
- +#if HIGH_BIT_DEPTH
- + if( !(cpu&X264_CPU_SSE2) )
- + return;
- + pf[I_PRED_8x8_V] = x264_predict_8x8_v_sse2;
- +#else
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
- pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
- @@ -470,7 +475,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
- pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
- pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
- *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
- -#endif // !HIGH_BIT_DEPTH
- +#endif // HIGH_BIT_DEPTH
- }
- void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- --
- 1.7.2.3
- From 44ec73f438c89ad86c2c73706045ea92da967ec1 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 03:30:32 -0500
- Subject: [PATCH 5/8] predict_8x8_h
- ---
- common/x86/predict-a.asm | 43 ++++++++++++++++++++++++++-----------------
- common/x86/predict-c.c | 2 ++
- 2 files changed, 28 insertions(+), 17 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index 8d0ffd7..cedc3dd 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -500,31 +500,40 @@ PREDICT_8x8_V mmxext
- %endif
- ;-----------------------------------------------------------------------------
- -; void predict_8x8_h( uint8_t *src, uint8_t edge[33] )
- +; void predict_8x8_h( pixel *src, pixel edge[33] )
- ;-----------------------------------------------------------------------------
- -
- -INIT_MMX
- -cglobal predict_8x8_h_mmxext, 2,2
- - movu m3, [r1+7]
- - add r0, FDEC_STRIDE*4
- +%macro PREDICT_8x8_H 3 ; %1 = cpu suffix, %2 = punpck suffix that doubles each pixel (bw/wd), %3 = pshuf suffix that broadcasts a doubled pixel (w/d)
- +cglobal predict_8x8_h_%1, 2, 2
- + movu m3, [r1+SIZEOF_PIXEL*7] ; eight edge entries starting at edge[7]
- + add r0, SIZEOF_PIXEL*FDEC_STRIDE*4 ; centre r0 so the stores below use row offsets -4..+3
- mova m7, m3
- - punpckhbw m3, m3
- - punpcklbw m7, m7
- - pshufw m0, m3, 0xff
- - pshufw m1, m3, 0xaa
- - pshufw m2, m3, 0x55
- - pshufw m3, m3, 0x00
- - pshufw m4, m7, 0xff
- - pshufw m5, m7, 0xaa
- - pshufw m6, m7, 0x55
- - pshufw m7, m7, 0x00
- + punpckh%2 m3, m3 ; upper four pixels, each duplicated into a pair
- + punpckl%2 m7, m7 ; lower four pixels, each duplicated into a pair
- + pshuf%3 m0, m3, 0xff ; m0..m7: one loaded pixel broadcast per register, highest pixel first
- + pshuf%3 m1, m3, 0xaa
- + pshuf%3 m2, m3, 0x55
- + pshuf%3 m3, m3, 0x00
- + pshuf%3 m4, m7, 0xff
- + pshuf%3 m5, m7, 0xaa
- + pshuf%3 m6, m7, 0x55
- + pshuf%3 m7, m7, 0x00
- %assign n 0
- %rep 8
- - mova [r0+(n-4)*FDEC_STRIDE], m %+ n
- + mova [r0+(n-4)*SIZEOF_PIXEL*FDEC_STRIDE], m %+ n ; row n of the block is filled from register m<n>
- %assign n n+1
- %endrep
- RET
- +%endmacro
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_8x8_H sse2 , wd, d
- +%else
- +INIT_MMX
- +PREDICT_8x8_H mmxext, bw, w
- +%endif
- +
- +INIT_MMX
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_dc( uint8_t *src, uint8_t *edge );
- ;-----------------------------------------------------------------------------
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 90fd0c7..e5a3fa7 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -46,6 +46,7 @@
- void x264_predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
- + void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
- @@ -448,6 +449,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
- if( !(cpu&X264_CPU_SSE2) )
- return;
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_sse2;
- + pf[I_PRED_8x8_H] = x264_predict_8x8_h_sse2;
- #else
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
- --
- 1.7.2.3
- From 0933e04c859c9f4bcd93dd6e03984d09dde314b0 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 13:57:21 -0500
- Subject: [PATCH 6/8] predict_4x4_ddr, predict_4x4_vr, predict_4x4_hd
- ---
- common/x86/predict-a.asm | 132 ++++++++++++++++++++++++---------------------
- common/x86/predict-c.c | 5 ++
- 2 files changed, 75 insertions(+), 62 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index cedc3dd..6bd845d 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -160,89 +160,97 @@ PREDICT_4x4_DDL mmxext, q , 8, b
- %endif
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_ddr( uint8_t *src )
- +; void predict_4x4_ddr( pixel *src )
- ;-----------------------------------------------------------------------------
- -%macro PREDICT_4x4 1
- +%macro PREDICT_4x4 7
- cglobal predict_4x4_ddr_%1, 1,1
- - movq mm1, [r0+1*FDEC_STRIDE-8]
- - movq mm2, [r0+0*FDEC_STRIDE-8]
- - punpckhbw mm2, [r0-1*FDEC_STRIDE-8]
- - movd mm3, [r0-1*FDEC_STRIDE]
- - punpckhwd mm1, mm2
- - PALIGNR mm3, mm1, 5, mm1
- - movq mm1, mm3
- - PALIGNR mm3, [r0+2*FDEC_STRIDE-8], 7, mm4
- - movq mm2, mm3
- - PALIGNR mm3, [r0+3*FDEC_STRIDE-8], 7, mm4
- - PRED8x8_LOWPASS b, mm0, mm3, mm1, mm2, mm4
- + mova m1, [r0+1*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; top lane = l1 -- NOTE(review): in the XMM build this and the memory operands below are 16-byte accesses ending just left of the block; confirm they are always aligned
- + mova m2, [r0+0*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; top lane = l0
- + punpckh%2 m2, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; top two lanes = l0 lt
- + movh m3, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE] ; t0 t1 t2 t3
- + punpckh%3 m1, m2 ; top three lanes = l1 l0 lt
- + PALIGNR m3, m1, 5*SIZEOF_PIXEL, m1 ; m3 = l1 l0 lt t0 t1 t2 t3 (lowest lane first)
- + mova m1, m3
- + PALIGNR m3, [r0+2*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4 ; shift l2 in at the bottom
- + mova m2, m3
- + PALIGNR m3, [r0+3*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4 ; shift l3 in at the bottom
- + PRED8x8_LOWPASS %5, m0, m3, m1, m2, m4 ; three-tap filter centred on m2, neighbours in m3 and m1
- %assign Y 3
- - movd [r0+Y*FDEC_STRIDE], mm0
- + movh [r0+Y*SIZEOF_PIXEL*FDEC_STRIDE], m0 ; bottom row first
- %rep 3
- %assign Y (Y-1)
- - psrlq mm0, 8
- - movd [r0+Y*FDEC_STRIDE], mm0
- + psrl%4 m0, %7 ; each row above starts one pixel further along
- + movh [r0+Y*SIZEOF_PIXEL*FDEC_STRIDE], m0
- %endrep
- RET
- cglobal predict_4x4_vr_%1, 1,1
- - movd mm0, [r0-1*FDEC_STRIDE] ; ........t3t2t1t0
- - movq mm7, mm0
- - PALIGNR mm0, [r0-1*FDEC_STRIDE-8], 7, mm1 ; ......t3t2t1t0lt
- - pavgb mm7, mm0
- - PALIGNR mm0, [r0+0*FDEC_STRIDE-8], 7, mm1 ; ....t3t2t1t0ltl0
- - movq mm1, mm0
- - PALIGNR mm0, [r0+1*FDEC_STRIDE-8], 7, mm2 ; ..t3t2t1t0ltl0l1
- - movq mm2, mm0
- - PALIGNR mm0, [r0+2*FDEC_STRIDE-8], 7, mm3 ; t3t2t1t0ltl0l1l2
- - PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- - movq mm1, mm3
- - psrlq mm3, 16
- - psllq mm1, 48
- - movd [r0+0*FDEC_STRIDE], mm7
- - movd [r0+1*FDEC_STRIDE], mm3
- - PALIGNR mm7, mm1, 7, mm2
- - psllq mm1, 8
- - movd [r0+2*FDEC_STRIDE], mm7
- - PALIGNR mm3, mm1, 7, mm1
- - movd [r0+3*FDEC_STRIDE], mm3
- + movh m0, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE] ; ........t3t2t1t0
- + mova m5, m0
- + PALIGNR m0, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ......t3t2t1t0lt -- NOTE(review): in the XMM build the PALIGNR memory operands are 16-byte accesses; confirm alignment
- + pavg%5 m5, m0 ; m5 = average of each top pixel with its left neighbour (row 0)
- + PALIGNR m0, [r0+0*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ....t3t2t1t0ltl0
- + mova m1, m0
- + PALIGNR m0, [r0+1*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m2 ; ..t3t2t1t0ltl0l1
- + mova m2, m0
- + PALIGNR m0, [r0+2*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m3 ; t3t2t1t0ltl0l1l2
- + PRED8x8_LOWPASS %5, m3, m1, m0, m2, m4 ; m3 = three-tap filter centred on m2
- + mova m1, m3
- + psrl%4 m3, %7*2 ; drop the two lowest filtered pixels: row 1
- + psll%4 m1, %7*6 ; park those two pixels in the top lanes for the PALIGNRs below
- + movh [r0+0*SIZEOF_PIXEL*FDEC_STRIDE], m5
- + movh [r0+1*SIZEOF_PIXEL*FDEC_STRIDE], m3
- + PALIGNR m5, m1, 7*SIZEOF_PIXEL, m2 ; row 2 = row 0 shifted up one lane, top lane of m1 shifted in
- + psll%4 m1, %7
- + movh [r0+2*SIZEOF_PIXEL*FDEC_STRIDE], m5
- + PALIGNR m3, m1, 7*SIZEOF_PIXEL, m1 ; row 3 = row 1 shifted up one lane, next parked pixel shifted in
- + movh [r0+3*SIZEOF_PIXEL*FDEC_STRIDE], m3
- RET
- -cglobal predict_4x4_hd_%1, 1,1
- - movd mm0, [r0-1*FDEC_STRIDE-4] ; lt ..
- - punpckldq mm0, [r0-1*FDEC_STRIDE] ; t3 t2 t1 t0 lt .. .. ..
- - psllq mm0, 8 ; t2 t1 t0 lt .. .. .. ..
- - movq mm1, [r0+3*FDEC_STRIDE-8] ; l3
- - punpckhbw mm1, [r0+2*FDEC_STRIDE-8] ; l2 l3
- - movq mm2, [r0+1*FDEC_STRIDE-8] ; l1
- - punpckhbw mm2, [r0+0*FDEC_STRIDE-8] ; l0 l1
- - punpckhwd mm1, mm2 ; l0 l1 l2 l3
- - punpckhdq mm1, mm0 ; t2 t1 t0 lt l0 l1 l2 l3
- - movq mm0, mm1
- - movq mm2, mm1
- - movq mm7, mm1
- - psrlq mm0, 16 ; .. .. t2 t1 t0 lt l0 l1
- - psrlq mm2, 8 ; .. t2 t1 t0 lt l0 l1 l2
- - pavgb mm7, mm2
- - PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- - punpcklbw mm7, mm3
- - psrlq mm3, 32
- - PALIGNR mm3, mm7, 6, mm6
- +cglobal predict_4x4_hd_%1, 1,1,7
- + movh m0, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE-4*SIZEOF_PIXEL] ; lt ..
- + punpckl%6 m0, [r0-1*SIZEOF_PIXEL*FDEC_STRIDE] ; t3 t2 t1 t0 lt .. .. .. -- NOTE(review): in the XMM build the memory operands in this function are 16-byte accesses; confirm alignment
- + psll%4 m0, %7 ; t2 t1 t0 lt .. .. .. ..
- + mova m1, [r0+3*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; l3
- + punpckh%2 m1, [r0+2*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; l2 l3
- + mova m2, [r0+1*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; l1
- + punpckh%2 m2, [r0+0*SIZEOF_PIXEL*FDEC_STRIDE-8*SIZEOF_PIXEL] ; l0 l1
- + punpckh%3 m1, m2 ; l0 l1 l2 l3
- + punpckh%6 m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
- + mova m0, m1
- + mova m2, m1
- + mova m6, m1
- + psrl%4 m0, %7*2 ; .. .. t2 t1 t0 lt l0 l1
- + psrl%4 m2, %7 ; .. t2 t1 t0 lt l0 l1 l2
- + pavg%5 m6, m2 ; m6 = two-tap average of each pixel with its neighbour
- + PRED8x8_LOWPASS %5, m3, m1, m0, m2, m4 ; m3 = three-tap filter centred on m2
- + punpckl%2 m6, m3 ; interleave two-tap and three-tap results for the left-derived rows
- + psrl%4 m3, %7*4 ; keep the filtered pixels derived from the top edge
- + PALIGNR m3, m6, 6*SIZEOF_PIXEL, m5 ; row 0 = last interleaved pair followed by the top-derived pixels
- %assign Y 3
- - movd [r0+Y*FDEC_STRIDE], mm7
- + movh [r0+Y*SIZEOF_PIXEL*FDEC_STRIDE], m6
- %rep 2
- %assign Y (Y-1)
- - psrlq mm7, 16
- - movd [r0+Y*FDEC_STRIDE], mm7
- + psrl%4 m6, %7*2 ; each row above starts two pixels further along
- + movh [r0+Y*SIZEOF_PIXEL*FDEC_STRIDE], m6
- %endrep
- - movd [r0+0*FDEC_STRIDE], mm3
- + movh [r0+0*SIZEOF_PIXEL*FDEC_STRIDE], m3
- RET
- %endmacro
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +%define PALIGNR PALIGNR_SSSE3
- +PREDICT_4x4 ssse3 , wd, dq, dq, w, qdq, 2
- +
- +%else
- INIT_MMX
- %define PALIGNR PALIGNR_MMX
- -PREDICT_4x4 mmxext
- +PREDICT_4x4 mmxext, bw, wd, q , b, dq , 8
- %define PALIGNR PALIGNR_SSSE3
- -PREDICT_4x4 ssse3
- +PREDICT_4x4 ssse3 , bw, wd, q , b, dq , 8
- +
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_hu( pixel *src )
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index e5a3fa7..0d6f7f2 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -490,6 +490,11 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
- + if( !(cpu&X264_CPU_SSSE3) )
- + return;
- + pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- + pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- + pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- --
- 1.7.2.3
- From d7d01cb8124f447e05d29c3db0ec40ae456de87c Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 15:54:40 -0500
- Subject: [PATCH 7/8] predict_4x4_dc
- ---
- common/x86/predict-a.asm | 26 ++++++++++++++++++++++++--
- common/x86/predict-c.c | 4 ++--
- 2 files changed, 26 insertions(+), 4 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index 6bd845d..fd322f3 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -350,9 +350,30 @@ PREDICT_4x4_V1 mmxext, q , 8, b
- %endif
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_dc( uint8_t *src )
- +; void predict_4x4_dc( pixel *src )
- ;-----------------------------------------------------------------------------
- +%ifdef HIGH_BIT_DEPTH
- +INIT_MMX
- +cglobal predict_4x4_dc_mmxext, 1,1
- + movq m0, [r0-SIZEOF_PIXEL*FDEC_STRIDE] ; 8 bytes from the row above the block (four pixels if SIZEOF_PIXEL is 2 -- confirm)
- + HADDW m0, m1 ; HADDW is defined elsewhere; presumably leaves the sum of the words in the low lane, using m1 as scratch -- confirm
- +%assign n 0
- +%rep 4
- + movd m1, [r0+SIZEOF_PIXEL*(FDEC_STRIDE*n-1)] ; 4 bytes starting at the pixel left of row n; the low word is that pixel
- + paddw m0, m1 ; accumulate the left pixel into the low word; the other lanes are not meaningful
- +%assign n n+1
- +%endrep
- + paddw m0, [pw_4] ; rounding term for the shift by 3 (eight pixels summed)
- + psrlw m0, 3
- + SPLATW m0, m0 ; SPLATW is defined elsewhere; presumably broadcasts the low word to every lane -- confirm
- + movq [r0+SIZEOF_PIXEL*FDEC_STRIDE*0], m0
- + movq [r0+SIZEOF_PIXEL*FDEC_STRIDE*1], m0
- + movq [r0+SIZEOF_PIXEL*FDEC_STRIDE*2], m0
- + movq [r0+SIZEOF_PIXEL*FDEC_STRIDE*3], m0
- + RET
- +%else
- +INIT_MMX
- cglobal predict_4x4_dc_mmxext, 1,4
- pxor mm7, mm7
- movd mm0, [r0-FDEC_STRIDE]
- @@ -373,12 +394,13 @@ cglobal predict_4x4_dc_mmxext, 1,4
- mov [r0+FDEC_STRIDE*2], r1d
- mov [r0+FDEC_STRIDE*3], r1d
- RET
- +%endif
- %macro PREDICT_FILTER 1
- ;-----------------------------------------------------------------------------
- ;void predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters )
- ;-----------------------------------------------------------------------------
- -
- +INIT_MMX
- cglobal predict_8x8_filter_%1, 4,5
- add r0, 0x58
- %define src r0-0x58
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 0d6f7f2..1a44f3a 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -74,7 +74,7 @@
- void x264_predict_4x4_vr_ssse3( uint8_t *src );
- void x264_predict_4x4_hd_mmxext( uint8_t *src );
- void x264_predict_4x4_hd_ssse3( uint8_t *src );
- - void x264_predict_4x4_dc_mmxext( uint8_t *src );
- + void x264_predict_4x4_dc_mmxext( pixel *src );
- void x264_predict_4x4_ddr_ssse3( uint8_t *src );
- void x264_predict_4x4_hu_mmxext( uint8_t *src );
- void x264_predict_4x4_hu_sse2( uint16_t *src );
- @@ -484,6 +484,7 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- + pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_mmxext;
- #if HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_SSE2) )
- return;
- @@ -499,7 +500,6 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
- - pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_mmxext;
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmxext;
- pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_mmxext;
- pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_mmxext;
- --
- 1.7.2.3
- From 51d57538ea8ea9ee729cae9890bcd5164ca16e6d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 19:41:53 -0500
- Subject: [PATCH 8/8] predict_8x8_dc
- ---
- common/x86/predict-a.asm | 23 +++++++++++++++++++++--
- common/x86/predict-c.c | 2 ++
- 2 files changed, 23 insertions(+), 2 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index fd322f3..82a4cf7 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -563,10 +563,27 @@ INIT_MMX
- PREDICT_8x8_H mmxext, bw, w
- %endif
- -INIT_MMX
- ;-----------------------------------------------------------------------------
- -; void predict_8x8_dc( uint8_t *src, uint8_t *edge );
- +; void predict_8x8_dc( pixel *src, pixel *edge );
- ;-----------------------------------------------------------------------------
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +cglobal predict_8x8_dc_sse2, 2,2,5
- + pxor m0, m0 ; DC predictor: m0 = running total; m0-m4 are all used below, hence 5 XMM regs declared in cglobal
- + movu m1, [r1+14] ; unaligned 16-byte load from edge (r1, per the prototype comment) at byte offset 14 - presumably the left neighbours
- + HADDW m1, m2 ; horizontal word sum of m1, m2 is scratch (macro defined elsewhere)
- + paddd m0, m1 ; add first partial sum
- + mova m3, [r1+32] ; aligned 16-byte load at byte offset 32 - presumably the top neighbours; assumes edge is 16-byte aligned
- + HADDW m3, m4 ; horizontal word sum of m3, m4 is scratch
- + paddd m0, m3 ; add second partial sum
- + paddw m0, [pw_8] ; rounding term for the shift below
- + psrlw m0, 4 ; (sum + 8) >> 4
- + SPLATW m0, m0 ; broadcast the DC word across m0 (macro defined elsewhere)
- + STORE8x8 m0, m0 ; fill the 8x8 block with the DC value (macro defined elsewhere)
- + RET
- +
- +%else
- +INIT_MMX
- cglobal predict_8x8_dc_mmxext, 2,2
- pxor mm0, mm0
- pxor mm1, mm1
- @@ -579,7 +596,9 @@ cglobal predict_8x8_dc_mmxext, 2,2
- packuswb mm0, mm0
- STORE8x8 mm0, mm0
- RET
- +%endif
- +INIT_MMX
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_dc_top( uint8_t *src, uint8_t *edge );
- ;-----------------------------------------------------------------------------
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 1a44f3a..39cebd9 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -50,6 +50,7 @@
- void x264_predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
- + void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_ddl_mmxext( uint8_t *src, uint8_t edge[33] );
- @@ -450,6 +451,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
- return;
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_sse2;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_sse2;
- + pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_sse2;
- #else
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
- --
- 1.7.2.3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement