Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 0e12a9ffa3336419d359b8c79634d963241d5681 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Thu, 25 Nov 2010 19:44:56 -0500
- Subject: [PATCH 1/3] x86: size-generic PRED8x8_LOWPASS; add high bit depth predict_4x4_ddl_sse2
- ---
- common/x86/predict-a.asm | 108 +++++++++++++++++++++++++--------------------
- common/x86/predict-c.c | 9 +++-
- 2 files changed, 67 insertions(+), 50 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index a05e91b..739ce35 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -40,6 +40,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_1
- cextern pw_2
- cextern pw_4
- cextern pw_8
- @@ -98,19 +99,16 @@ cextern pb_reverse
- ; dest, left, right, src, tmp
- ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- %macro PRED8x8_LOWPASS0 6
- - mov%6 %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mov%6 %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- + mova %6, %3 ; args: 1=element suffix (b/w), 2=dest, 3=left, 4=right, 5=src, 6=tmp; tmp = left
- + pavg%1 %3, %4 ; left = (left + right + 1) >> 1 (pavg rounds up)
- + pxor %4, %6 ; right = left ^ right; its low bit is set exactly when pavg rounded up
- + mova %2, %5 ; dest = src, the centre tap t[n] (src may be a memory operand)
- + pand %4, [p%1_1] ; keep only that low bit per element (mask is pb_1 or pw_1)
- + psubus%1 %3, %4 ; left = (left + right) >> 1 with the rounding removed
- + pavg%1 %2, %3 ; dest = (src + left + 1) >> 1 == (l + 2*src + r + 2) >> 2; left, right, tmp are clobbered
- %endmacro
- -%macro PRED8x8_LOWPASS 5
- - PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, q
- -%endmacro
- -%macro PRED8x8_LOWPASS_XMM 5
- - PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, dqa
- +%macro PRED8x8_LOWPASS 6 ; same argument order as PRED8x8_LOWPASS0: suffix, dest, left, right, src, tmp
- + PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, %6 ; forwards all six arguments unchanged
- %endmacro
- %macro LOAD_PLANE_ARGS 0
- @@ -129,27 +127,37 @@ cextern pb_reverse
- %endmacro
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_ddl( uint8_t *src )
- +; void predict_4x4_ddl( pixel *src )
- ;-----------------------------------------------------------------------------
- -cglobal predict_4x4_ddl_mmxext, 1,1
- - movq mm1, [r0-FDEC_STRIDE]
- - movq mm2, mm1
- - movq mm3, mm1
- - movq mm4, mm1
- - psllq mm1, 8
- - pxor mm2, mm1
- - psrlq mm2, 8
- - pxor mm3, mm2
- - PRED8x8_LOWPASS mm0, mm1, mm3, mm4, mm5
- +%macro PREDICT_4x4_DDL 4 ; args: 1=cpu suffix, 2=shift suffix (q/dq), 3=shift count for one pixel, 4=element suffix (b/w)
- +cglobal predict_4x4_ddl_%1, 1,1
- + movu m1, [r0-SIZEOF_PIXEL*FDEC_STRIDE] ; row above the block, t0..t7; movu because with 16-bit pixels 4x4 blocks are 8 bytes apart, so a 16-byte load here is not guaranteed aligned
- + mova m2, m1
- + mova m3, m1
- + mova m4, m1 ; m4 = t[n], the centre tap
- + psll%2 m1, %3 ; m1 = t[n-1] (lane 0 becomes 0)
- + pxor m2, m1 ; m2 = t[n] ^ t[n-1]
- + psrl%2 m2, %3 ; m2 = t[n+1] ^ t[n], last lane 0
- + pxor m3, m2 ; m3 = t[n+1], with the last pixel repeated in the last lane
- + PRED8x8_LOWPASS %4, m0, m1, m3, m4, m5 ; m0 = (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2; m1, m3, m5 clobbered
- %assign Y 0
- %rep 4
- - psrlq mm0, 8
- - movd [r0+Y*FDEC_STRIDE], mm0
- + psrl%2 m0, %3 ; drop one pixel per row, so row Y starts at filtered lane Y+1 (lane 0 is never stored)
- + movh [r0+SIZEOF_PIXEL*Y*FDEC_STRIDE], m0 ; store row Y; presumably the low half of the register = 4 pixels (confirm movh width)
- %assign Y (Y+1)
- %endrep
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_4x4_DDL sse2 , dq, 2, w ; 16-bit pixels: byte shift of 2 = one pixel
- +%else
- +INIT_MMX
- +PREDICT_4x4_DDL mmxext, q , 8, b ; 8-bit pixels: bit shift of 8 = one pixel; movu is expected to stay a plain 8-byte load here
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_ddr( uint8_t *src )
- @@ -166,7 +174,7 @@ cglobal predict_4x4_ddr_%1, 1,1
- PALIGNR mm3, [r0+2*FDEC_STRIDE-8], 7, mm4
- movq mm2, mm3
- PALIGNR mm3, [r0+3*FDEC_STRIDE-8], 7, mm4
- - PRED8x8_LOWPASS mm0, mm3, mm1, mm2, mm4
- + PRED8x8_LOWPASS b, mm0, mm3, mm1, mm2, mm4
- %assign Y 3
- movd [r0+Y*FDEC_STRIDE], mm0
- %rep 3
- @@ -186,7 +194,7 @@ cglobal predict_4x4_vr_%1, 1,1
- PALIGNR mm0, [r0+1*FDEC_STRIDE-8], 7, mm2 ; ..t3t2t1t0ltl0l1
- movq mm2, mm0
- PALIGNR mm0, [r0+2*FDEC_STRIDE-8], 7, mm3 ; t3t2t1t0ltl0l1l2
- - PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
- + PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- movq mm1, mm3
- psrlq mm3, 16
- psllq mm1, 48
- @@ -215,7 +223,7 @@ cglobal predict_4x4_hd_%1, 1,1
- psrlq mm0, 16 ; .. .. t2 t1 t0 lt l0 l1
- psrlq mm2, 8 ; .. t2 t1 t0 lt l0 l1 l2
- pavgb mm7, mm2
- - PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
- + PRED8x8_LOWPASS b, mm3, mm1, mm0, mm2, mm4
- punpcklbw mm7, mm3
- psrlq mm3, 32
- PALIGNR mm3, mm7, 6, mm6
- @@ -230,6 +238,7 @@ cglobal predict_4x4_hd_%1, 1,1
- RET
- %endmacro
- +INIT_MMX
- %define PALIGNR PALIGNR_MMX
- PREDICT_4x4 mmxext
- %define PALIGNR PALIGNR_SSSE3
- @@ -254,7 +263,7 @@ cglobal predict_4x4_hu_mmxext, 1,1
- psrlq mm2, 16
- psrlq mm3, 8
- pavgb mm7, mm3
- - PRED8x8_LOWPASS mm4, mm0, mm2, mm3, mm5
- + PRED8x8_LOWPASS b, mm4, mm0, mm2, mm3, mm5
- punpcklbw mm7, mm4
- %assign Y 0
- movd [r0+Y*FDEC_STRIDE], mm7
- @@ -278,7 +287,7 @@ cglobal predict_4x4_vl_mmxext, 1,1
- movq mm4, mm3
- pavgb mm4, mm1
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm3, mm5
- movd [r0+0*FDEC_STRIDE], mm4
- movd [r0+1*FDEC_STRIDE], mm0
- @@ -353,10 +362,10 @@ cglobal predict_8x8_filter_%1, 4,5
- je .fix_lt_1
- .do_left:
- movq mm0, mm4
- - PRED8x8_LOWPASS mm2, mm1, mm4, mm3, mm5
- + PRED8x8_LOWPASS b, mm2, mm1, mm4, mm3, mm5
- movq [t1+8], mm2
- movq mm4, mm0
- - PRED8x8_LOWPASS mm1, mm3, mm0, mm4, mm5
- + PRED8x8_LOWPASS b, mm1, mm3, mm0, mm4, mm5
- movd t4, mm1
- mov [t1+7], t4b
- .check_top:
- @@ -374,7 +383,7 @@ cglobal predict_8x8_filter_%1, 4,5
- test r2b, 0x04
- je .fix_tr_1
- .do_top:
- - PRED8x8_LOWPASS mm4, mm2, mm1, mm3, mm5
- + PRED8x8_LOWPASS b, mm4, mm2, mm1, mm3, mm5
- movq [t1+16], mm4
- test r3b, 0x04
- je .done
- @@ -387,7 +396,7 @@ cglobal predict_8x8_filter_%1, 4,5
- psrlq mm5, 56
- PALIGNR mm2, mm3, 7, mm3
- PALIGNR mm5, mm4, 1, mm4
- - PRED8x8_LOWPASS mm1, mm2, mm5, mm0, mm4
- + PRED8x8_LOWPASS b, mm1, mm2, mm5, mm0, mm4
- jmp .do_topright
- .fix_tr_2:
- punpckhbw mm3, mm3
- @@ -424,6 +433,7 @@ cglobal predict_8x8_filter_%1, 4,5
- %endmacro
- %define PALIGNR PALIGNR_MMX
- +INIT_MMX
- PREDICT_FILTER mmxext
- %define PALIGNR PALIGNR_SSSE3
- PREDICT_FILTER ssse3
- @@ -511,8 +521,8 @@ cglobal predict_8x8_ddl_mmxext, 2,2
- movq mm1, mm5
- psllq mm1, 8
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm5, mm7
- - PRED8x8_LOWPASS mm1, mm3, mm4, [r1+24], mm6
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm5, mm7
- + PRED8x8_LOWPASS b, mm1, mm3, mm4, [r1+24], mm6
- %assign Y 3
- %rep 6
- movq [r0+Y*FDEC_STRIDE], mm1
- @@ -540,8 +550,8 @@ cglobal predict_8x8_ddr_mmxext, 2,2
- movq mm3, [r1+15]
- movq mm4, [r1+17]
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, [r1+8], mm7
- - PRED8x8_LOWPASS mm1, mm3, mm4, [r1+16], mm6
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, [r1+8], mm7
- + PRED8x8_LOWPASS b, mm1, mm3, mm4, [r1+16], mm6
- %assign Y 3
- %rep 6
- movq [r0+Y*FDEC_STRIDE], mm0
- @@ -582,7 +592,7 @@ cglobal predict_8x8_hu_mmxext, 2,2
- punpckhbw mm1, mm1
- por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- pavgb mm4, mm2
- - PRED8x8_LOWPASS mm1, mm3, mm5, mm2, mm6
- + PRED8x8_LOWPASS b, mm1, mm3, mm5, mm2, mm6
- movq mm5, mm4
- punpcklbw mm4, mm1 ; p4 p3 p2 p1
- punpckhbw mm5, mm1 ; p8 p7 p6 p5
- @@ -627,7 +637,7 @@ cglobal predict_8x8_vr_core_mmxext, 2,2
- movq mm4, mm3
- pavgb mm3, mm2
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS mm0, mm1, mm2, mm4, mm7
- + PRED8x8_LOWPASS b, mm0, mm1, mm2, mm4, mm7
- %assign Y -4
- %rep 3
- @@ -714,6 +724,7 @@ ALIGN 4
- %endif ; !ARCH_X86_64
- +INIT_XMM
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_ddl( uint8_t *src, uint8_t *edge )
- ;-----------------------------------------------------------------------------
- @@ -723,7 +734,7 @@ cglobal predict_8x8_ddl_sse2, 2,2
- movdqa xmm1, xmm3
- pslldq xmm1, 1
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm4
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm4
- %assign Y -4
- %rep 8
- @@ -742,7 +753,7 @@ cglobal predict_8x8_ddr_sse2, 2,2
- movdqa xmm2, xmm3
- psrldq xmm2, 1
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm4
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm4
- movdqa xmm1, xmm0
- psrldq xmm1, 1
- @@ -771,7 +782,7 @@ cglobal predict_8x8_vl_sse2, 2,2
- pslldq xmm1, 1
- pavgb xmm3, xmm2
- add r0, FDEC_STRIDE*4
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm4, xmm5
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm4, xmm5
- ; xmm0: (t0 + 2*t1 + t2 + 2) >> 2
- ; xmm3: (t0 + t1 + 1) >> 1
- @@ -802,7 +813,7 @@ cglobal predict_8x8_vr_sse2, 2,2,7
- pslldq xmm0, 1
- pslldq xmm1, 2
- pavgb xmm2, xmm0
- - PRED8x8_LOWPASS_XMM xmm4, xmm3, xmm1, xmm0, xmm5
- + PRED8x8_LOWPASS b, xmm4, xmm3, xmm1, xmm0, xmm5
- pandn xmm6, xmm4
- movdqa xmm5, xmm4
- psrlw xmm4, 8
- @@ -824,6 +835,7 @@ cglobal predict_8x8_vr_sse2, 2,2,7
- %endrep
- RET
- +INIT_MMX
- ;-----------------------------------------------------------------------------
- ; void predict_8x8_hd( uint8_t *src, uint8_t *edge )
- ;-----------------------------------------------------------------------------
- @@ -840,12 +852,12 @@ cglobal predict_8x8_hd_mmxext, 2,2
- PALIGNR mm4, mm3, 1, mm7 ; t0 lt l0 l1 l2 l3 l4 l5
- movq mm5, mm3
- pavgb mm3, mm1
- - PRED8x8_LOWPASS mm0, mm4, mm1, mm5, mm7
- + PRED8x8_LOWPASS b, mm0, mm4, mm1, mm5, mm7
- movq mm4, mm2
- movq mm1, mm2 ; t6 t5 t4 t3 t2 t1 t0 lt
- psrlq mm4, 16 ; .. .. t6 t5 t4 t3 t2 t1
- psrlq mm1, 8 ; .. t6 t5 t4 t3 t2 t1 t0
- - PRED8x8_LOWPASS mm6, mm4, mm2, mm1, mm5
- + PRED8x8_LOWPASS b, mm6, mm4, mm2, mm1, mm5
- ; .. p11 p10 p9
- movq mm7, mm3
- punpcklbw mm3, mm0 ; p4 p3 p2 p1
- @@ -886,7 +898,7 @@ cglobal predict_8x8_hd_%1, 2,2
- PALIGNR xmm3, xmm0, 8, xmm0
- movdqa xmm4, xmm1
- pavgb xmm4, xmm3
- - PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm5
- + PRED8x8_LOWPASS b, xmm0, xmm1, xmm2, xmm3, xmm5
- punpcklbw xmm4, xmm0
- movhlps xmm0, xmm4
- @@ -946,7 +958,7 @@ cglobal predict_8x8_hu_%1, 2,2
- por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- %endif
- pavgb mm4, mm2
- - PRED8x8_LOWPASS mm1, mm3, mm5, mm2, mm6
- + PRED8x8_LOWPASS b, mm1, mm3, mm5, mm2, mm6
- movq2dq xmm0, mm4
- movq2dq xmm1, mm1
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 8eafcc0..077f2c7 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -64,6 +64,7 @@
- void x264_predict_8x8_filter_mmxext( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_ssse3( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_4x4_ddl_mmxext( uint8_t *src );
- + void x264_predict_4x4_ddl_sse2( uint16_t *src );
- void x264_predict_4x4_ddr_mmxext( uint8_t *src );
- void x264_predict_4x4_vl_mmxext( uint8_t *src );
- void x264_predict_4x4_vr_mmxext( uint8_t *src );
- @@ -474,7 +475,11 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- -#if !HIGH_BIT_DEPTH
- +#if HIGH_BIT_DEPTH
- + if( !(cpu&X264_CPU_SSE2) )
- + return;
- + pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- +#else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
- @@ -487,5 +492,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- -#endif // !HIGH_BIT_DEPTH
- +#endif // HIGH_BIT_DEPTH
- }
- --
- 1.7.2.3
- From b0236007e86ec4bdc5909d950a2a57f3aac5ce6d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Thu, 25 Nov 2010 19:57:30 -0500
- Subject: [PATCH 2/3] x86: add high bit depth predict_4x4_hu_sse2
- ---
- common/x86/predict-a.asm | 36 +++++++++++++++++++++++++++++++++++-
- common/x86/predict-c.c | 2 ++
- 2 files changed, 37 insertions(+), 1 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index 739ce35..f2ab91e 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -245,8 +245,40 @@ PREDICT_4x4 mmxext
- PREDICT_4x4 ssse3
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_hu( uint8_t *src )
- +; void predict_4x4_hu( pixel *src )
- ;-----------------------------------------------------------------------------
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +cglobal predict_4x4_hu_sse2, 1,1,7 ; r0 = src (16-bit pixels); m0-m6 are used below
- + mova m0, [r0+2*0*FDEC_STRIDE-8*2] ; 8 words ending just left of row 0; the top word is the left neighbour l0
- + punpckhwd m0, [r0+2*1*FDEC_STRIDE-8*2] ; top two words = l0 l1. NOTE(review): both loads need 16-byte alignment; r0 is presumably only 8-byte aligned for odd block columns -- confirm
- + mova m1, [r0+2*2*FDEC_STRIDE-8*2] ; same for rows 2 and 3
- + punpckhwd m1, [r0+2*3*FDEC_STRIDE-8*2] ; top two words = l2 l3
- + punpckhdq m0, m1 ; high qword of m0 = l0 l1 l2 l3
- + mova m1, m0
- + pshufhw m1, m1, 0xFF ; broadcast l3 across the high four words
- + punpckhqdq m1, m1 ; m1 = l3 in all eight words
- + punpckhqdq m0, m1 ; m0 = l0 l1 l2 l3 l3 l3 l3 l3 (word 0 first)
- + mova m2, m0
- + mova m3, m0
- + mova m6, m0
- + psrldq m2, 4 ; m2 = l[n+2]
- + psrldq m3, 2 ; m3 = l[n+1]
- + pavgw m6, m3 ; m6 = (l[n] + l[n+1] + 1) >> 1
- + PRED8x8_LOWPASS w, m4, m0, m2, m3, m5 ; m4 = (l[n] + 2*l[n+1] + l[n+2] + 2) >> 2; m0, m2, m5 clobbered
- + punpcklwd m6, m4 ; interleave: avg0 lp0 avg1 lp1 avg2 lp2 avg3 lp3
- +%assign Y 0
- + movq [r0+Y*2*FDEC_STRIDE], m6 ; row 0 = first four interleaved words
- +%rep 2
- +%assign Y (Y+1)
- + psrldq m6, 4 ; advance two words (one avg/lowpass pair) per row
- + movq [r0+2*Y*FDEC_STRIDE], m6 ; rows 1 and 2
- +%endrep
- + movq [r0+2*3*FDEC_STRIDE], m1 ; row 3 = l3 repeated four times
- + RET
- +
- +%else
- +INIT_MMX
- cglobal predict_4x4_hu_mmxext, 1,1
- movq mm0, [r0+0*FDEC_STRIDE-8]
- punpckhbw mm0, [r0+1*FDEC_STRIDE-8]
- @@ -274,10 +306,12 @@ cglobal predict_4x4_hu_mmxext, 1,1
- %endrep
- movd [r0+3*FDEC_STRIDE], mm1
- RET
- +%endif ;HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_vl( uint8_t *src )
- ;-----------------------------------------------------------------------------
- +INIT_MMX
- cglobal predict_4x4_vl_mmxext, 1,1
- movq mm1, [r0-FDEC_STRIDE]
- movq mm3, mm1
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 077f2c7..8f15e4e 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -74,6 +74,7 @@
- void x264_predict_4x4_dc_mmxext( uint8_t *src );
- void x264_predict_4x4_ddr_ssse3( uint8_t *src );
- void x264_predict_4x4_hu_mmxext( uint8_t *src );
- + void x264_predict_4x4_hu_sse2( uint16_t *src );
- void x264_predict_16x16_dc_top_sse2( uint8_t *src );
- void x264_predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
- void x264_predict_16x16_dc_left_core_sse2( uint8_t *src, int i_dc_left );
- @@ -479,6 +480,7 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- if( !(cpu&X264_CPU_SSE2) )
- return;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- + pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- --
- 1.7.2.3
- From 8a6a2631e91e4b80834631abb257b306f8d90e64 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Fri, 26 Nov 2010 00:24:28 -0500
- Subject: [PATCH 3/3] x86: add high bit depth predict_4x4_vl_sse2
- ---
- common/x86/predict-a.asm | 43 ++++++++++++++++++++++++++-----------------
- common/x86/predict-c.c | 2 ++
- 2 files changed, 28 insertions(+), 17 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index f2ab91e..acdcca3 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -309,28 +309,37 @@ cglobal predict_4x4_hu_mmxext, 1,1
- %endif ;HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- -; void predict_4x4_vl( uint8_t *src )
- -;-----------------------------------------------------------------------------
- -INIT_MMX
- -cglobal predict_4x4_vl_mmxext, 1,1
- - movq mm1, [r0-FDEC_STRIDE]
- - movq mm3, mm1
- - movq mm2, mm1
- - psrlq mm3, 8
- - psrlq mm2, 16
- - movq mm4, mm3
- - pavgb mm4, mm1
- -
- - PRED8x8_LOWPASS b, mm0, mm1, mm2, mm3, mm5
- -
- - movd [r0+0*FDEC_STRIDE], mm4
- - movd [r0+1*FDEC_STRIDE], mm0
- - psrlq mm4, 8
- - psrlq mm0, 8
- - movd [r0+2*FDEC_STRIDE], mm4
- - movd [r0+3*FDEC_STRIDE], mm0
- +; void predict_4x4_vl( pixel *src )
- +;-----------------------------------------------------------------------------
- +%macro PREDICT_4x4_V1 4 ; args: 1=cpu suffix, 2=shift suffix (q/dq), 3=shift count for one pixel, 4=element suffix (b/w). NOTE(review): name ends in digit 1 although the function is vl -- likely a typo
- +cglobal predict_4x4_vl_%1, 1,1
- + movu m1, [r0-SIZEOF_PIXEL*FDEC_STRIDE] ; row above the block, t0..t7; movu because with 16-bit pixels 4x4 blocks are 8 bytes apart, so a 16-byte load here is not guaranteed aligned
- + mova m3, m1
- + mova m2, m1
- + psrl%2 m3, %3 ; m3 = t[n+1]
- + psrl%2 m2, %3*2 ; m2 = t[n+2]
- + mova m4, m3
- + pavg%4 m4, m1 ; m4 = (t[n] + t[n+1] + 1) >> 1
- +
- + PRED8x8_LOWPASS %4, m0, m1, m2, m3, m5 ; m0 = (t[n] + 2*t[n+1] + t[n+2] + 2) >> 2; m1, m2, m5 clobbered
- +
- + movh [r0+SIZEOF_PIXEL*0*FDEC_STRIDE], m4 ; row 0 = averages, lanes 0-3 (presumably movh stores 4 pixels -- confirm)
- + movh [r0+SIZEOF_PIXEL*1*FDEC_STRIDE], m0 ; row 1 = lowpass, lanes 0-3
- + psrl%2 m4, %3 ; step both results along by one pixel
- + psrl%2 m0, %3
- + movh [r0+SIZEOF_PIXEL*2*FDEC_STRIDE], m4 ; row 2 = averages, lanes 1-4
- + movh [r0+SIZEOF_PIXEL*3*FDEC_STRIDE], m0 ; row 3 = lowpass, lanes 1-4
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +PREDICT_4x4_V1 sse2 , dq, 2, w ; 16-bit pixels: byte shift of 2 = one pixel
- +%else
- +INIT_MMX
- +PREDICT_4x4_V1 mmxext, q , 8, b ; 8-bit pixels: bit shift of 8 = one pixel; movu is expected to stay a plain 8-byte load here
- +%endif
- ;-----------------------------------------------------------------------------
- ; void predict_4x4_dc( uint8_t *src )
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 8f15e4e..75843a0 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -67,6 +67,7 @@
- void x264_predict_4x4_ddl_sse2( uint16_t *src );
- void x264_predict_4x4_ddr_mmxext( uint8_t *src );
- void x264_predict_4x4_vl_mmxext( uint8_t *src );
- + void x264_predict_4x4_vl_sse2( uint16_t *src );
- void x264_predict_4x4_vr_mmxext( uint8_t *src );
- void x264_predict_4x4_vr_ssse3( uint8_t *src );
- void x264_predict_4x4_hd_mmxext( uint8_t *src );
- @@ -481,6 +482,7 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- return;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- + pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- --
- 1.7.2.3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement