Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 6e9f6f7c55bd4fe95adc69b1ec7a7ff335f46fe7 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Tue, 7 Dec 2010 15:19:46 -0500
- Subject: [PATCH 1/3] Use unaligned loads in high-bit-depth 4x4 intra prediction asm and enable the functions
- ---
- common/x86/predict-a.asm | 48 ++++++++++++++++++++++++++++++++++++++++++---
- common/x86/predict-c.c | 10 ++++----
- 2 files changed, 49 insertions(+), 9 deletions(-)
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- index c908c3a..7af6024 100644
- --- a/common/x86/predict-a.asm
- +++ b/common/x86/predict-a.asm
- @@ -168,7 +168,7 @@ cextern pb_reverse
- ;-----------------------------------------------------------------------------
- %macro PREDICT_4x4_DDL 4
- cglobal predict_4x4_ddl_%1, 1,1
- - mova m1, [r0-FDEC_STRIDEB]
- + movu m1, [r0-FDEC_STRIDEB]
- mova m2, m1
- mova m3, m1
- mova m4, m1
- @@ -224,8 +224,21 @@ PREDICT_4x4_DDL mmxext, q , 8, b
- ;-----------------------------------------------------------------------------
- %macro PREDICT_4x4 7
- cglobal predict_4x4_ddr_%1, 1,1
- - mova m1, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- - mova m2, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + movu m1, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + movu m2, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- +%ifdef HIGH_BIT_DEPTH
- + movu m4, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + punpckh%2 m2, m4
- + movh m3, [r0-1*FDEC_STRIDEB]
- + punpckh%3 m1, m2
- + PALIGNR m3, m1, 5*SIZEOF_PIXEL, m1
- + mova m1, m3
- + movu m4, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m3, m4, 7*SIZEOF_PIXEL, m4
- + mova m2, m3
- + movu m4, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m3, m4, 7*SIZEOF_PIXEL, m4
- +%else
- punpckh%2 m2, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- movh m3, [r0-1*FDEC_STRIDEB]
- punpckh%3 m1, m2
- @@ -234,6 +247,7 @@ cglobal predict_4x4_ddr_%1, 1,1
- PALIGNR m3, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4
- mova m2, m3
- PALIGNR m3, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m4
- +%endif
- PRED8x8_LOWPASS %5, m0, m3, m1, m2, m4
- %assign Y 3
- movh [r0+Y*FDEC_STRIDEB], m0
- @@ -247,6 +261,19 @@ cglobal predict_4x4_ddr_%1, 1,1
- cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
- movh m0, [r0-1*FDEC_STRIDEB] ; ........t3t2t1t0
- mova m5, m0
- +%ifdef HIGH_BIT_DEPTH
- + movu m1, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m0, m1, 7*SIZEOF_PIXEL, m1 ; ......t3t2t1t0lt
- + pavg%5 m5, m0
- + movu m1, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m0, m1, 7*SIZEOF_PIXEL, m1 ; ....t3t2t1t0ltl0
- + mova m1, m0
- + movu m2, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m0, m2, 7*SIZEOF_PIXEL, m2 ; ..t3t2t1t0ltl0l1
- + mova m2, m0
- + movu m3, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + PALIGNR m0, m3, 7*SIZEOF_PIXEL, m3 ; t3t2t1t0ltl0l1l2
- +%else
- PALIGNR m0, [r0-1*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ......t3t2t1t0lt
- pavg%5 m5, m0
- PALIGNR m0, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m1 ; ....t3t2t1t0ltl0
- @@ -254,6 +281,7 @@ cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
- PALIGNR m0, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m2 ; ..t3t2t1t0ltl0l1
- mova m2, m0
- PALIGNR m0, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL], 7*SIZEOF_PIXEL, m3 ; t3t2t1t0ltl0l1l2
- +%endif
- PRED8x8_LOWPASS %5, m3, m1, m0, m2, m4
- mova m1, m3
- psrl%4 m3, %7*2
- @@ -269,12 +297,24 @@ cglobal predict_4x4_vr_%1, 1,1,6*(mmsize/16)
- cglobal predict_4x4_hd_%1, 1,1,6*(mmsize/16)
- movh m0, [r0-1*FDEC_STRIDEB-4*SIZEOF_PIXEL] ; lt ..
- +%ifdef HIGH_BIT_DEPTH
- + movu m1, [r0-1*FDEC_STRIDEB]
- + punpckl%6 m0, m1 ; t3 t2 t1 t0 lt .. .. ..
- + psll%4 m0, %7 ; t2 t1 t0 lt .. .. .. ..
- + movu m1, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l3
- + movu m2, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + punpckh%2 m1, m2 ; l2 l3
- + movu m2, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l1
- + movu m3, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- + punpckh%2 m2, m3 ; l0 l1
- +%else
- punpckl%6 m0, [r0-1*FDEC_STRIDEB] ; t3 t2 t1 t0 lt .. .. ..
- psll%4 m0, %7 ; t2 t1 t0 lt .. .. .. ..
- mova m1, [r0+3*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l3
- punpckh%2 m1, [r0+2*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l2 l3
- mova m2, [r0+1*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l1
- punpckh%2 m2, [r0+0*FDEC_STRIDEB-8*SIZEOF_PIXEL] ; l0 l1
- +%endif
- punpckh%3 m1, m2 ; l0 l1 l2 l3
- punpckh%6 m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
- mova m0, m1
- @@ -378,7 +418,7 @@ cglobal predict_4x4_hu_mmxext, 1,1
- ;-----------------------------------------------------------------------------
- %macro PREDICT_4x4_V1 4
- cglobal predict_4x4_vl_%1, 1,1,6*(mmsize/16)
- - mova m1, [r0-FDEC_STRIDEB]
- + movu m1, [r0-FDEC_STRIDEB]
- mova m3, m1
- mova m2, m1
- psrl%2 m3, %3
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 994e05f..829a191 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -505,14 +505,14 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- #if HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_SSE2) )
- return;
- -// pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- + pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
- pf[I_PRED_4x4_HU] = x264_predict_4x4_hu_sse2;
- -// pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
- + pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_sse2;
- if( !(cpu&X264_CPU_SSSE3) )
- return;
- -// pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- -// pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- -// pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- + pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- + pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- + pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- #else
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmxext;
- --
- 1.7.2.3
- From 590562302162f9f150b36a2ab813e190d70abd2a Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Tue, 7 Dec 2010 22:48:15 -0500
- Subject: [PATCH 2/3] zigzag_scan_4x4_frame, zigzag_scan_4x4_field, zigzag_scan_8x8_frame, zigzag_scan_8x8_field
- ---
- common/dct.c | 23 +++-
- common/x86/dct-a.asm | 440 ++++++++++++++++++++++++++++----------------------
- common/x86/dct.h | 5 +-
- 3 files changed, 268 insertions(+), 200 deletions(-)
- diff --git a/common/dct.c b/common/dct.c
- index 788452b..25c53d9 100644
- --- a/common/dct.c
- +++ b/common/dct.c
- @@ -732,7 +732,16 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- pf->sub_8x8 = zigzag_sub_8x8_field;
- pf->sub_4x4 = zigzag_sub_4x4_field;
- pf->sub_4x4ac = zigzag_sub_4x4ac_field;
- -#if !HIGH_BIT_DEPTH
- +#if HIGH_BIT_DEPTH
- + if( cpu&X264_CPU_SSE2 )
- + {
- + pf->scan_4x4 = x264_zigzag_scan_4x4_field_sse2;
- + }
- + if( cpu&X264_CPU_SSE4 )
- + {
- + pf->scan_8x8 = x264_zigzag_scan_8x8_field_sse4;
- + }
- +#else
- #if HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- @@ -750,7 +759,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- if( cpu&X264_CPU_ALTIVEC )
- pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
- #endif
- -#endif // !HIGH_BIT_DEPTH
- +#endif // HIGH_BIT_DEPTH
- }
- else
- {
- @@ -759,7 +768,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- pf->sub_8x8 = zigzag_sub_8x8_frame;
- pf->sub_4x4 = zigzag_sub_4x4_frame;
- pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
- -#if !HIGH_BIT_DEPTH
- +#if HIGH_BIT_DEPTH
- + if( cpu&X264_CPU_SSE2 )
- + {
- + pf->scan_4x4 = x264_zigzag_scan_4x4_frame_sse2;
- + pf->scan_8x8 = x264_zigzag_scan_8x8_frame_sse2;
- + }
- +#else
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
- @@ -785,7 +800,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- if( cpu&X264_CPU_NEON )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
- #endif
- -#endif // !HIGH_BIT_DEPTH
- +#endif // HIGH_BIT_DEPTH
- }
- pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
- diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
- index 50f806a..8500e03 100644
- --- a/common/x86/dct-a.asm
- +++ b/common/x86/dct-a.asm
- @@ -891,136 +891,158 @@ cglobal zigzag_scan_8x8_frame_%1, 2,2,8
- RET
- %endmacro
- +%ifndef HIGH_BIT_DEPTH
- INIT_XMM
- %define PALIGNR PALIGNR_MMX
- SCAN_8x8 sse2
- %define PALIGNR PALIGNR_SSSE3
- SCAN_8x8 ssse3
- +%endif
- ;-----------------------------------------------------------------------------
- -; void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
- +; void zigzag_scan_8x8_frame( dctcoef level[64], dctcoef dct[8][8] )
- ;-----------------------------------------------------------------------------
- -cglobal zigzag_scan_8x8_frame_mmxext, 2,2
- - movq mm0, [r1]
- - movq mm1, [r1+2*8]
- - movq mm2, [r1+2*14]
- - movq mm3, [r1+2*21]
- - movq mm4, [r1+2*28]
- - movq mm5, mm0
- - movq mm6, mm1
- - psrlq mm0, 16
- - punpckldq mm1, mm1
- - punpcklwd mm5, mm6
- - punpckhwd mm1, mm3
- - punpckhwd mm6, mm0
- - punpckldq mm5, mm0
- - movq mm7, [r1+2*52]
- - movq mm0, [r1+2*60]
- - punpckhwd mm1, mm2
- - punpcklwd mm2, mm4
- - punpckhwd mm4, mm3
- - punpckldq mm3, mm3
- - punpckhwd mm3, mm2
- - movq [r0], mm5
- - movq [r0+2*4], mm1
- - movq [r0+2*8], mm6
- - punpcklwd mm6, mm0
- - punpcklwd mm6, mm7
- - movq mm1, [r1+2*32]
- - movq mm5, [r1+2*39]
- - movq mm2, [r1+2*46]
- - movq [r0+2*35], mm3
- - movq [r0+2*47], mm4
- - punpckhwd mm7, mm0
- - psllq mm0, 16
- - movq mm3, mm5
- - punpcklwd mm5, mm1
- - punpckhwd mm1, mm2
- - punpckhdq mm3, mm3
- - movq [r0+2*52], mm6
- - movq [r0+2*13], mm5
- - movq mm4, [r1+2*11]
- - movq mm6, [r1+2*25]
- - punpcklwd mm5, mm7
- - punpcklwd mm1, mm3
- - punpckhdq mm0, mm7
- - movq mm3, [r1+2*4]
- - movq mm7, [r1+2*18]
- - punpcklwd mm2, mm5
- - movq [r0+2*25], mm1
- - movq mm1, mm4
- - movq mm5, mm6
- - punpcklwd mm4, mm3
- - punpcklwd mm6, mm7
- - punpckhwd mm1, mm3
- - punpckhwd mm5, mm7
- - movq mm3, mm6
- - movq mm7, mm5
- - punpckldq mm6, mm4
- - punpckldq mm5, mm1
- - punpckhdq mm3, mm4
- - punpckhdq mm7, mm1
- - movq mm4, [r1+2*35]
- - movq mm1, [r1+2*49]
- - pshufw mm6, mm6, 0x1b
- - pshufw mm5, mm5, 0x1b
- - movq [r0+2*60], mm0
- - movq [r0+2*56], mm2
- - movq mm0, [r1+2*42]
- - movq mm2, [r1+2*56]
- - movq [r0+2*17], mm3
- - movq [r0+2*32], mm7
- - movq [r0+2*10], mm6
- - movq [r0+2*21], mm5
- - movq mm3, mm0
- - movq mm7, mm2
- - punpcklwd mm0, mm4
- - punpcklwd mm2, mm1
- - punpckhwd mm3, mm4
- - punpckhwd mm7, mm1
- - movq mm4, mm2
- - movq mm1, mm7
- - punpckhdq mm2, mm0
- - punpckhdq mm7, mm3
- - punpckldq mm4, mm0
- - punpckldq mm1, mm3
- - pshufw mm2, mm2, 0x1b
- - pshufw mm7, mm7, 0x1b
- - movq [r0+2*28], mm4
- - movq [r0+2*43], mm1
- - movq [r0+2*39], mm2
- - movq [r0+2*50], mm7
- +%macro SCAN_8x8_FRAME 6 ; %1=function name suffix, %2=shift count, %3=shift suffix (psrl/psll), %4=wide punpck suffix, %5=narrow punpck suffix, %6=pshuf suffix
- +cglobal zigzag_scan_8x8_frame_%1, 2,2,8*(mmsize/16) ; r0 = level (written), r1 = dct (read)
- + mova m0, [r1]
- + mova m1, [r1+SIZEOF_PIXEL*2* 8]
- + movu m2, [r1+SIZEOF_PIXEL*2*14] ; movu: presumably because offset 14 is not vector-aligned -- confirm against dct alignment
- + movu m3, [r1+SIZEOF_PIXEL*2*21]
- + mova m4, [r1+SIZEOF_PIXEL*2*28]
- + mova m5, m0
- + mova m6, m1
- + psrl%3 m0, %2
- + punpckl%4 m1, m1
- + punpckl%5 m5, m6
- + punpckh%5 m1, m3
- + punpckh%5 m6, m0
- + punpckl%4 m5, m0
- + mova m7, [r1+SIZEOF_PIXEL*2*52]
- + mova m0, [r1+SIZEOF_PIXEL*2*60]
- + punpckh%5 m1, m2
- + punpckl%5 m2, m4
- + punpckh%5 m4, m3
- + punpckl%4 m3, m3
- + punpckh%5 m3, m2
- + mova [r0], m5
- + mova [r0+SIZEOF_PIXEL*2*4], m1
- + mova [r0+SIZEOF_PIXEL*2*8], m6
- + punpckl%5 m6, m0
- + punpckl%5 m6, m7
- + mova m1, [r1+SIZEOF_PIXEL*2*32]
- + movu m5, [r1+SIZEOF_PIXEL*2*39]
- + movu m2, [r1+SIZEOF_PIXEL*2*46]
- + movu [r0+SIZEOF_PIXEL*2*35], m3
- + movu [r0+SIZEOF_PIXEL*2*47], m4
- + punpckh%5 m7, m0
- + psll%3 m0, %2
- + mova m3, m5
- + punpckl%5 m5, m1
- + punpckh%5 m1, m2
- + punpckh%4 m3, m3
- + mova [r0+SIZEOF_PIXEL*2*52], m6
- + movu [r0+SIZEOF_PIXEL*2*13], m5
- + movu m4, [r1+SIZEOF_PIXEL*2*11]
- + movu m6, [r1+SIZEOF_PIXEL*2*25]
- + punpckl%5 m5, m7
- + punpckl%5 m1, m3
- + punpckh%4 m0, m7
- + mova m3, [r1+SIZEOF_PIXEL*2* 4]
- + movu m7, [r1+SIZEOF_PIXEL*2*18]
- + punpckl%5 m2, m5
- + movu [r0+SIZEOF_PIXEL*2*25], m1
- + mova m1, m4
- + mova m5, m6
- + punpckl%5 m4, m3
- + punpckl%5 m6, m7
- + punpckh%5 m1, m3
- + punpckh%5 m5, m7
- + mova m3, m6
- + mova m7, m5
- + punpckl%4 m6, m4
- + punpckl%4 m5, m1
- + punpckh%4 m3, m4
- + punpckh%4 m7, m1
- + movu m4, [r1+SIZEOF_PIXEL*2*35]
- + movu m1, [r1+SIZEOF_PIXEL*2*49]
- + pshuf%6 m6, m6, 0x1b ; selector 0x1b reverses the order of the four elements
- + pshuf%6 m5, m5, 0x1b
- + mova [r0+SIZEOF_PIXEL*2*60], m0
- + mova [r0+SIZEOF_PIXEL*2*56], m2
- + movu m0, [r1+SIZEOF_PIXEL*2*42]
- + mova m2, [r1+SIZEOF_PIXEL*2*56]
- + movu [r0+SIZEOF_PIXEL*2*17], m3
- + mova [r0+SIZEOF_PIXEL*2*32], m7
- + movu [r0+SIZEOF_PIXEL*2*10], m6
- + movu [r0+SIZEOF_PIXEL*2*21], m5
- + mova m3, m0
- + mova m7, m2
- + punpckl%5 m0, m4
- + punpckl%5 m2, m1
- + punpckh%5 m3, m4
- + punpckh%5 m7, m1
- + mova m4, m2
- + mova m1, m7
- + punpckh%4 m2, m0
- + punpckh%4 m7, m3
- + punpckl%4 m4, m0
- + punpckl%4 m1, m3
- + pshuf%6 m2, m2, 0x1b
- + pshuf%6 m7, m7, 0x1b
- + mova [r0+SIZEOF_PIXEL*2*28], m4
- + movu [r0+SIZEOF_PIXEL*2*43], m1
- + movu [r0+SIZEOF_PIXEL*2*39], m2
- + movu [r0+SIZEOF_PIXEL*2*50], m7
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +SCAN_8x8_FRAME sse2 , 4 , dq, qdq, dq, d
- +%else
- +INIT_MMX
- +SCAN_8x8_FRAME mmxext, 16, q , dq , wd, w
- +%endif
- ;-----------------------------------------------------------------------------
- -; void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
- +; void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[4][4] )
- ;-----------------------------------------------------------------------------
- -cglobal zigzag_scan_4x4_frame_mmx, 2,2
- - movq mm0, [r1]
- - movq mm1, [r1+8]
- - movq mm2, [r1+16]
- - movq mm3, [r1+24]
- - movq mm4, mm0
- - movq mm5, mm1
- - movq mm6, mm2
- - movq mm7, mm3
- - psllq mm3, 16
- - psrlq mm0, 16
- - punpckldq mm2, mm2
- - punpckhdq mm1, mm1
- - punpcklwd mm4, mm5
- - punpcklwd mm5, mm3
- - punpckldq mm4, mm0
- - punpckhwd mm5, mm2
- - punpckhwd mm0, mm6
- - punpckhwd mm6, mm7
- - punpcklwd mm1, mm0
- - punpckhdq mm3, mm6
- - movq [r0], mm4
- - movq [r0+8], mm5
- - movq [r0+16], mm1
- - movq [r0+24], mm3
- +%macro SCAN_4x4 5 ; %1=function name suffix, %2=shift count, %3=shift suffix (psll/psrl), %4=wide punpck suffix, %5=narrow punpck suffix
- +cglobal zigzag_scan_4x4_frame_%1, 2,2,8*(mmsize/16) ; r0 = level (written), r1 = dct (read); xmm count is 0 when mmsize==8, 8 when mmsize==16
- + mova m0, [r1] ; four consecutive input vectors, SIZEOF_PIXEL*8 bytes apart
- + mova m1, [r1+SIZEOF_PIXEL* 8]
- + mova m2, [r1+SIZEOF_PIXEL*16]
- + mova m3, [r1+SIZEOF_PIXEL*24]
- + mova m4, m0 ; keep unshifted copies of all four inputs in m4-m7
- + mova m5, m1
- + mova m6, m2
- + mova m7, m3
- + psll%3 m3, %2
- + psrl%3 m0, %2
- + punpckl%4 m2, m2
- + punpckh%4 m1, m1
- + punpckl%5 m4, m5
- + punpckl%5 m5, m3
- + punpckl%4 m4, m0
- + punpckh%5 m5, m2
- + punpckh%5 m0, m6
- + punpckh%5 m6, m7
- + punpckl%5 m1, m0
- + punpckh%4 m3, m6
- + mova [r0], m4 ; four output vectors, same spacing as the inputs
- + mova [r0+SIZEOF_PIXEL* 8], m5
- + mova [r0+SIZEOF_PIXEL*16], m1
- + mova [r0+SIZEOF_PIXEL*24], m3
- RET
- +%endmacro
- +
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +SCAN_4x4 sse2, 4 , dq, qdq, dq
- +%else
- +INIT_MMX
- +SCAN_4x4 mmx , 16, q , dq , wd
- +%endif
- ;-----------------------------------------------------------------------------
- ; void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
- @@ -1039,6 +1061,25 @@ cglobal zigzag_scan_4x4_frame_ssse3, 2,2
- movdqa [r0+16], xmm1
- RET
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +;-----------------------------------------------------------------------------
- +; void zigzag_scan_4x4_field( int32_t level[16], int32_t dct[4][4] )
- +;-----------------------------------------------------------------------------
- +cglobal zigzag_scan_4x4_field_sse2, 2,3 ; r0 = level (written), r1 = dct (read), 32-bit coefficients
- + movu m4, [r1+8] ; dct[2..5]; byte offset 8 is not 16-aligned, hence movu
- + pshufd m0, m4, 0xd2 ; reorder to dct[4], dct[2], dct[3], dct[5]
- + mova m1, [r1+32] ; dct[8..11]
- + mova m2, [r1+48] ; dct[12..15]
- + movu [r0+8], m0 ; level[2..5]
- + mova [r0+32], m1 ; level[8..11], copied unchanged
- + mova [r0+48], m2 ; level[12..15], copied unchanged
- + movh m0, [r1] ; dct[0..1]; m0 is free again, and staying in XMM avoids touching MMX state
- + movh [r0], m0 ; level[0..1], copied unchanged
- + movh m0, [r1+24] ; dct[6..7]
- + movh [r0+24], m0 ; level[6..7], copied unchanged
- + RET
- +%else
- ;-----------------------------------------------------------------------------
- ; void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
- ;-----------------------------------------------------------------------------
- @@ -1055,11 +1096,11 @@ cglobal zigzag_scan_4x4_field_mmxext, 2,3
- mov r2d, [r1+12]
- mov [r0+12], r2d
- RET
- +%endif ; HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- ; void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
- ;-----------------------------------------------------------------------------
- -
- ; Output order:
- ; 0 1 2 8 9 3 4 10
- ; 16 11 5 6 7 12 17 24
- @@ -1069,84 +1110,93 @@ cglobal zigzag_scan_4x4_field_mmxext, 2,3
- ; 36 37 38 39 43 49 50 44
- ; 45 46 47 51 56 57 52 53
- ; 54 55 58 59 60 61 62 63
- -
- -cglobal zigzag_scan_8x8_field_mmxext, 2,3
- - movq mm0, [r1+2*0] ; 03 02 01 00
- - movq mm1, [r1+2*4] ; 07 06 05 04
- - movq mm2, [r1+2*8] ; 11 10 09 08
- - pshufw mm3, mm0, 011111111b ; 03 03 03 03
- - movd r2, mm2 ; 09 08
- - pshufw mm2, mm2, 000111001b ; 08 11 10 09
- - punpcklwd mm3, mm1 ; 05 03 04 03
- - pinsrw mm0, r2, 3 ; 08 02 01 00
- - movq mm4, mm2
- - punpcklwd mm2, mm3 ; 04 10 03 09
- - pshufw mm2, mm2, 010110100b ; 10 04 03 09
- - movq [r0+2*0], mm0 ; 08 02 01 00
- - movq [r0+2*4], mm2 ; 10 04 03 09
- - movq mm3, [r1+2*12] ; 15 14 13 12
- - movq mm5, [r1+2*16] ; 19 18 17 16
- - punpckldq mm6, mm5 ; 17 16 XX XX
- - psrlq mm1, 16 ; XX 07 06 05
- - punpckhwd mm6, mm4 ; 08 17 11 16
- - punpckldq mm6, mm1 ; 06 05 11 16
- - movq [r0+2*8], mm6 ; 06 05 11 16
- - psrlq mm1, 16 ; XX XX 07 06
- - punpcklwd mm1, mm5 ; 17 07 16 06
- - movq mm0, [r1+2*20] ; 23 22 21 20
- - movq mm2, [r1+2*24] ; 27 26 25 24
- - movq mm6, mm3
- - punpckhdq mm1, mm1 ; 17 07 17 07
- - punpcklwd mm6, mm2 ; 25 13 24 12
- - pextrw r2, mm5, 2
- - movq [r0+2*24], mm0 ; 23 22 21 20
- - punpcklwd mm1, mm6 ; 24 17 12 07
- - movq [r0+2*12], mm1
- - pinsrw mm3, r2, 0 ; 15 14 13 18
- - movq [r0+2*16], mm3 ; 15 14 13 18
- - movq mm7, [r1+2*28]
- - movq mm0, [r1+2*32] ; 35 34 33 32
- - psrlq mm5, 48 ; XX XX XX 19
- - pshufw mm1, mm2, 011111001b ; 27 27 26 25
- - punpcklwd mm5, mm0 ; 33 XX 32 19
- - psrlq mm2, 48 ; XX XX XX 27
- - punpcklwd mm5, mm1 ; 26 32 25 19
- - movq [r0+2*32], mm7
- - movq [r0+2*20], mm5 ; 26 32 25 19
- - movq mm7, [r1+2*36]
- - movq mm1, [r1+2*40] ; 43 42 41 40
- - pshufw mm3, mm0, 011111001b ; 35 35 34 33
- - punpcklwd mm2, mm1 ; 41 XX 40 27
- - movq [r0+2*40], mm7
- - punpcklwd mm2, mm3 ; 34 40 33 27
- - movq [r0+2*28], mm2
- - movq mm7, [r1+2*44] ; 47 46 45 44
- - movq mm2, [r1+2*48] ; 51 50 49 48
- - psrlq mm0, 48 ; XX XX XX 35
- - punpcklwd mm0, mm2 ; 49 XX 48 35
- - pshufw mm3, mm1, 011111001b ; 43 43 42 41
- - punpcklwd mm0, mm3 ; 42 48 41 35
- - movq [r0+2*36], mm0
- - pextrw r2, mm2, 3 ; 51
- - psrlq mm1, 48 ; XX XX XX 43
- - punpcklwd mm1, mm7 ; 45 XX 44 43
- - psrlq mm2, 16 ; XX 51 50 49
- - punpcklwd mm1, mm2 ; 50 44 49 43
- - pshufw mm1, mm1, 010110100b ; 44 50 49 43
- - movq [r0+2*44], mm1
- - psrlq mm7, 16 ; XX 47 46 45
- - pinsrw mm7, r2, 3 ; 51 47 46 45
- - movq [r0+2*48], mm7
- - movq mm0, [r1+2*56] ; 59 58 57 56
- - movq mm1, [r1+2*52] ; 55 54 53 52
- - movq mm2, mm0
- - movq mm7, [r1+2*60]
- - punpckldq mm2, mm1 ; 53 52 57 56
- - punpckhdq mm1, mm0 ; 59 58 55 54
- - movq [r0+2*52], mm2
- - movq [r0+2*56], mm1
- - movq [r0+2*60], mm7
- +%undef SCAN_8x8
- +%macro SCAN_8x8 6 ; %1=function name suffix, %2=element suffix for pshuf/pinsr/pextr, %3=narrow punpck suffix, %4=wide punpck suffix, %5=shift suffix, %6=base shift count
- +cglobal zigzag_scan_8x8_field_%1, 2,3,8*(mmsize/16) ; r0 = level (written), r1 = dct (read), r2 = scratch; lane comments below list coefficient indices, highest element first
- + mova m0, [r1+SIZEOF_PIXEL*2*0] ; 03 02 01 00
- + mova m1, [r1+SIZEOF_PIXEL*2*4] ; 07 06 05 04
- + mova m2, [r1+SIZEOF_PIXEL*2*8] ; 11 10 09 08
- + pshuf%2 m3, m0, 011111111b ; 03 03 03 03
- + movd r2, m2 ; 09 08
- + pshuf%2 m2, m2, 000111001b ; 08 11 10 09
- + punpckl%3 m3, m1 ; 05 03 04 03
- + pinsr%2 m0, r2d, 3 ; 08 02 01 00
- + mova m4, m2
- + punpckl%3 m2, m3 ; 04 10 03 09
- + pshuf%2 m2, m2, 010110100b ; 10 04 03 09
- + mova [r0+SIZEOF_PIXEL*2*0], m0 ; 08 02 01 00
- + mova [r0+SIZEOF_PIXEL*2*4], m2 ; 10 04 03 09
- + mova m3, [r1+SIZEOF_PIXEL*2*12] ; 15 14 13 12
- + mova m5, [r1+SIZEOF_PIXEL*2*16] ; 19 18 17 16
- + punpckl%4 m6, m5 ; 17 16 XX XX
- + psrl%5 m1, %6 ; XX 07 06 05
- + punpckh%3 m6, m4 ; 08 17 11 16
- + punpckl%4 m6, m1 ; 06 05 11 16
- + mova [r0+SIZEOF_PIXEL*2*8], m6 ; 06 05 11 16
- + psrl%5 m1, %6 ; XX XX 07 06
- + punpckl%3 m1, m5 ; 17 07 16 06
- + mova m0, [r1+SIZEOF_PIXEL*2*20] ; 23 22 21 20
- + mova m2, [r1+SIZEOF_PIXEL*2*24] ; 27 26 25 24
- + mova m6, m3
- + punpckh%4 m1, m1 ; 17 07 17 07
- + punpckl%3 m6, m2 ; 25 13 24 12
- + pextr%2 r2d, m5, 2
- + mova [r0+SIZEOF_PIXEL*2*24], m0 ; 23 22 21 20
- + punpckl%3 m1, m6 ; 24 17 12 07
- + mova [r0+SIZEOF_PIXEL*2*12], m1
- + pinsr%2 m3, r2d, 0 ; 15 14 13 18
- + mova [r0+SIZEOF_PIXEL*2*16], m3 ; 15 14 13 18
- + mova m7, [r1+SIZEOF_PIXEL*2*28]
- + mova m0, [r1+SIZEOF_PIXEL*2*32] ; 35 34 33 32
- + psrl%5 m5, %6*3 ; XX XX XX 19
- + pshuf%2 m1, m2, 011111001b ; 27 27 26 25
- + punpckl%3 m5, m0 ; 33 XX 32 19
- + psrl%5 m2, %6*3 ; XX XX XX 27
- + punpckl%3 m5, m1 ; 26 32 25 19
- + mova [r0+SIZEOF_PIXEL*2*32], m7
- + mova [r0+SIZEOF_PIXEL*2*20], m5 ; 26 32 25 19
- + mova m7, [r1+SIZEOF_PIXEL*2*36]
- + mova m1, [r1+SIZEOF_PIXEL*2*40] ; 43 42 41 40
- + pshuf%2 m3, m0, 011111001b ; 35 35 34 33
- + punpckl%3 m2, m1 ; 41 XX 40 27
- + mova [r0+SIZEOF_PIXEL*2*40], m7
- + punpckl%3 m2, m3 ; 34 40 33 27
- + mova [r0+SIZEOF_PIXEL*2*28], m2
- + mova m7, [r1+SIZEOF_PIXEL*2*44] ; 47 46 45 44
- + mova m2, [r1+SIZEOF_PIXEL*2*48] ; 51 50 49 48
- + psrl%5 m0, %6*3 ; XX XX XX 35
- + punpckl%3 m0, m2 ; 49 XX 48 35
- + pshuf%2 m3, m1, 011111001b ; 43 43 42 41
- + punpckl%3 m0, m3 ; 42 48 41 35
- + mova [r0+SIZEOF_PIXEL*2*36], m0
- + pextr%2 r2d, m2, 3 ; 51
- + psrl%5 m1, %6*3 ; XX XX XX 43
- + punpckl%3 m1, m7 ; 45 XX 44 43
- + psrl%5 m2, %6 ; XX 51 50 49
- + punpckl%3 m1, m2 ; 50 44 49 43
- + pshuf%2 m1, m1, 010110100b ; 44 50 49 43
- + mova [r0+SIZEOF_PIXEL*2*44], m1
- + psrl%5 m7, %6 ; XX 47 46 45
- + pinsr%2 m7, r2d, 3 ; 51 47 46 45
- + mova [r0+SIZEOF_PIXEL*2*48], m7
- + mova m0, [r1+SIZEOF_PIXEL*2*56] ; 59 58 57 56
- + mova m1, [r1+SIZEOF_PIXEL*2*52] ; 55 54 53 52
- + mova m2, m0
- + mova m7, [r1+SIZEOF_PIXEL*2*60]
- + punpckl%4 m2, m1 ; 53 52 57 56
- + punpckh%4 m1, m0 ; 59 58 55 54
- + mova [r0+SIZEOF_PIXEL*2*52], m2
- + mova [r0+SIZEOF_PIXEL*2*56], m1
- + mova [r0+SIZEOF_PIXEL*2*60], m7
- RET
- +%endmacro
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +SCAN_8x8 sse4 , d, dq, qdq, dq, 4
- +%else
- +INIT_MMX
- +SCAN_8x8 mmxext, w, wd, dq , q , 16
- +%endif
- ;-----------------------------------------------------------------------------
- ; void zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *src, uint8_t *dst )
- diff --git a/common/x86/dct.h b/common/x86/dct.h
- index bb8c250..54a6e44 100644
- --- a/common/x86/dct.h
- +++ b/common/x86/dct.h
- @@ -72,11 +72,14 @@ void x264_add8x8_idct8_sse2 ( uint8_t *dst, int16_t dct [64] );
- void x264_add16x16_idct8_sse2( uint8_t *dst, int16_t dct[4][64] );
- void x264_zigzag_scan_8x8_frame_ssse3 ( int16_t level[64], int16_t dct[64] );
- -void x264_zigzag_scan_8x8_frame_sse2 ( int16_t level[64], int16_t dct[64] );
- +void x264_zigzag_scan_8x8_frame_sse2 ( dctcoef level[64], dctcoef dct[64] );
- void x264_zigzag_scan_8x8_frame_mmxext( int16_t level[64], int16_t dct[64] );
- void x264_zigzag_scan_4x4_frame_ssse3 ( int16_t level[16], int16_t dct[16] );
- +void x264_zigzag_scan_4x4_frame_sse2 ( int32_t level[16], int32_t dct[16] );
- void x264_zigzag_scan_4x4_frame_mmx ( int16_t level[16], int16_t dct[16] );
- +void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] );
- void x264_zigzag_scan_4x4_field_mmxext( int16_t level[16], int16_t dct[16] );
- +void x264_zigzag_scan_8x8_field_sse4 ( int32_t level[64], int32_t dct[64] );
- void x264_zigzag_scan_8x8_field_mmxext( int16_t level[64], int16_t dct[64] );
- int x264_zigzag_sub_4x4_frame_ssse3 ( int16_t level[16], const uint8_t *src, uint8_t *dst );
- int x264_zigzag_sub_4x4ac_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst, int16_t *dc );
- --
- 1.7.2.3
- From dc3aeb9cae6a3ba6c8e03689d09b04dda8b22ca6 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Wed, 8 Dec 2010 17:56:22 -0500
- Subject: [PATCH 3/3] dequant4x4, dequant8x8, dequant_4x4dc
- ---
- common/quant.c | 3 ++
- common/x86/quant-a.asm | 94 +++++++++++++++++++++++++++++++++---------------
- common/x86/quant.h | 3 ++
- 3 files changed, 71 insertions(+), 29 deletions(-)
- diff --git a/common/quant.c b/common/quant.c
- index 816e60a..39576a0 100644
- --- a/common/quant.c
- +++ b/common/quant.c
- @@ -367,6 +367,9 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
- pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
- pf->quant_4x4 = x264_quant_4x4_sse4;
- pf->quant_8x8 = x264_quant_8x8_sse4;
- + pf->dequant_4x4 = x264_dequant_4x4_sse4;
- + pf->dequant_8x8 = x264_dequant_8x8_sse4;
- + pf->dequant_4x4_dc = x264_dequant_4x4dc_sse4;
- }
- #endif // HAVE_MMX
- #else // !HIGH_BIT_DEPTH
- diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
- index 5d7a15e..48501a3 100644
- --- a/common/x86/quant-a.asm
- +++ b/common/x86/quant-a.asm
- @@ -469,26 +469,33 @@ QUANT_AC quant_8x8_sse4, 8
- ; dequant
- ;=============================================================================
- -%macro DEQUANT16_L 3
- +%macro DEQUANT16_L 4 ; %4 = element-size suffix appended to the final left shift (psll%4)
- ;;; %1 dct[y][x]
- ;;; %2,%3 dequant_mf[i_mf][y][x]
- ;;; m2 i_qbits
- -
- mova m0, %2
- +%ifdef HIGH_BIT_DEPTH
- + pmulld m0, %1 ; multiply %2 by dct as packed 32-bit values; %3 is not read in this branch
- +%else
- packssdw m0, %3
- pmullw m0, %1
- - psllw m0, m2
- +%endif
- + psll%4 m0, m2 ; shared by both branches: shift left by the count in m2
- mova %1, m0
- %endmacro
- -%macro DEQUANT32_R 3
- +%macro DEQUANT32_R 3-4
- ;;; %1 dct[y][x]
- ;;; %2,%3 dequant_mf[i_mf][y][x]
- ;;; m2 -i_qbits
- ;;; m3 f
- ;;; m4 0
- -
- mova m0, %1
- +%ifdef HIGH_BIT_DEPTH
- + pmulld m0, %2
- + paddd m0, m3
- + psrad m0, m2
- +%else
- mova m1, m0
- punpcklwd m0, m4
- punpckhwd m1, m4
- @@ -499,21 +506,22 @@ QUANT_AC quant_8x8_sse4, 8
- psrad m0, m2
- psrad m1, m2
- packssdw m0, m1
- +%endif
- mova %1, m0
- %endmacro
- -%macro DEQUANT_LOOP 3
- +%macro DEQUANT_LOOP 4 ; %1 = per-vector macro to invoke, %4 = forwarded unchanged as that macro's 4th argument
- %if 8*(%2-2*%3)
- mov t0d, 8*(%2-2*%3)
- %%loop:
- - %1 [r0+t0+8*%3], [r1+t0*2+16*%3], [r1+t0*2+24*%3]
- - %1 [r0+t0 ], [r1+t0*2 ], [r1+t0*2+ 8*%3]
- + %1 [r0+(t0 )*SIZEOF_PIXEL], [r1+t0*2 ], [r1+t0*2+ 8*%3], %4 ; dct offset is scaled by SIZEOF_PIXEL; the r1 (dequant_mf) offsets are not
- + %1 [r0+(t0+8*%3)*SIZEOF_PIXEL], [r1+t0*2+16*%3], [r1+t0*2+24*%3], %4
- sub t0d, 16*%3
- jge %%loop
- REP_RET
- %else
- - %1 [r0+8*%3], [r1+16*%3], [r1+24*%3]
- - %1 [r0 ], [r1 ], [r1+ 8*%3]
- + %1 [r0+(8*%3)*SIZEOF_PIXEL], [r1+16*%3], [r1+24*%3], %4 ; non-looping case: two invocations with fixed offsets
- + %1 [r0+(0 )*SIZEOF_PIXEL], [r1+0 ], [r1+ 8*%3], %4
- RET
- %endif
- %endmacro
- @@ -562,16 +570,16 @@ QUANT_AC quant_8x8_sse4, 8
- %endmacro
- ;-----------------------------------------------------------------------------
- -; void dequant_4x4( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
- +; void dequant_4x4( dctcoef dct[4][4], int dequant_mf[6][4][4], int i_qp )
- ;-----------------------------------------------------------------------------
- -%macro DEQUANT 4
- -cglobal dequant_%2x%2_%1, 0,3
- +%macro DEQUANT 5
- +cglobal dequant_%2x%2_%1, 0,3,8*(mmsize/16)
- .skip_prologue:
- DEQUANT_START %3+2, %3
- .lshift:
- movd m2, t0d
- - DEQUANT_LOOP DEQUANT16_L, %2*%2/4, %4
- + DEQUANT_LOOP DEQUANT16_L, %2*%2/4, %4, %5
- .rshift32:
- neg t0d
- @@ -580,7 +588,7 @@ cglobal dequant_%2x%2_%1, 0,3
- pxor m4, m4
- pslld m3, m2
- psrld m3, 1
- - DEQUANT_LOOP DEQUANT32_R, %2*%2/4, %4
- + DEQUANT_LOOP DEQUANT32_R, %2*%2/4, %4, %5
- cglobal dequant_%2x%2_flat16_%1, 0,3
- movifnidn t2d, r2m
- @@ -623,35 +631,45 @@ cglobal dequant_%2x%2_flat16_%1, 0,3
- RET
- %endmacro ; DEQUANT
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +DEQUANT sse4, 4, 4, 1, d
- +DEQUANT sse4, 8, 6, 1, d
- +%else
- %ifndef ARCH_X86_64
- INIT_MMX
- -DEQUANT mmx, 4, 4, 1
- -DEQUANT mmx, 8, 6, 1
- +DEQUANT mmx, 4, 4, 1, w
- +DEQUANT mmx, 8, 6, 1, w
- %endif
- INIT_XMM
- -DEQUANT sse2, 4, 4, 2
- -DEQUANT sse2, 8, 6, 2
- +DEQUANT sse2, 4, 4, 2, w
- +DEQUANT sse2, 8, 6, 2, w
- +%endif
- -%macro DEQUANT_DC 1
- -cglobal dequant_4x4dc_%1, 0,3
- +%macro DEQUANT_DC 2
- +cglobal dequant_4x4dc_%1, 0,3,6*(mmsize/16)
- DEQUANT_START 6, 6
- .lshift:
- movd m3, [r1]
- movd m2, t0d
- pslld m3, m2
- +%ifdef HIGH_BIT_DEPTH
- + pshufd m3, m3, 0
- +%else
- %if mmsize==16
- pshuflw m3, m3, 0
- punpcklqdq m3, m3
- %else
- pshufw m3, m3, 0
- %endif
- +%endif ; HIGH_BIT_DEPTH
- %assign x 0
- -%rep 16/mmsize
- +%rep SIZEOF_PIXEL*16/mmsize
- mova m0, [r0+mmsize*0+x]
- mova m1, [r0+mmsize*1+x]
- - pmullw m0, m3
- - pmullw m1, m3
- + pmull%2 m0, m3
- + pmull%2 m1, m3
- mova [r0+mmsize*0+x], m0
- mova [r0+mmsize*1+x], m1
- %assign x x+mmsize*2
- @@ -661,19 +679,31 @@ cglobal dequant_4x4dc_%1, 0,3
- .rshift32:
- neg t0d
- movd m3, t0d
- - mova m4, [pw_1]
- + mova m4, [p%2_1]
- mova m5, m4
- pslld m4, m3
- psrld m4, 1
- movd m2, [r1]
- +%assign x 0
- +%ifdef HIGH_BIT_DEPTH
- + pshufd m2, m2, 0
- +%rep SIZEOF_PIXEL*32/mmsize
- + mova m0, [r0+x]
- + pmulld m0, m2
- + paddd m0, m4
- + psrad m0, m3
- + mova [r0+x], m0
- +%assign x x+mmsize
- +%endrep
- +
- +%else
- %if mmsize==8
- punpcklwd m2, m2
- %else
- pshuflw m2, m2, 0
- %endif
- punpcklwd m2, m4
- -%assign x 0
- -%rep 32/mmsize
- +%rep SIZEOF_PIXEL*32/mmsize
- mova m0, [r0+x]
- mova m1, m0
- punpcklwd m0, m5
- @@ -686,13 +716,19 @@ cglobal dequant_4x4dc_%1, 0,3
- mova [r0+x], m0
- %assign x x+mmsize
- %endrep
- +%endif
- RET
- %endmacro
- +%ifdef HIGH_BIT_DEPTH
- +INIT_XMM
- +DEQUANT_DC sse4 , d
- +%else
- INIT_MMX
- -DEQUANT_DC mmxext
- +DEQUANT_DC mmxext, w
- INIT_XMM
- -DEQUANT_DC sse2
- +DEQUANT_DC sse2 , w
- +%endif
- %ifdef HIGH_BIT_DEPTH
- ;-----------------------------------------------------------------------------
- diff --git a/common/x86/quant.h b/common/x86/quant.h
- index a28099c..78817e0 100644
- --- a/common/x86/quant.h
- +++ b/common/x86/quant.h
- @@ -57,6 +57,9 @@ void x264_dequant_8x8_flat16_sse2( int16_t dct[64], int dequant_mf[6][64], int i
- void x264_denoise_dct_mmx( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
- void x264_denoise_dct_sse2( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
- void x264_denoise_dct_ssse3( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size );
- +void x264_dequant_4x4_sse4( int32_t dct[16], int dequant_mf[6][16], int i_qp );
- +void x264_dequant_4x4dc_sse4( int32_t dct[16], int dequant_mf[6][16], int i_qp );
- +void x264_dequant_8x8_sse4( int32_t dct[64], int dequant_mf[6][64], int i_qp );
- int x264_decimate_score15_mmxext( dctcoef *dct );
- int x264_decimate_score15_sse2 ( dctcoef *dct );
- int x264_decimate_score15_ssse3 ( dctcoef *dct );
- --
- 1.7.2.3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement