Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 09b5a943c62212447a0151dfd5324f8e36715272 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Thu, 6 May 2010 10:03:31 -0700
- Subject: [PATCH 1/9] More cosmetics
- ---
- common/cpu.c | 4 +-
- common/macroblock.c | 6 +++-
- common/mc.c | 4 +-
- common/mvpred.c | 12 ++++----
- common/ppc/dct.c | 2 +-
- common/ppc/mc.c | 12 ++++----
- common/ppc/ppccommon.h | 8 +++---
- common/ppc/quant.c | 6 ++--
- common/predict.c | 2 +-
- common/x86/const-a.asm | 2 +-
- common/x86/mc-c.c | 2 +-
- common/x86/predict-c.c | 2 +-
- encoder/cabac.c | 8 +++---
- encoder/me.c | 18 ++++++------
- input/avs.c | 2 +-
- tools/checkasm.c | 66 ++++++++++++++++++++++++------------------------
- 16 files changed, 79 insertions(+), 77 deletions(-)
- diff --git a/common/cpu.c b/common/cpu.c
- index 904eedc..933a754 100644
- --- a/common/cpu.c
- +++ b/common/cpu.c
- @@ -87,8 +87,8 @@ static void sigill_handler( int sig )
- #endif
- #ifdef HAVE_MMX
- -extern int x264_cpu_cpuid_test( void );
- -extern uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
- +int x264_cpu_cpuid_test( void );
- +uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
- uint32_t x264_cpu_detect( void )
- {
- diff --git a/common/macroblock.c b/common/macroblock.c
- index f402588..110c3a5 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -295,7 +295,8 @@ int x264_macroblock_cache_allocate( x264_t *h )
- }
- return 0;
- -fail: return -1;
- +fail:
- + return -1;
- }
- void x264_macroblock_cache_free( x264_t *h )
- {
- @@ -348,7 +349,8 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- CHECKED_MALLOC( h->scratch_buffer, scratch_size );
- return 0;
- -fail: return -1;
- +fail:
- + return -1;
- }
- void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
- diff --git a/common/mc.c b/common/mc.c
- index ad7fe79..ada8bdc 100644
- --- a/common/mc.c
- +++ b/common/mc.c
- @@ -97,9 +97,9 @@ static void name( uint8_t *pix1, int i_stride_pix1, \
- uint8_t *pix2, int i_stride_pix2, \
- uint8_t *pix3, int i_stride_pix3, int weight ) \
- { \
- - if( weight == 32 )\
- + if( weight == 32 ) \
- pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
- - else\
- + else \
- pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
- }
- PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
- diff --git a/common/mvpred.c b/common/mvpred.c
- index de91826..54b4d5a 100755
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -394,7 +394,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
- int i = 0;
- -#define SET_MVP(mvp)\
- +#define SET_MVP(mvp) \
- { \
- CP32( mvc[i], mvp ); \
- i++; \
- @@ -445,13 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- if( h->sh.b_mbaff && field^(i_ref&1) )
- refpoc += h->sh.i_delta_poc_bottom;
- -#define SET_TMVP( dx, dy )\
- +#define SET_TMVP( dx, dy ) \
- { \
- int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
- - int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field];\
- - mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8;\
- - mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8;\
- - i++;\
- + int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
- + mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
- + mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
- + i++; \
- }
- SET_TMVP(0,0);
- diff --git a/common/ppc/dct.c b/common/ppc/dct.c
- index fdadf53..eb223ae 100644
- --- a/common/ppc/dct.c
- +++ b/common/ppc/dct.c
- @@ -205,7 +205,7 @@ void x264_sub8x8_dct8_altivec( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
- vec_st( dct_tr1v, 16, (signed short *)dct );
- vec_st( dct_tr2v, 32, (signed short *)dct );
- vec_st( dct_tr3v, 48, (signed short *)dct );
- -
- +
- vec_st( dct_tr4v, 64, (signed short *)dct );
- vec_st( dct_tr5v, 80, (signed short *)dct );
- vec_st( dct_tr6v, 96, (signed short *)dct );
- diff --git a/common/ppc/mc.c b/common/ppc/mc.c
- index dfe250a..26b81f8 100644
- --- a/common/ppc/mc.c
- +++ b/common/ppc/mc.c
- @@ -291,8 +291,8 @@ static void mc_chroma_2xh( uint8_t *dst, int i_dst_stride,
- }
- -#define DO_PROCESS_W4( a ) \
- - dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
- +#define DO_PROCESS_W4( a ) \
- + dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
- dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
- static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
- @@ -369,10 +369,10 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
- }
- }
- -#define DO_PROCESS_W8( a ) \
- - src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \
- - src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \
- - dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
- +#define DO_PROCESS_W8( a ) \
- + src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \
- + src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \
- + dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
- dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
- static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
- diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
- index 510ab26..e61afaa 100644
- --- a/common/ppc/ppccommon.h
- +++ b/common/ppc/ppccommon.h
- @@ -113,13 +113,13 @@ typedef union {
- vec_u8_t _hv, _lv
- #define PREP_LOAD_SRC( src ) \
- - vec_u8_t _##src##_ = vec_lvsl(0, src)
- + vec_u8_t _##src##_ = vec_lvsl(0, src)
- #define VEC_LOAD_G( p, v, n, t ) \
- _hv = vec_ld( 0, p ); \
- v = (t) vec_lvsl( 0, p ); \
- _lv = vec_ld( n - 1, p ); \
- - v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
- + v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
- #define VEC_LOAD( p, v, n, t, g ) \
- _hv = vec_ld( 0, p ); \
- @@ -134,7 +134,7 @@ typedef union {
- #define VEC_LOAD_PARTIAL( p, v, n, t, g) \
- _hv = vec_ld( 0, p); \
- v = (t) vec_perm( _hv, _hv, (vec_u8_t) _##g##_ )
- -
- +
- /***********************************************************************
- * PREP_STORE##n: declares required vectors to store n bytes to a
- @@ -155,7 +155,7 @@ typedef union {
- _lv = vec_perm( (vec_u8_t) v, _tmp1v, _##o##r_ ); \
- vec_st( _lv, 15, (uint8_t *) p ); \
- _hv = vec_perm( _tmp1v, (vec_u8_t) v, _##o##r_ ); \
- - vec_st( _hv, 0, (uint8_t *) p )
- + vec_st( _hv, 0, (uint8_t *) p )
- #define PREP_STORE8 \
- diff --git a/common/ppc/quant.c b/common/ppc/quant.c
- index 4b2825c..6f41a06 100644
- --- a/common/ppc/quant.c
- +++ b/common/ppc/quant.c
- @@ -20,7 +20,7 @@
- #include "common/common.h"
- #include "ppccommon.h"
- -#include "quant.h"
- +#include "quant.h"
- // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
- #define QUANT_16_U( idx0, idx1 ) \
- @@ -55,7 +55,7 @@
- nz = vec_or(nz, vec_or(temp1v, temp2v)); \
- vec_st(temp2v, (idx1), (int16_t*)dct); \
- }
- -
- +
- int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
- {
- LOAD_ZERO;
- @@ -220,7 +220,7 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64
- vec_u16_t biasvB;
- vec_s16_t temp1v, temp2v;
- -
- +
- vec_u32_u qbits_u;
- qbits_u.s[0]=16;
- i_qbitsv = vec_splat(qbits_u.v, 0);
- diff --git a/common/predict.c b/common/predict.c
- index 783cc9b..f120ca7 100644
- --- a/common/predict.c
- +++ b/common/predict.c
- @@ -41,7 +41,7 @@
- * 16x16 prediction for intra luma block
- ****************************************************************************/
- -#define PREDICT_16x16_DC(v) \
- +#define PREDICT_16x16_DC(v)\
- for( int i = 0; i < 16; i++ )\
- {\
- M32( src+ 0 ) = v;\
- diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
- index 79bbf1b..99a34be 100755
- --- a/common/x86/const-a.asm
- +++ b/common/x86/const-a.asm
- @@ -43,7 +43,7 @@ const pw_64, times 8 dw 64
- const pw_32_0, times 4 dw 32,
- times 4 dw 0
- const pw_8000, times 8 dw 0x8000
- -const pw_3fff, times 8 dw 0x3fff
- +const pw_3fff, times 8 dw 0x3fff
- const pd_1, times 4 dd 1
- const pd_128, times 4 dd 128
- diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
- index fb73562..6d386f6 100644
- --- a/common/x86/mc-c.c
- +++ b/common/x86/mc-c.c
- @@ -103,7 +103,7 @@ void x264_integral_init8v_sse2( uint16_t *sum8, int stride );
- void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
- void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
- uint16_t *inter_costs, uint16_t *inv_qscales, int len );
- -#define LOWRES(cpu) \
- +#define LOWRES(cpu)\
- void x264_frame_init_lowres_core_##cpu( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,\
- int src_stride, int dst_stride, int width, int height );
- LOWRES(mmxext)
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index 6fa7e3b..0e3e1c7 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -326,7 +326,7 @@ static void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] )
- t=e; e+=f; f-=t;\
- t=g; g+=h; h-=t;
- -#define INTRA_SA8D_X3(cpu) \
- +#define INTRA_SA8D_X3(cpu)\
- void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
- {\
- PREDICT_8x8_LOAD_TOP\
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 1086447..bc76fc8 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -736,13 +736,13 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
- }
- #endif
- -#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \
- -{ \
- - int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \
- +#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra )\
- +{\
- + int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra );\
- if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
- {\
- x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
- - block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \
- + block_residual_write_cabac( h, cb, i_ctxBlockCat, l );\
- }\
- else\
- x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
- diff --git a/encoder/me.c b/encoder/me.c
- index d7b2928..5e113f0 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -914,14 +914,14 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
- m->cost_mv = p_cost_mvx[bmx] + p_cost_mvy[bmy];
- }
- -#define BIME_CACHE( dx, dy, list ) \
- -{ \
- +#define BIME_CACHE( dx, dy, list )\
- +{\
- x264_me_t *m = m##list;\
- - int i = 4 + 3*dx + dy; \
- + int i = 4 + 3*dx + dy;\
- int mvx = bm##list##x+dx;\
- int mvy = bm##list##y+dy;\
- stride[list][i] = bw;\
- - src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none ); \
- + src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none );\
- if( rd )\
- {\
- h->mc.mc_chroma( pixu_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
- @@ -1107,11 +1107,11 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
- { \
- uint64_t cost; \
- M32( cache_mv ) = pack16to32_mask(mx,my); \
- - if( m->i_pixel <= PIXEL_8x8 )\
- - {\
- - h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\
- - h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\
- - }\
- + if( m->i_pixel <= PIXEL_8x8 ) \
- + { \
- + h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
- + h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
- + } \
- cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
- COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
- } \
- diff --git a/input/avs.c b/input/avs.c
- index 9e3aa55..5489a5e 100644
- --- a/input/avs.c
- +++ b/input/avs.c
- @@ -45,7 +45,7 @@
- /* maximum size of the sequence of filters to try on non script files */
- #define AVS_MAX_SEQUENCE 5
- -#define LOAD_AVS_FUNC(name, continue_on_fail) \
- +#define LOAD_AVS_FUNC(name, continue_on_fail)\
- {\
- h->func.name = (void*)GetProcAddress( h->library, #name );\
- if( !continue_on_fail && !h->func.name )\
- diff --git a/tools/checkasm.c b/tools/checkasm.c
- index 228b75f..2008d2f 100644
- --- a/tools/checkasm.c
- +++ b/tools/checkasm.c
- @@ -265,7 +265,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
- buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
- #define TEST_PIXEL( name, align ) \
- - ok = 1, used_asm = 0;\
- + ok = 1, used_asm = 0; \
- for( int i = 0; i < 7; i++ ) \
- { \
- int res_c, res_asm; \
- @@ -305,7 +305,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
- TEST_PIXEL( sa8d, 1 );
- #define TEST_PIXEL_X( N ) \
- - ok = 1; used_asm = 0;\
- + ok = 1; used_asm = 0; \
- for( int i = 0; i < 7; i++ ) \
- { \
- int res_c[4]={0}, res_asm[4]={0}; \
- @@ -350,7 +350,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
- { \
- set_func_name( "%s_%s", "var", pixel_names[i] ); \
- used_asm = 1; \
- - /* abi-check wrapper can't return uint64_t, so separate it from return value check */\
- + /* abi-check wrapper can't return uint64_t, so separate it from return value check */ \
- call_c1( pixel_c.var[i], buf1, 16 ); \
- call_a1( pixel_asm.var[i], buf1, 16 ); \
- uint64_t res_c = pixel_c.var[i]( buf1, 16 ); \
- @@ -415,7 +415,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
- if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
- { \
- int res_c[3], res_asm[3]; \
- - set_func_name( #name );\
- + set_func_name( #name ); \
- used_asm = 1; \
- memcpy( buf3, buf2, 1024 ); \
- for( int i = 0; i < 3; i++ ) \
- @@ -538,7 +538,7 @@ static int check_dct( int cpu_ref, int cpu_new )
- #define TEST_DCT( name, t1, t2, size ) \
- if( dct_asm.name != dct_ref.name ) \
- { \
- - set_func_name( #name );\
- + set_func_name( #name ); \
- used_asm = 1; \
- call_c( dct_c.name, t1, buf1, buf2 ); \
- call_a( dct_asm.name, t2, buf1, buf2 ); \
- @@ -579,7 +579,7 @@ static int check_dct( int cpu_ref, int cpu_new )
- #define TEST_IDCT( name, src ) \
- if( dct_asm.name != dct_ref.name ) \
- { \
- - set_func_name( #name );\
- + set_func_name( #name ); \
- used_asm = 1; \
- memcpy( buf3, buf1, 32*32 ); \
- memcpy( buf4, buf1, 32*32 ); \
- @@ -644,12 +644,12 @@ static int check_dct( int cpu_ref, int cpu_new )
- ALIGNED_16( int16_t level1[64] );
- ALIGNED_16( int16_t level2[64] );
- -#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
- +#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
- { \
- - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
- + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
- used_asm = 1; \
- - memcpy(dct, buf1, size*sizeof(int16_t));\
- + memcpy(dct, buf1, size*sizeof(int16_t)); \
- call_c( zigzag_c.name, t1, dct ); \
- call_a( zigzag_asm.name, t2, dct ); \
- if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \
- @@ -663,18 +663,18 @@ static int check_dct( int cpu_ref, int cpu_new )
- if( zigzag_asm.name != zigzag_ref.name ) \
- { \
- int nz_a, nz_c; \
- - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
- + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
- used_asm = 1; \
- memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
- memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
- - nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
- + nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
- nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4 ); \
- - if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
- + if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- } \
- - call_c2( zigzag_c.name, t1, buf2, buf3 ); \
- + call_c2( zigzag_c.name, t1, buf2, buf3 ); \
- call_a2( zigzag_asm.name, t2, buf2, buf4 ); \
- }
- @@ -683,7 +683,7 @@ static int check_dct( int cpu_ref, int cpu_new )
- { \
- int nz_a, nz_c; \
- int16_t dc_a, dc_c; \
- - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
- + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
- used_asm = 1; \
- for( int i = 0; i < 2; i++ ) \
- { \
- @@ -694,27 +694,27 @@ static int check_dct( int cpu_ref, int cpu_new )
- memcpy( buf3 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
- memcpy( buf4 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
- } \
- - nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
- + nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
- nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
- - if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
- + if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- break; \
- } \
- } \
- - call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
- + call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
- call_a2( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
- }
- -#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
- +#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
- if( zigzag_asm.name != zigzag_ref.name ) \
- { \
- for( int j = 0; j < 100; j++ ) \
- { \
- - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
- + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
- used_asm = 1; \
- - memcpy(dct, buf1, size*sizeof(int16_t));\
- + memcpy(dct, buf1, size*sizeof(int16_t)); \
- for( int i = 0; i < size; i++ ) \
- dct[i] = rand()&0x1F ? 0 : dct[i]; \
- memcpy(buf3, buf4, 10*sizeof(uint8_t)); \
- @@ -784,7 +784,7 @@ static int check_mc( int cpu_ref, int cpu_new )
- if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \
- { \
- const x264_weight_t *weight = weight_none; \
- - set_func_name( "mc_luma_%dx%d", w, h );\
- + set_func_name( "mc_luma_%dx%d", w, h ); \
- used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
- @@ -801,7 +801,7 @@ static int check_mc( int cpu_ref, int cpu_new )
- uint8_t *ref = dst2; \
- int ref_stride = 32; \
- const x264_weight_t *weight = weight_none; \
- - set_func_name( "get_ref_%dx%d", w, h );\
- + set_func_name( "get_ref_%dx%d", w, h ); \
- used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
- @@ -819,13 +819,13 @@ static int check_mc( int cpu_ref, int cpu_new )
- #define MC_TEST_CHROMA( w, h ) \
- if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
- { \
- - set_func_name( "mc_chroma_%dx%d", w, h );\
- + set_func_name( "mc_chroma_%dx%d", w, h ); \
- used_asm = 1; \
- memset( buf3, 0xCD, 1024 ); \
- memset( buf4, 0xCD, 1024 ); \
- call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
- call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
- - /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
- + /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
- for( int j = 0; j < h; j++ ) \
- for( int i = w; i < 4; i++ ) \
- dst2[i+j*16] = dst1[i+j*16]; \
- @@ -878,7 +878,7 @@ static int check_mc( int cpu_ref, int cpu_new )
- memcpy( buf4, buf1+320, 320 ); \
- if( mc_a.name[i] != mc_ref.name[i] ) \
- { \
- - set_func_name( "%s_%s", #name, pixel_names[i] );\
- + set_func_name( "%s_%s", #name, pixel_names[i] ); \
- used_asm = 1; \
- call_c1( mc_c.name[i], buf3, 16, buf2+1, 16, buf1+18, 16, weight ); \
- call_a1( mc_a.name[i], buf4, 16, buf2+1, 16, buf1+18, 16, weight ); \
- @@ -899,7 +899,7 @@ static int check_mc( int cpu_ref, int cpu_new )
- #define MC_TEST_WEIGHT( name, weight, aligned ) \
- int align_off = (aligned ? 0 : rand()%16); \
- - ok = 1, used_asm = 0;\
- + ok = 1, used_asm = 0; \
- for( int i = 1; i <= 5; i++ ) \
- { \
- ALIGNED_16( uint8_t buffC[640] ); \
- @@ -1115,14 +1115,14 @@ static int check_deblock( int cpu_ref, int cpu_new )
- #define TEST_DEBLOCK( name, align, ... ) \
- for( int i = 0; i < 36; i++ ) \
- { \
- - int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */\
- + int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
- for( int j = 0; j < 1024; j++ ) \
- - /* two distributions of random to excersize different failure modes */\
- + /* two distributions of random to exercise different failure modes */ \
- buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
- memcpy( buf4, buf3, 1024 ); \
- if( db_a.name != db_ref.name ) \
- { \
- - set_func_name( #name );\
- + set_func_name( #name ); \
- used_asm = 1; \
- call_c1( db_c.name, buf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- call_a1( db_a.name, buf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- @@ -1236,7 +1236,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
- result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- - if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
- + if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
- { \
- oks[0] = 0; \
- fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
- @@ -1491,11 +1491,11 @@ static int check_intra( int cpu_ref, int cpu_new )
- ip_c.predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
- -#define INTRA_TEST( name, dir, w, ... ) \
- +#define INTRA_TEST( name, dir, w, ... )\
- if( ip_a.name[dir] != ip_ref.name[dir] )\
- - { \
- + {\
- set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
- - used_asm = 1; \
- + used_asm = 1;\
- memcpy( buf3, buf1, 32*20 );\
- memcpy( buf4, buf1, 32*20 );\
- call_c( ip_c.name[dir], buf3+48, ##__VA_ARGS__ );\
- --
- 1.7.0.4
- From 29b379cc3499541e72007131909d45a8c472f2b5 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 8 May 2010 11:58:22 -0700
- Subject: [PATCH 2/9] Fix intra refresh behavior with I-frames
- Intra refresh still allows I-frames (for scenecuts/etc).
- Now I-frames count as a full refresh, as opposed to instantly triggering a refresh.
- ---
- common/frame.h | 1 +
- encoder/encoder.c | 28 +++++++++++++++++-----------
- 2 files changed, 18 insertions(+), 11 deletions(-)
- diff --git a/common/frame.h b/common/frame.h
- index 357929e..e2766ad 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -142,6 +142,7 @@ typedef struct x264_frame
- float f_pir_position;
- int i_pir_start_col;
- int i_pir_end_col;
- + int i_frames_since_pir;
- } x264_frame_t;
- /* synchronized frame list */
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 7ad4295..7c5a64f 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2375,25 +2375,31 @@ int x264_encoder_encode( x264_t *h,
- h->i_nal_type = i_nal_type;
- h->i_nal_ref_idc = i_nal_ref_idc;
- - if( h->param.b_intra_refresh && h->fenc->i_type == X264_TYPE_P )
- + if( h->param.b_intra_refresh )
- {
- - int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
- - float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
- - int max_position = (int)(increment * h->param.i_keyint_max);
- - if( IS_X264_TYPE_I( h->fref0[0]->i_type ) )
- - h->fdec->f_pir_position = 0;
- - else
- + if( IS_X264_TYPE_I( h->fenc->i_type ) )
- + {
- + h->fdec->i_frames_since_pir = 0;
- + /* PIR is currently only supported with ref == 1, so any intra frame effectively refreshes
- + * the whole frame and counts as an intra refresh. */
- + h->fdec->f_pir_position = h->sps->i_mb_width;
- + }
- + else if( h->fenc->i_type == X264_TYPE_P )
- {
- + int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
- + float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
- h->fdec->f_pir_position = h->fref0[0]->f_pir_position;
- - if( h->fdec->f_pir_position+0.5 >= max_position )
- + h->fdec->i_frames_since_pir = h->fref0[0]->i_frames_since_pir + pocdiff;
- + if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max )
- {
- h->fdec->f_pir_position = 0;
- + h->fdec->i_frames_since_pir = 0;
- h->fenc->b_keyframe = 1;
- }
- + h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
- + h->fdec->f_pir_position += increment * pocdiff;
- + h->fdec->i_pir_end_col = h->fdec->f_pir_position+0.5;
- }
- - h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
- - h->fdec->f_pir_position += increment * pocdiff;
- - h->fdec->i_pir_end_col = h->fdec->f_pir_position+0.5;
- }
- if( h->fenc->b_keyframe )
- --
- 1.7.0.4
- From 47b30702e9e8b0f9ff6f87a52e0bbc0755a1dbd9 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 8 May 2010 12:07:13 -0700
- Subject: [PATCH 3/9] Add API function to trigger intra refresh
- Useful for interactive applications where the encoder knows that packet loss has occurred on the client.
- Full documentation is in x264.h.
- ---
- common/common.h | 2 ++
- encoder/encoder.c | 11 ++++++++++-
- x264.h | 10 +++++++++-
- 3 files changed, 21 insertions(+), 2 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 91d5030..f673648 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -408,6 +408,8 @@ struct x264_t
- int i_coded_fields_lookahead; /* Use separate counters for lookahead */
- int i_cpb_delay_lookahead;
- + int b_queued_intra_refresh;
- +
- /* We use only one SPS and one PPS */
- x264_sps_t sps_array[1];
- x264_sps_t *sps;
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 7c5a64f..42d49bf 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2131,6 +2131,12 @@ static int x264_threaded_slices_write( x264_t *h )
- return 0;
- }
- +void x264_encoder_intra_refresh( x264_t *h )
- +{
- + h = h->thread[h->thread[0]->i_thread_phase];
- + h->b_queued_intra_refresh = 1;
- +}
- +
- /****************************************************************************
- * x264_encoder_encode:
- * XXX: i_poc : is the poc of the current given picture
- @@ -2380,6 +2386,7 @@ int x264_encoder_encode( x264_t *h,
- if( IS_X264_TYPE_I( h->fenc->i_type ) )
- {
- h->fdec->i_frames_since_pir = 0;
- + h->b_queued_intra_refresh = 0;
- /* PIR is currently only supported with ref == 1, so any intra frame effectively refreshes
- * the whole frame and counts as an intra refresh. */
- h->fdec->f_pir_position = h->sps->i_mb_width;
- @@ -2390,10 +2397,12 @@ int x264_encoder_encode( x264_t *h,
- float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
- h->fdec->f_pir_position = h->fref0[0]->f_pir_position;
- h->fdec->i_frames_since_pir = h->fref0[0]->i_frames_since_pir + pocdiff;
- - if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max )
- + if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max ||
- + (h->b_queued_intra_refresh && h->fdec->f_pir_position + 0.5 >= h->sps->i_mb_width) )
- {
- h->fdec->f_pir_position = 0;
- h->fdec->i_frames_since_pir = 0;
- + h->b_queued_intra_refresh = 0;
- h->fenc->b_keyframe = 1;
- }
- h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
- diff --git a/x264.h b/x264.h
- index 83f087e..f568dc5 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 94
- +#define X264_BUILD 95
- /* x264_t:
- * opaque handler for encoder */
- @@ -639,5 +639,13 @@ void x264_encoder_close ( x264_t * );
- * return the number of currently delayed (buffered) frames
- * this should be used at the end of the stream, to know when you have all the encoded frames. */
- int x264_encoder_delayed_frames( x264_t * );
- +/* x264_encoder_intra_refresh:
- + * If an intra refresh is not in progress, begin one with the next P-frame.
- + * If an intra refresh is in progress, begin one as soon as the current one finishes.
- + * Requires that b_intra_refresh be set.
- + * Useful for interactive streaming where the client can tell the server that packet loss has
- + * occurred. In this case, keyint can be set to an extremely high value so that intra refreshes
- + * only occur when calling x264_encoder_intra_refresh. */
- +void x264_encoder_intra_refresh( x264_t * );
- #endif
- --
- 1.7.0.4
- From 548ea47cb5484a3754a1217e30b7640a12d061b5 Mon Sep 17 00:00:00 2001
- From: Henrik Gramner <hengar-6@student.ltu.se>
- Date: Mon, 10 May 2010 23:27:36 +0200
- Subject: [PATCH 4/9] Shrink even more constant arrays
- ---
- common/arm/mc-c.c | 4 ++--
- common/mc.c | 4 ++--
- common/ppc/mc.c | 4 ++--
- common/set.c | 10 +++++-----
- common/x86/mc-c.c | 4 ++--
- encoder/encoder.c | 4 ++--
- encoder/me.c | 2 +-
- encoder/set.c | 14 +++++---------
- 8 files changed, 21 insertions(+), 25 deletions(-)
- diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
- index 0a7b734..d294eff 100644
- --- a/common/arm/mc-c.c
- +++ b/common/arm/mc-c.c
- @@ -112,8 +112,8 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int,
- x264_mc_copy_w16_neon,
- };
- -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- static void mc_luma_neon( uint8_t *dst, int i_dst_stride,
- uint8_t *src[4], int i_src_stride,
- diff --git a/common/mc.c b/common/mc.c
- index ada8bdc..e0dc659 100644
- --- a/common/mc.c
- +++ b/common/mc.c
- @@ -203,8 +203,8 @@ static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *s
- }
- }
- -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- static void mc_luma( uint8_t *dst, int i_dst_stride,
- uint8_t *src[4], int i_src_stride,
- diff --git a/common/ppc/mc.c b/common/ppc/mc.c
- index 26b81f8..83c60b1 100644
- --- a/common/ppc/mc.c
- +++ b/common/ppc/mc.c
- @@ -37,8 +37,8 @@ typedef void (*pf_mc_t)( uint8_t *src, int i_src,
- uint8_t *dst, int i_dst, int i_height );
- -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- static inline int x264_tapfilter( uint8_t *pix, int i_pix_next )
- diff --git a/common/set.c b/common/set.c
- index 50d4213..16cff8e 100644
- --- a/common/set.c
- +++ b/common/set.c
- @@ -23,7 +23,7 @@
- #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
- #define DIV(n,d) (((n) + ((d)>>1)) / (d))
- -static const int dequant4_scale[6][3] =
- +static const uint8_t dequant4_scale[6][3] =
- {
- { 10, 13, 16 },
- { 11, 14, 18 },
- @@ -32,7 +32,7 @@ static const int dequant4_scale[6][3] =
- { 16, 20, 25 },
- { 18, 23, 29 }
- };
- -static const int quant4_scale[6][3] =
- +static const uint16_t quant4_scale[6][3] =
- {
- { 13107, 8066, 5243 },
- { 11916, 7490, 4660 },
- @@ -42,11 +42,11 @@ static const int quant4_scale[6][3] =
- { 7282, 4559, 2893 },
- };
- -static const int quant8_scan[16] =
- +static const uint8_t quant8_scan[16] =
- {
- 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
- };
- -static const int dequant8_scale[6][6] =
- +static const uint8_t dequant8_scale[6][6] =
- {
- { 20, 18, 32, 19, 25, 24 },
- { 22, 19, 35, 21, 28, 26 },
- @@ -55,7 +55,7 @@ static const int dequant8_scale[6][6] =
- { 32, 28, 51, 30, 40, 38 },
- { 36, 32, 58, 34, 46, 43 },
- };
- -static const int quant8_scale[6][6] =
- +static const uint16_t quant8_scale[6][6] =
- {
- { 13107, 11428, 20972, 12222, 16777, 15481 },
- { 11916, 10826, 19174, 11058, 14980, 14290 },
- diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
- index 6d386f6..f641cff 100644
- --- a/common/x86/mc-c.c
- +++ b/common/x86/mc-c.c
- @@ -228,8 +228,8 @@ static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
- }
- }
- -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
- +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
- #define MC_LUMA(name,instr1,instr2)\
- static void mc_luma_##name( uint8_t *dst, int i_dst_stride,\
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 42d49bf..e082024 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2816,8 +2816,8 @@ void x264_encoder_close ( x264_t *h )
- /* Slices used and PSNR */
- for( int i = 0; i < 5; i++ )
- {
- - static const int slice_order[] = { SLICE_TYPE_I, SLICE_TYPE_SI, SLICE_TYPE_P, SLICE_TYPE_SP, SLICE_TYPE_B };
- - static const char *slice_name[] = { "P", "B", "I", "SP", "SI" };
- + static const uint8_t slice_order[] = { SLICE_TYPE_I, SLICE_TYPE_SI, SLICE_TYPE_P, SLICE_TYPE_SP, SLICE_TYPE_B };
- + static const char * const slice_name[] = { "P", "B", "I", "SP", "SI" };
- int i_slice = slice_order[i];
- if( h->stat.i_frame_count[i_slice] > 0 )
- diff --git a/encoder/me.c b/encoder/me.c
- index 5e113f0..a35da53 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -484,7 +484,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- int i = 1;
- do
- {
- - static const int hex4[16][2] = {
- + static const int8_t hex4[16][2] = {
- { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3},
- {-4,-2}, { 4,-2}, {-4,-1}, { 4,-1},
- {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1},
- diff --git a/encoder/set.c b/encoder/set.c
- index e3a071c..ce52a4b 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -315,26 +315,22 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
- if( sps->vui.b_aspect_ratio_info_present )
- {
- int i;
- - static const struct { int w, h; int sar; } sar[] =
- + static const struct { uint8_t w, h, sar; } sar[] =
- {
- { 1, 1, 1 }, { 12, 11, 2 }, { 10, 11, 3 }, { 16, 11, 4 },
- { 40, 33, 5 }, { 24, 11, 6 }, { 20, 11, 7 }, { 32, 11, 8 },
- { 80, 33, 9 }, { 18, 11, 10}, { 15, 11, 11}, { 64, 33, 12},
- - { 160,99, 13}, { 0, 0, -1 }
- + { 160,99, 13}, { 0, 0, 255 }
- };
- - for( i = 0; sar[i].sar != -1; i++ )
- + for( i = 0; sar[i].sar != 255; i++ )
- {
- if( sar[i].w == sps->vui.i_sar_width &&
- sar[i].h == sps->vui.i_sar_height )
- break;
- }
- - if( sar[i].sar != -1 )
- + bs_write( s, 8, sar[i].sar );
- + if( sar[i].sar == 255 ) /* aspect_ratio_idc (extended) */
- {
- - bs_write( s, 8, sar[i].sar );
- - }
- - else
- - {
- - bs_write( s, 8, 255); /* aspect_ratio_idc (extended) */
- bs_write( s, 16, sps->vui.i_sar_width );
- bs_write( s, 16, sps->vui.i_sar_height );
- }
- --
- 1.7.0.4
- From 5d1dd185510c753033ed841e55425eded293a10b Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 10 May 2010 22:59:12 -0700
- Subject: [PATCH 5/9] Fix condition for printing rc=cbr in options SEI
- Also print the option as crf_max (underscore instead of hyphen), matching the other keys in the options string.
- ---
- common/common.c | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 848c6de..ad7cf98 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -1237,7 +1237,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
- s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
- - ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size == p->rc.i_bitrate ? "cbr" : "abr" )
- + ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" )
- : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
- if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
- {
- @@ -1256,7 +1256,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
- p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
- if( p->rc.i_rc_method == X264_RC_CRF )
- - s += sprintf( s, " crf-max=%.1f", p->rc.f_rf_constant_max );
- + s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max );
- }
- }
- else if( p->rc.i_rc_method == X264_RC_CQP )
- --
- 1.7.0.4
- From ffaf1e14b54d791f369fc51a534111ddd839c55d Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Wed, 12 May 2010 01:57:38 +0400
- Subject: [PATCH 6/9] Fix crash with sliced-threads on Phenom
- ---
- encoder/encoder.c | 4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index e082024..3a5520f 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2066,6 +2066,10 @@ static void *x264_slices_write( x264_t *h )
- static int x264_threaded_slices_write( x264_t *h )
- {
- void *ret = NULL;
- +#ifdef HAVE_MMX
- + if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- + x264_cpu_mask_misalign_sse();
- +#endif
- /* set first/last mb and sync contexts */
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- --
- 1.7.0.4
- From ec937b4219673bdea810f00bd9cc91f5d174302b Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Wed, 12 May 2010 22:05:34 +0400
- Subject: [PATCH 7/9] Fix bitrate calculation in progress status
- The reported bitrate was slightly incorrect because it was computed from pts, which is out of order; it is now computed from dts.
- ---
- x264.c | 34 +++++++++++++++++++++++++---------
- 1 files changed, 25 insertions(+), 9 deletions(-)
- diff --git a/x264.c b/x264.c
- index 8f4e372..1a85c74 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -1312,7 +1312,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
- * Encode:
- *****************************************************************************/
- -static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *last_pts )
- +static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *last_dts )
- {
- x264_picture_t pic_out;
- x264_nal_t *nal;
- @@ -1330,18 +1330,22 @@ static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
- if( i_frame_size )
- {
- i_frame_size = output.write_frame( hout, nal[0].p_payload, i_frame_size, &pic_out );
- - *last_pts = pic_out.i_pts;
- + *last_dts = pic_out.i_dts;
- }
- return i_frame_size;
- }
- -static void Print_status( int64_t i_start, int i_frame, int i_frame_total, int64_t i_file, x264_param_t *param, int64_t last_pts )
- +static void Print_status( int64_t i_start, int i_frame, int i_frame_total, int64_t i_file, x264_param_t *param, int64_t last_ts )
- {
- char buf[200];
- int64_t i_elapsed = x264_mdate() - i_start;
- double fps = i_elapsed > 0 ? i_frame * 1000000. / i_elapsed : 0;
- - double bitrate = (double) i_file * 8 / ( (double) last_pts * 1000 * param->i_timebase_num / param->i_timebase_den );
- + double bitrate;
- + if( last_ts )
- + bitrate = (double) i_file * 8 / ( (double) last_ts * 1000 * param->i_timebase_num / param->i_timebase_den );
- + else
- + bitrate = (double) i_file * 8 / ( (double) 1000 * param->i_fps_den / param->i_fps_num );
- if( i_frame_total )
- {
- int eta = i_elapsed * (i_frame_total - i_frame) / ((int64_t)i_frame * 1000000);
- @@ -1369,7 +1373,9 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- int64_t i_file = 0;
- int i_frame_size;
- int i_update_interval;
- - int64_t last_pts = 0;
- + int64_t last_dts = 0;
- + int64_t prev_dts = 0;
- + int64_t first_dts = 0;
- # define MAX_PTS_WARNING 3 /* arbitrary */
- int pts_warning_cnt = 0;
- int64_t largest_pts = -1;
- @@ -1506,12 +1512,17 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- pic.i_qpplus1 = 0;
- }
- - i_frame_size = Encode_frame( h, opt->hout, &pic, &last_pts );
- + prev_dts = last_dts;
- + i_frame_size = Encode_frame( h, opt->hout, &pic, &last_dts );
- if( i_frame_size < 0 )
- return -1;
- i_file += i_frame_size;
- if( i_frame_size )
- + {
- i_frame_output++;
- + if( i_frame_output == 1 )
- + first_dts = prev_dts = last_dts;
- + }
- i_frame++;
- @@ -1520,19 +1531,24 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- /* update status line (up to 1000 times per input file) */
- if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
- - Print_status( i_start, i_frame_output, i_frame_total, i_file, param, last_pts );
- + Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
- }
- /* Flush delayed frames */
- while( !b_ctrl_c && x264_encoder_delayed_frames( h ) )
- {
- - i_frame_size = Encode_frame( h, opt->hout, NULL, &last_pts );
- + prev_dts = last_dts;
- + i_frame_size = Encode_frame( h, opt->hout, NULL, &last_dts );
- if( i_frame_size < 0 )
- return -1;
- i_file += i_frame_size;
- if( i_frame_size )
- + {
- i_frame_output++;
- + if( i_frame_output == 1 )
- + first_dts = prev_dts = last_dts;
- + }
- if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
- - Print_status( i_start, i_frame_output, i_frame_total, i_file, param, last_pts );
- + Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
- }
- if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
- fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
- --
- 1.7.0.4
- From d1d7484aba046614add62e2bdc4da23e570525c3 Mon Sep 17 00:00:00 2001
- From: Kieran Kunhya <kieran@kunhya.com>
- Date: Thu, 13 May 2010 19:13:35 +0100
- Subject: [PATCH 8/9] Fix typo in pulldown
- ---
- x264.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/x264.c b/x264.c
- index 1a85c74..862aabb 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -120,7 +120,7 @@ enum pulldown_type_e
- static const cli_pulldown_t pulldown_values[] =
- {
- - [X264_PULLDOWN_22] = {1, {TB}, 2.0},
- + [X264_PULLDOWN_22] = {1, {TB}, 1.0},
- [X264_PULLDOWN_32] = {4, {TBT, BT, BTB, TB}, 1.25},
- [X264_PULLDOWN_64] = {2, {PIC_STRUCT_DOUBLE, PIC_STRUCT_TRIPLE}, 1.0},
- [X264_PULLDOWN_DOUBLE] = {1, {PIC_STRUCT_DOUBLE}, 2.0},
- --
- 1.7.0.4
- From a21e7bd854c8c441a081c4a353b02bf41454bb95 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 15 May 2010 14:48:58 -0700
- Subject: [PATCH 9/9] Overhaul CABAC: faster, less cache usage
- Horribly munge up the CABAC tables to allow deduplication of some data.
- Saves 256 bytes of L1d cache in non-RD, 512 bytes in RD.
- Add asm versions of bypass and terminal; save L1i cache by re-using putbyte code.
- Further optimize encode_decision.
- All 3 primary CABAC functions fit in under 256 bytes of code total on x86_64.
- ---
- common/cabac.c | 185 ++++++++++++++++++++----------------------------
- common/cabac.h | 22 ++++--
- common/x86/cabac-a.asm | 76 ++++++++++++++++----
- common/x86/x86inc.asm | 2 +-
- encoder/rdo.c | 2 +
- tools/checkasm.c | 46 ++++++++++--
- 6 files changed, 195 insertions(+), 138 deletions(-)
- diff --git a/common/cabac.c b/common/cabac.c
- index f50aef6..11988a1 100644
- --- a/common/cabac.c
- +++ b/common/cabac.c
- @@ -664,75 +664,44 @@ static const int8_t x264_cabac_context_init_PB[3][460][2] =
- }
- };
- -/* FIXME could avoid this duplication by reversing the order of states
- - * with MPS=0, but that would uglify the other tables */
- -const uint8_t x264_cabac_range_lps[128][4] =
- +const uint8_t x264_cabac_range_lps[64][4] =
- {
- - { 2, 2, 2, 2 },
- - { 6, 7, 8, 9 }, { 6, 7, 9, 10 }, { 6, 8, 9, 11 },
- - { 7, 8, 10, 11 }, { 7, 9, 10, 12 }, { 7, 9, 11, 12 },
- - { 8, 9, 11, 13 }, { 8, 10, 12, 14 }, { 9, 11, 12, 14 },
- - { 9, 11, 13, 15 }, { 10, 12, 14, 16 }, { 10, 12, 15, 17 },
- - { 11, 13, 15, 18 }, { 11, 14, 16, 19 }, { 12, 14, 17, 20 },
- - { 12, 15, 18, 21 }, { 13, 16, 19, 22 }, { 14, 17, 20, 23 },
- - { 14, 18, 21, 24 }, { 15, 19, 22, 25 }, { 16, 20, 23, 27 },
- - { 17, 21, 25, 28 }, { 18, 22, 26, 30 }, { 19, 23, 27, 31 },
- - { 20, 24, 29, 33 }, { 21, 26, 30, 35 }, { 22, 27, 32, 37 },
- - { 23, 28, 33, 39 }, { 24, 30, 35, 41 }, { 26, 31, 37, 43 },
- - { 27, 33, 39, 45 }, { 29, 35, 41, 48 }, { 30, 37, 43, 50 },
- - { 32, 39, 46, 53 }, { 33, 41, 48, 56 }, { 35, 43, 51, 59 },
- - { 37, 45, 54, 62 }, { 39, 48, 56, 65 }, { 41, 50, 59, 69 },
- - { 43, 53, 63, 72 }, { 46, 56, 66, 76 }, { 48, 59, 69, 80 },
- - { 51, 62, 73, 85 }, { 53, 65, 77, 89 }, { 56, 69, 81, 94 },
- - { 59, 72, 86, 99 }, { 62, 76, 90, 104 }, { 66, 80, 95, 110 },
- - { 69, 85, 100, 116 }, { 73, 89, 105, 122 }, { 77, 94, 111, 128 },
- - { 81, 99, 117, 135 }, { 85, 104, 123, 142 }, { 90, 110, 130, 150 },
- - { 95, 116, 137, 158 }, { 100, 122, 144, 166 }, { 105, 128, 152, 175 },
- - { 111, 135, 160, 185 }, { 116, 142, 169, 195 }, { 123, 150, 178, 205 },
- - { 128, 158, 187, 216 }, { 128, 167, 197, 227 }, { 128, 176, 208, 240 },
- -
- - { 128, 176, 208, 240 }, { 128, 167, 197, 227 }, { 128, 158, 187, 216 },
- - { 123, 150, 178, 205 }, { 116, 142, 169, 195 }, { 111, 135, 160, 185 },
- - { 105, 128, 152, 175 }, { 100, 122, 144, 166 }, { 95, 116, 137, 158 },
- - { 90, 110, 130, 150 }, { 85, 104, 123, 142 }, { 81, 99, 117, 135 },
- - { 77, 94, 111, 128 }, { 73, 89, 105, 122 }, { 69, 85, 100, 116 },
- - { 66, 80, 95, 110 }, { 62, 76, 90, 104 }, { 59, 72, 86, 99 },
- - { 56, 69, 81, 94 }, { 53, 65, 77, 89 }, { 51, 62, 73, 85 },
- - { 48, 59, 69, 80 }, { 46, 56, 66, 76 }, { 43, 53, 63, 72 },
- - { 41, 50, 59, 69 }, { 39, 48, 56, 65 }, { 37, 45, 54, 62 },
- - { 35, 43, 51, 59 }, { 33, 41, 48, 56 }, { 32, 39, 46, 53 },
- - { 30, 37, 43, 50 }, { 29, 35, 41, 48 }, { 27, 33, 39, 45 },
- - { 26, 31, 37, 43 }, { 24, 30, 35, 41 }, { 23, 28, 33, 39 },
- - { 22, 27, 32, 37 }, { 21, 26, 30, 35 }, { 20, 24, 29, 33 },
- - { 19, 23, 27, 31 }, { 18, 22, 26, 30 }, { 17, 21, 25, 28 },
- - { 16, 20, 23, 27 }, { 15, 19, 22, 25 }, { 14, 18, 21, 24 },
- - { 14, 17, 20, 23 }, { 13, 16, 19, 22 }, { 12, 15, 18, 21 },
- - { 12, 14, 17, 20 }, { 11, 14, 16, 19 }, { 11, 13, 15, 18 },
- - { 10, 12, 15, 17 }, { 10, 12, 14, 16 }, { 9, 11, 13, 15 },
- - { 9, 11, 12, 14 }, { 8, 10, 12, 14 }, { 8, 9, 11, 13 },
- - { 7, 9, 11, 12 }, { 7, 9, 10, 12 }, { 7, 8, 10, 11 },
- - { 6, 8, 9, 11 }, { 6, 7, 9, 10 }, { 6, 7, 8, 9 },
- - { 2, 2, 2, 2 },
- + { 2, 2, 2, 2}, { 6, 7, 8, 9}, { 6, 7, 9, 10}, { 6, 8, 9, 11},
- + { 7, 8, 10, 11}, { 7, 9, 10, 12}, { 7, 9, 11, 12}, { 8, 9, 11, 13},
- + { 8, 10, 12, 14}, { 9, 11, 12, 14}, { 9, 11, 13, 15}, { 10, 12, 14, 16},
- + { 10, 12, 15, 17}, { 11, 13, 15, 18}, { 11, 14, 16, 19}, { 12, 14, 17, 20},
- + { 12, 15, 18, 21}, { 13, 16, 19, 22}, { 14, 17, 20, 23}, { 14, 18, 21, 24},
- + { 15, 19, 22, 25}, { 16, 20, 23, 27}, { 17, 21, 25, 28}, { 18, 22, 26, 30},
- + { 19, 23, 27, 31}, { 20, 24, 29, 33}, { 21, 26, 30, 35}, { 22, 27, 32, 37},
- + { 23, 28, 33, 39}, { 24, 30, 35, 41}, { 26, 31, 37, 43}, { 27, 33, 39, 45},
- + { 29, 35, 41, 48}, { 30, 37, 43, 50}, { 32, 39, 46, 53}, { 33, 41, 48, 56},
- + { 35, 43, 51, 59}, { 37, 45, 54, 62}, { 39, 48, 56, 65}, { 41, 50, 59, 69},
- + { 43, 53, 63, 72}, { 46, 56, 66, 76}, { 48, 59, 69, 80}, { 51, 62, 73, 85},
- + { 53, 65, 77, 89}, { 56, 69, 81, 94}, { 59, 72, 86, 99}, { 62, 76, 90, 104},
- + { 66, 80, 95, 110}, { 69, 85, 100, 116}, { 73, 89, 105, 122}, { 77, 94, 111, 128},
- + { 81, 99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158},
- + {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195},
- + {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240}
- };
- const uint8_t x264_cabac_transition[128][2] =
- {
- - { 0, 0}, { 1, 25}, { 1, 25}, { 2, 26}, { 3, 26}, { 4, 26}, { 5, 27}, { 6, 27},
- - { 7, 27}, { 8, 28}, { 9, 28}, { 10, 28}, { 11, 29}, { 12, 29}, { 13, 30}, { 14, 30},
- - { 15, 30}, { 16, 31}, { 17, 31}, { 18, 32}, { 19, 33}, { 20, 33}, { 21, 33}, { 22, 34},
- - { 23, 34}, { 24, 35}, { 25, 36}, { 26, 36}, { 27, 37}, { 28, 37}, { 29, 38}, { 30, 39},
- - { 31, 39}, { 32, 40}, { 33, 41}, { 34, 41}, { 35, 42}, { 36, 42}, { 37, 44}, { 38, 44},
- - { 39, 45}, { 40, 45}, { 41, 47}, { 42, 47}, { 43, 48}, { 44, 48}, { 45, 50}, { 46, 50},
- - { 47, 51}, { 48, 52}, { 49, 52}, { 50, 54}, { 51, 54}, { 52, 55}, { 53, 56}, { 54, 57},
- - { 55, 58}, { 56, 59}, { 57, 59}, { 58, 61}, { 59, 61}, { 60, 62}, { 61, 63}, { 62, 64},
- - { 63, 65}, { 64, 66}, { 65, 67}, { 66, 68}, { 66, 69}, { 68, 70}, { 68, 71}, { 69, 72},
- - { 70, 73}, { 71, 74}, { 72, 75}, { 73, 76}, { 73, 77}, { 75, 78}, { 75, 79}, { 76, 80},
- - { 77, 81}, { 77, 82}, { 79, 83}, { 79, 84}, { 80, 85}, { 80, 86}, { 82, 87}, { 82, 88},
- - { 83, 89}, { 83, 90}, { 85, 91}, { 85, 92}, { 86, 93}, { 86, 94}, { 87, 95}, { 88, 96},
- - { 88, 97}, { 89, 98}, { 90, 99}, { 90,100}, { 91,101}, { 91,102}, { 92,103}, { 93,104},
- - { 93,105}, { 94,106}, { 94,107}, { 94,108}, { 95,109}, { 96,110}, { 96,111}, { 97,112},
- - { 97,113}, { 97,114}, { 98,115}, { 98,116}, { 99,117}, { 99,118}, { 99,119}, {100,120},
- - {100,121}, {100,122}, {101,123}, {101,124}, {101,125}, {102,126}, {102,126}, {127,127},
- + { 0, 0}, { 1, 1}, { 2, 50}, { 51, 3}, { 2, 50}, { 51, 3}, { 4, 52}, { 53, 5},
- + { 6, 52}, { 53, 7}, { 8, 52}, { 53, 9}, { 10, 54}, { 55, 11}, { 12, 54}, { 55, 13},
- + { 14, 54}, { 55, 15}, { 16, 56}, { 57, 17}, { 18, 56}, { 57, 19}, { 20, 56}, { 57, 21},
- + { 22, 58}, { 59, 23}, { 24, 58}, { 59, 25}, { 26, 60}, { 61, 27}, { 28, 60}, { 61, 29},
- + { 30, 60}, { 61, 31}, { 32, 62}, { 63, 33}, { 34, 62}, { 63, 35}, { 36, 64}, { 65, 37},
- + { 38, 66}, { 67, 39}, { 40, 66}, { 67, 41}, { 42, 66}, { 67, 43}, { 44, 68}, { 69, 45},
- + { 46, 68}, { 69, 47}, { 48, 70}, { 71, 49}, { 50, 72}, { 73, 51}, { 52, 72}, { 73, 53},
- + { 54, 74}, { 75, 55}, { 56, 74}, { 75, 57}, { 58, 76}, { 77, 59}, { 60, 78}, { 79, 61},
- + { 62, 78}, { 79, 63}, { 64, 80}, { 81, 65}, { 66, 82}, { 83, 67}, { 68, 82}, { 83, 69},
- + { 70, 84}, { 85, 71}, { 72, 84}, { 85, 73}, { 74, 88}, { 89, 75}, { 76, 88}, { 89, 77},
- + { 78, 90}, { 91, 79}, { 80, 90}, { 91, 81}, { 82, 94}, { 95, 83}, { 84, 94}, { 95, 85},
- + { 86, 96}, { 97, 87}, { 88, 96}, { 97, 89}, { 90, 100}, {101, 91}, { 92, 100}, {101, 93},
- + { 94, 102}, {103, 95}, { 96, 104}, {105, 97}, { 98, 104}, {105, 99}, {100, 108}, {109, 101},
- + {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109},
- + {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117},
- + {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125}
- };
- const uint8_t x264_cabac_renorm_shift[64]= {
- @@ -743,41 +712,40 @@ const uint8_t x264_cabac_renorm_shift[64]= {
- };
- /* -ln2(probability) */
- -#define F(a,b) {FIX8(a),FIX8(b)}
- -const uint16_t x264_cabac_entropy[128][2] =
- +const uint16_t x264_cabac_entropy[128] =
- {
- - F(0.0273,5.7370), F(0.0288,5.6618), F(0.0303,5.5866), F(0.0320,5.5114),
- - F(0.0337,5.4362), F(0.0355,5.3610), F(0.0375,5.2859), F(0.0395,5.2106),
- - F(0.0416,5.1354), F(0.0439,5.0602), F(0.0463,4.9851), F(0.0488,4.9099),
- - F(0.0515,4.8347), F(0.0543,4.7595), F(0.0572,4.6843), F(0.0604,4.6091),
- - F(0.0637,4.5339), F(0.0671,4.4588), F(0.0708,4.3836), F(0.0747,4.3083),
- - F(0.0788,4.2332), F(0.0832,4.1580), F(0.0878,4.0828), F(0.0926,4.0076),
- - F(0.0977,3.9324), F(0.1032,3.8572), F(0.1089,3.7820), F(0.1149,3.7068),
- - F(0.1214,3.6316), F(0.1282,3.5565), F(0.1353,3.4813), F(0.1429,3.4061),
- - F(0.1510,3.3309), F(0.1596,3.2557), F(0.1686,3.1805), F(0.1782,3.1053),
- - F(0.1884,3.0301), F(0.1992,2.9549), F(0.2107,2.8797), F(0.2229,2.8046),
- - F(0.2358,2.7294), F(0.2496,2.6542), F(0.2642,2.5790), F(0.2798,2.5038),
- - F(0.2964,2.4286), F(0.3142,2.3534), F(0.3331,2.2782), F(0.3532,2.2030),
- - F(0.3748,2.1278), F(0.3979,2.0527), F(0.4226,1.9775), F(0.4491,1.9023),
- - F(0.4776,1.8271), F(0.5082,1.7519), F(0.5412,1.6767), F(0.5768,1.6015),
- - F(0.6152,1.5263), F(0.6568,1.4511), F(0.7020,1.3759), F(0.7513,1.3008),
- - F(0.8050,1.2256), F(0.8638,1.1504), F(0.9285,1.0752), F(1.0000,1.0000),
- - F(1.0000,1.0000), F(1.0752,0.9285), F(1.1504,0.8638), F(1.2256,0.8050),
- - F(1.3008,0.7513), F(1.3759,0.7020), F(1.4511,0.6568), F(1.5263,0.6152),
- - F(1.6015,0.5768), F(1.6767,0.5412), F(1.7519,0.5082), F(1.8271,0.4776),
- - F(1.9023,0.4491), F(1.9775,0.4226), F(2.0527,0.3979), F(2.1278,0.3748),
- - F(2.2030,0.3532), F(2.2782,0.3331), F(2.3534,0.3142), F(2.4286,0.2964),
- - F(2.5038,0.2798), F(2.5790,0.2642), F(2.6542,0.2496), F(2.7294,0.2358),
- - F(2.8046,0.2229), F(2.8797,0.2107), F(2.9549,0.1992), F(3.0301,0.1884),
- - F(3.1053,0.1782), F(3.1805,0.1686), F(3.2557,0.1596), F(3.3309,0.1510),
- - F(3.4061,0.1429), F(3.4813,0.1353), F(3.5565,0.1282), F(3.6316,0.1214),
- - F(3.7068,0.1149), F(3.7820,0.1089), F(3.8572,0.1032), F(3.9324,0.0977),
- - F(4.0076,0.0926), F(4.0828,0.0878), F(4.1580,0.0832), F(4.2332,0.0788),
- - F(4.3083,0.0747), F(4.3836,0.0708), F(4.4588,0.0671), F(4.5339,0.0637),
- - F(4.6091,0.0604), F(4.6843,0.0572), F(4.7595,0.0543), F(4.8347,0.0515),
- - F(4.9099,0.0488), F(4.9851,0.0463), F(5.0602,0.0439), F(5.1354,0.0416),
- - F(5.2106,0.0395), F(5.2859,0.0375), F(5.3610,0.0355), F(5.4362,0.0337),
- - F(5.5114,0.0320), F(5.5866,0.0303), F(5.6618,0.0288), F(5.7370,0.0273),
- + FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618),
- + FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114),
- + FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610),
- + FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106),
- + FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602),
- + FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099),
- + FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595),
- + FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091),
- + FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588),
- + FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083),
- + FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580),
- + FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076),
- + FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572),
- + FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068),
- + FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565),
- + FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061),
- + FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557),
- + FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053),
- + FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549),
- + FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046),
- + FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542),
- + FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038),
- + FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534),
- + FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030),
- + FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527),
- + FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023),
- + FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519),
- + FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015),
- + FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511),
- + FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008),
- + FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504),
- + FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
- };
- @@ -794,14 +762,17 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
- cabac_context_init = &x264_cabac_context_init_PB[i_model];
- for( int i = 0; i < 460; i++ )
- - cb->state[i] = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
- + {
- + int state = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
- + cb->state[i] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
- + }
- }
- void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
- {
- cb->i_low = 0;
- cb->i_range = 0x01FE;
- - cb->i_queue = -1; // the first bit will be shifted away and not written
- + cb->i_queue = -9; // the first bit will be shifted away and not written
- cb->i_bytes_outstanding = 0;
- cb->p_start = p_data;
- cb->p = p_data;
- @@ -810,10 +781,10 @@ void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
- static inline void x264_cabac_putbyte( x264_cabac_t *cb )
- {
- - if( cb->i_queue >= 8 )
- + if( cb->i_queue >= 0 )
- {
- - int out = cb->i_low >> (cb->i_queue+2);
- - cb->i_low &= (4<<cb->i_queue)-1;
- + int out = cb->i_low >> (cb->i_queue+10);
- + cb->i_low &= (0x400<<cb->i_queue)-1;
- cb->i_queue -= 8;
- if( (out & 0xff) == 0xff )
- @@ -855,9 +826,9 @@ static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
- void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
- {
- int i_state = cb->state[i_ctx];
- - int i_range_lps = x264_cabac_range_lps[i_state][(cb->i_range>>6)-4];
- + int i_range_lps = x264_cabac_range_lps[i_state>>1][(cb->i_range>>6)-4];
- cb->i_range -= i_range_lps;
- - if( b != (i_state >> 6) )
- + if( b != (i_state & 1) )
- {
- cb->i_low += cb->i_range;
- cb->i_range = i_range_lps;
- @@ -866,7 +837,7 @@ void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
- x264_cabac_encode_renorm( cb );
- }
- -void x264_cabac_encode_bypass( x264_cabac_t *cb, int b )
- +void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
- {
- cb->i_low <<= 1;
- cb->i_low += -b & cb->i_range;
- @@ -892,7 +863,7 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
- } while( k > 0 );
- }
- -void x264_cabac_encode_terminal( x264_cabac_t *cb )
- +void x264_cabac_encode_terminal_c( x264_cabac_t *cb )
- {
- cb->i_range -= 2;
- x264_cabac_encode_renorm( cb );
- diff --git a/common/cabac.h b/common/cabac.h
- index ef68fe6..9fc3007 100644
- --- a/common/cabac.h
- +++ b/common/cabac.h
- @@ -31,7 +31,7 @@ typedef struct
- int i_range;
- /* bit stream */
- - int i_queue;
- + int i_queue; //stored with an offset of -8 for faster asm
- int i_bytes_outstanding;
- uint8_t *p_start;
- @@ -46,7 +46,7 @@ typedef struct
- } x264_cabac_t;
- extern const uint8_t x264_cabac_transition[128][2];
- -extern const uint16_t x264_cabac_entropy[128][2];
- +extern const uint16_t x264_cabac_entropy[128];
- /* init the contexts given i_slice_type, the quantif and the model */
- void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
- @@ -55,15 +55,21 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
- void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
- void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b );
- void x264_cabac_encode_decision_asm( x264_cabac_t *cb, int i_ctx, int b );
- -void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
- +void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b );
- +void x264_cabac_encode_bypass_asm( x264_cabac_t *cb, int b );
- +void x264_cabac_encode_terminal_c( x264_cabac_t *cb );
- +void x264_cabac_encode_terminal_asm( x264_cabac_t *cb );
- void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val );
- -void x264_cabac_encode_terminal( x264_cabac_t *cb );
- void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
- #ifdef HAVE_MMX
- #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
- +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
- +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
- #else
- #define x264_cabac_encode_decision x264_cabac_encode_decision_c
- +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_c
- +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_c
- #endif
- #define x264_cabac_encode_decision_noup x264_cabac_encode_decision
- @@ -78,25 +84,25 @@ static ALWAYS_INLINE void x264_cabac_size_decision( x264_cabac_t *cb, long i_ctx
- {
- int i_state = cb->state[i_ctx];
- cb->state[i_ctx] = x264_cabac_transition[i_state][b];
- - cb->f8_bits_encoded += x264_cabac_entropy[i_state][b];
- + cb->f8_bits_encoded += x264_cabac_entropy[i_state^b];
- }
- static ALWAYS_INLINE int x264_cabac_size_decision2( uint8_t *state, long b )
- {
- int i_state = *state;
- *state = x264_cabac_transition[i_state][b];
- - return x264_cabac_entropy[i_state][b];
- + return x264_cabac_entropy[i_state^b];
- }
- static ALWAYS_INLINE void x264_cabac_size_decision_noup( x264_cabac_t *cb, long i_ctx, long b )
- {
- int i_state = cb->state[i_ctx];
- - cb->f8_bits_encoded += x264_cabac_entropy[i_state][b];
- + cb->f8_bits_encoded += x264_cabac_entropy[i_state^b];
- }
- static ALWAYS_INLINE int x264_cabac_size_decision_noup2( uint8_t *state, long b )
- {
- - return x264_cabac_entropy[*state][b];
- + return x264_cabac_entropy[*state^b];
- }
- #endif
- diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
- index 2af98c7..8621c5b 100644
- --- a/common/x86/cabac-a.asm
- +++ b/common/x86/cabac-a.asm
- @@ -32,13 +32,13 @@ cextern cabac_renorm_shift
- ; t3 must be ecx, since it's used for shift.
- %ifdef WIN64
- - DECLARE_REG_TMP 3,1,2,0,4,5,6,10
- + DECLARE_REG_TMP 3,1,2,0,4,5,6,10,2
- %define pointer resq
- %elifdef ARCH_X86_64
- - DECLARE_REG_TMP 0,1,2,3,4,5,6,10
- + DECLARE_REG_TMP 0,1,2,3,4,5,6,10,6
- %define pointer resq
- %else
- - DECLARE_REG_TMP 0,4,2,1,3,5,6,2
- + DECLARE_REG_TMP 0,4,2,1,3,5,6,2,2
- %define pointer resd
- %endif
- @@ -72,13 +72,15 @@ cglobal cabac_encode_decision_asm, 0,7
- movifnidn t0, r0mp
- movifnidn t1d, r1m
- mov t5d, [t0+cb.range]
- - movzx t6d, byte [t0+cb.state+t1]
- + movzx t4d, byte [t0+cb.state+t1]
- mov t3d, t5d
- + mov t6d, t4d
- shr t5d, 6
- + shr t4d, 1
- movifnidn t2d, r2m
- - LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t6*4
- + LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*4
- LOAD_GLOBAL t4d, cabac_transition, t2, t6*2
- - shr t6d, 6
- + and t6d, 1
- sub t3d, t5d
- cmp t6d, t2d
- mov t6d, [t0+cb.low]
- @@ -94,20 +96,66 @@ cglobal cabac_encode_decision_asm, 0,7
- shl t6d, t3b
- add t3d, [t0+cb.queue]
- mov [t0+cb.range], t4d
- - cmp t3d, 8
- - jl .update_queue_low
- -;cabac_putbyte
- + jge cabac_putbyte
- +.update_queue_low:
- + mov [t0+cb.low], t6d
- + mov [t0+cb.queue], t3d
- + RET
- +
- +cglobal cabac_encode_bypass_asm, 0,3
- + movifnidn t0, r0mp
- + movifnidn t3d, r1m
- + neg t3d
- + mov t8d, [t0+cb.low]
- + and t3d, [t0+cb.range]
- + lea t8d, [t8*2+t3]
- + mov t3d, [t0+cb.queue]
- + inc t3d
- +%ifdef UNIX64 ; .putbyte compiles to nothing but a jmp
- + jge cabac_putbyte
- +%else
- + jge .putbyte
- +%endif
- + mov [t0+cb.low], t8d
- + mov [t0+cb.queue], t3d
- + RET
- +.putbyte:
- + PROLOGUE 0,7
- + movifnidn t6d, t8d
- + jmp cabac_putbyte
- +
- +cglobal cabac_encode_terminal_asm, 0,3
- + movifnidn t0, r0mp
- + sub dword [t0+cb.range], 2
- +; shortcut: the renormalization shift in terminal
- +; can only be 0 or 1 and is zero over 99% of the time.
- + test dword [t0+cb.range], 0x100
- + je .renorm
- + REP_RET
- +.renorm:
- + shl dword [t0+cb.low], 1
- + shl dword [t0+cb.range], 1
- + inc dword [t0+cb.queue]
- + jge .putbyte
- + REP_RET
- +.putbyte:
- + PROLOGUE 0,7
- + mov t3d, [t0+cb.queue]
- + mov t6d, [t0+cb.low]
- + jmp cabac_putbyte
- +
- +cabac_putbyte:
- ; alive: t0=cb t3=queue t6=low
- %ifdef WIN64
- DECLARE_REG_TMP 3,4,1,0,2,5,6,10
- %endif
- mov t1d, -1
- - add t3d, 2
- + add t3d, 10
- mov t2d, t6d
- shl t1d, t3b
- shr t2d, t3b ; out
- not t1d
- - sub t3d, 10
- + sub t3d, 18
- and t6d, t1d
- mov t5d, [t0+cb.bytes_outstanding]
- cmp t2b, 0xff ; FIXME is a 32bit op faster?
- @@ -125,8 +173,4 @@ cglobal cabac_encode_decision_asm, 0,7
- .postpone:
- inc t5d
- mov [t0+cb.bytes_outstanding], t5d
- -.update_queue_low:
- - mov [t0+cb.low], t6d
- - mov [t0+cb.queue], t3d
- - RET
- -
- + jmp mangle(x264_cabac_encode_decision_asm.update_queue_low)
- diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
- index 9d23640..f006f37 100644
- --- a/common/x86/x86inc.asm
- +++ b/common/x86/x86inc.asm
- @@ -171,7 +171,7 @@ DECLARE_REG_SIZE bp, bpl
- %endrep
- %endmacro
- -DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
- +DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
- %ifdef ARCH_X86_64
- %define gprsize 8
- diff --git a/encoder/rdo.c b/encoder/rdo.c
- index 4d83b6a..574a484 100644
- --- a/encoder/rdo.c
- +++ b/encoder/rdo.c
- @@ -50,6 +50,8 @@ static uint16_t cabac_size_5ones[128];
- * fractional bits, but only finite precision. */
- #undef x264_cabac_encode_decision
- #undef x264_cabac_encode_decision_noup
- +#undef x264_cabac_encode_bypass
- +#undef x264_cabac_encode_terminal
- #define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v)
- #define x264_cabac_encode_decision_noup(c,x,v) x264_cabac_size_decision_noup(c,x,v)
- #define x264_cabac_encode_terminal(c) ((c)->f8_bits_encoded += 7)
- diff --git a/tools/checkasm.c b/tools/checkasm.c
- index 2008d2f..9bc15c8 100644
- --- a/tools/checkasm.c
- +++ b/tools/checkasm.c
- @@ -1556,32 +1556,66 @@ static int check_intra( int cpu_ref, int cpu_new )
- }
- #define DECL_CABAC(cpu) \
- -static void run_cabac_##cpu( uint8_t *dst )\
- +static void run_cabac_decision_##cpu( uint8_t *dst )\
- {\
- x264_cabac_t cb;\
- x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
- x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
- for( int i = 0; i < 0x1000; i++ )\
- x264_cabac_encode_decision_##cpu( &cb, buf1[i]>>1, buf1[i]&1 );\
- +}\
- +static void run_cabac_bypass_##cpu( uint8_t *dst )\
- +{\
- + x264_cabac_t cb;\
- + x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
- + x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
- + for( int i = 0; i < 0x1000; i++ )\
- + x264_cabac_encode_bypass_##cpu( &cb, buf1[i]&1 );\
- +}\
- +static void run_cabac_terminal_##cpu( uint8_t *dst )\
- +{\
- + x264_cabac_t cb;\
- + x264_cabac_context_init( &cb, SLICE_TYPE_P, 26, 0 );\
- + x264_cabac_encode_init( &cb, dst, dst+0xff0 );\
- + for( int i = 0; i < 0x1000; i++ )\
- + x264_cabac_encode_terminal_##cpu( &cb );\
- }
- DECL_CABAC(c)
- #ifdef HAVE_MMX
- DECL_CABAC(asm)
- #else
- -#define run_cabac_asm run_cabac_c
- +#define run_cabac_decision_asm run_cabac_decision_c
- +#define run_cabac_bypass_asm run_cabac_bypass_c
- +#define run_cabac_terminal_asm run_cabac_terminal_c
- #endif
- static int check_cabac( int cpu_ref, int cpu_new )
- {
- int ret = 0, ok, used_asm = 1;
- - if( cpu_ref || run_cabac_c == run_cabac_asm)
- + if( cpu_ref || run_cabac_decision_c == run_cabac_decision_asm )
- return 0;
- +
- set_func_name( "cabac_encode_decision" );
- memcpy( buf4, buf3, 0x1000 );
- - call_c( run_cabac_c, buf3 );
- - call_a( run_cabac_asm, buf4 );
- + call_c( run_cabac_decision_c, buf3 );
- + call_a( run_cabac_decision_asm, buf4 );
- + ok = !memcmp( buf3, buf4, 0x1000 );
- + report( "cabac decision:" );
- +
- + set_func_name( "cabac_encode_bypass" );
- + memcpy( buf4, buf3, 0x1000 );
- + call_c( run_cabac_bypass_c, buf3 );
- + call_a( run_cabac_bypass_asm, buf4 );
- ok = !memcmp( buf3, buf4, 0x1000 );
- - report( "cabac :" );
- + report( "cabac bypass:" );
- +
- + set_func_name( "cabac_encode_terminal" );
- + memcpy( buf4, buf3, 0x1000 );
- + call_c( run_cabac_terminal_c, buf3 );
- + call_a( run_cabac_terminal_asm, buf4 );
- + ok = !memcmp( buf3, buf4, 0x1000 );
- + report( "cabac terminal:" );
- +
- return ret;
- }
- --
- 1.7.0.4
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement