Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 15 Jun 2010 05:15:42 -0700
- Subject: [PATCH 01/10] Fix compilation on ARM w/ Apple ABI
- ---
- encoder/me.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/encoder/me.c b/encoder/me.c
- index 2914eb3..291104a 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- pmv = pack16to32_mask( bmx, bmy );
- if( i_mvc > 0 )
- {
- - ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
- + ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
- x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
- bcost <<= 4;
- for( int i = 1; i <= i_mvc; i++ )
- --
- 1.7.0.4
- From 69e9d85c292cb9daa96664657352bf6c65af5825 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Sat, 19 Jun 2010 01:44:56 +0400
- Subject: [PATCH 02/10] Fix SIGPIPEs caused by is_regular_file checks
- Check to see if input file is a pipe without opening it.
- ---
- common/osdep.h | 10 +++++++++-
- x264.c | 1 +
- 2 files changed, 10 insertions(+), 1 deletions(-)
- diff --git a/common/osdep.h b/common/osdep.h
- index b1b357c..b3a8cd6 100644
- --- a/common/osdep.h
- +++ b/common/osdep.h
- @@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
- {
- struct stat file_stat;
- if( fstat( fileno( filehandle ), &file_stat ) )
- - return 0;
- + return -1;
- + return S_ISREG( file_stat.st_mode );
- +}
- +
- +static inline uint8_t x264_is_regular_file_path( const char *filename )
- +{
- + struct stat file_stat;
- + if( stat( filename, &file_stat ) )
- + return -1;
- return S_ISREG( file_stat.st_mode );
- }
- diff --git a/x264.c b/x264.c
- index a124083..09bad61 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -806,6 +806,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
- int b_auto = !strcasecmp( demuxer, "auto" );
- if( !b_regular && b_auto )
- ext = "yuv";
- + b_regular = b_regular && x264_is_regular_file_path( filename );
- if( b_regular )
- {
- FILE *f = fopen( filename, "r" );
- --
- 1.7.0.4
- From 4c27afb595ac8e8a621ffc2bf8120f0d43c80384 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 22 Jun 2010 14:20:46 -0700
- Subject: [PATCH 03/10] Use -fno-tree-vectorize to avoid miscompilation
- Some versions of gcc have been reported to attempt (and fail) to vectorize a loop in plane_expand_border.
- This results in a segfault, so to limit the possible effects of gcc's utter incompetence, we're turning off vectorization entirely.
- It's not like it ever did anything useful to begin with.
- ---
- configure | 4 ++++
- 1 files changed, 4 insertions(+), 0 deletions(-)
- diff --git a/configure b/configure
- index 3a38cb5..24d15ad 100755
- --- a/configure
- +++ b/configure
- @@ -628,6 +628,10 @@ else
- CFLAGS="-O3 -ffast-math $CFLAGS"
- fi
- +if cc_check '' -fno-tree-vectorize ; then
- + CFLAGS="$CFLAGS -fno-tree-vectorize"
- +fi
- +
- if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
- define fseek fseeko
- define ftell ftello
- --
- 1.7.0.4
- From 88b90d9ca0582ff340a6d821d104f25917baa693 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 19 Jun 2010 03:27:33 -0700
- Subject: [PATCH 04/10] Improve HRD accuracy
- In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
- Accordingly, convert buffer management to work in units of timescale.
- These accumulating rounding errors probably didn't cause any real problems, but might in theory cause issues in very picky muxers on extremely long-running streams.
- ---
- common/common.c | 36 ++++++++++++++-----------
- common/common.h | 1 +
- encoder/encoder.c | 3 +-
- encoder/ratecontrol.c | 67 ++++++++++++++++++++++++++++++++-----------------
- encoder/ratecontrol.h | 2 +-
- 5 files changed, 67 insertions(+), 42 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 4fa5e4b..2a9c76e 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -1080,24 +1080,28 @@ void x264_free( void *p )
- /****************************************************************************
- * x264_reduce_fraction:
- ****************************************************************************/
- -void x264_reduce_fraction( uint32_t *n, uint32_t *d )
- -{
- - uint32_t a = *n;
- - uint32_t b = *d;
- - uint32_t c;
- - if( !a || !b )
- - return;
- - c = a % b;
- - while(c)
- - {
- - a = b;
- - b = c;
- - c = a % b;
- - }
- - *n /= b;
- - *d /= b;
- +#define REDUCE_FRACTION( name, type )\
- +void name( type *n, type *d )\
- +{ \
- + type a = *n; \
- + type b = *d; \
- + type c; \
- + if( !a || !b ) \
- + return; \
- + c = a % b; \
- + while( c ) \
- + { \
- + a = b; \
- + b = c; \
- + c = a % b; \
- + } \
- + *n /= b; \
- + *d /= b; \
- }
- +REDUCE_FRACTION( x264_reduce_fraction , uint32_t )
- +REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
- +
- /****************************************************************************
- * x264_slurp_file:
- ****************************************************************************/
- diff --git a/common/common.h b/common/common.h
- index abb5db2..3d522eb 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -183,6 +183,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
- void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
- void x264_reduce_fraction( uint32_t *n, uint32_t *d );
- +void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
- void x264_init_vlc_tables();
- static ALWAYS_INLINE pixel x264_clip_pixel( int x )
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 08a28bd..aebb536 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2569,8 +2569,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- /* generate sei buffering period and insert it into place */
- if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
- {
- - h->initial_cpb_removal_delay = x264_hrd_fullness( h );
- -
- + x264_hrd_fullness( h );
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- x264_sei_buffering_period_write( h, &h->out.bs );
- if( x264_nal_end( h ) )
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 2c05ad7..aef5083 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -91,7 +91,7 @@ struct x264_ratecontrol_t
- /* VBV stuff */
- double buffer_size;
- - double buffer_fill_final; /* real buffer as of the last finished frame */
- + int64_t buffer_fill_final;
- double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
- double buffer_rate; /* # of bits added to buffer_fill after each frame */
- double vbv_max_rate; /* # of bits added to buffer_fill per second */
- @@ -157,6 +157,7 @@ struct x264_ratecontrol_t
- int initial_cpb_removal_delay_offset;
- double nrt_first_access_unit; /* nominal removal time */
- double previous_cpb_final_arrival_time;
- + uint64_t hrd_multiply_denom;
- };
- @@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
- /* Init HRD */
- + h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
- + h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
- if( h->param.i_nal_hrd && b_init )
- {
- h->sps->vui.hrd.i_cpb_cnt = 1;
- @@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- #undef MAX_DURATION
- - vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
- - vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
- + vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
- + vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
- }
- else if( h->param.i_nal_hrd && !b_init )
- {
- @@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- if( h->param.rc.f_vbv_buffer_init > 1. )
- h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
- - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- + rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
- rc->b_vbv = 1;
- rc->b_vbv_min_rate = !rc->b_2pass
- && h->param.rc.i_rc_method == X264_RC_ABR
- @@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
- x264_ratecontrol_init_reconfigurable( h, 1 );
- + if( h->param.i_nal_hrd )
- + {
- + uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
- + uint64_t num = 180000;
- + x264_reduce_fraction64( &num, &denom );
- + rc->hrd_multiply_denom = 180000 / num;
- +
- + double bits_required = log2( 180000 / rc->hrd_multiply_denom )
- + + log2( h->sps->vui.i_time_scale )
- + + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
- + if( bits_required >= 63 )
- + {
- + x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
- + return -1;
- + }
- + }
- +
- if( rc->rate_tolerance < 0.01 )
- {
- x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
- @@ -1722,9 +1742,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
- static int update_vbv( x264_t *h, int bits )
- {
- int filler = 0;
- -
- + int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
- x264_ratecontrol_t *rcc = h->rc;
- x264_ratecontrol_t *rct = h->thread[0]->rc;
- + uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
- if( rcc->last_satd >= h->mb.i_mb_count )
- update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
- @@ -1732,48 +1753,48 @@ static int update_vbv( x264_t *h, int bits )
- if( !rcc->b_vbv )
- return filler;
- - rct->buffer_fill_final -= bits;
- + rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
- if( rct->buffer_fill_final < 0 )
- - x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
- + x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
- rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
- - rct->buffer_fill_final += rcc->buffer_rate;
- + rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
- - if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
- + if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
- {
- - filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
- - rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
- + filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
- + bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
- + rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
- }
- else
- - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
- + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
- return filler;
- }
- -int x264_hrd_fullness( x264_t *h )
- +void x264_hrd_fullness( x264_t *h ) /* stores initial_cpb_removal_delay and its offset in h, from thread 0's VBV state */
- {
- x264_ratecontrol_t *rct = h->thread[0]->rc;
- - double cpb_bits = rct->buffer_fill_final;
- - double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
- - double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
- - double cpb_fullness = 90000.0*cpb_bits/bps;
- + uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
- + uint64_t cpb_state = rct->buffer_fill_final; /* buffer fill in bits * time_scale */
- + uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; /* same units as cpb_state */
- + uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
- - if( cpb_bits < 0 || cpb_bits > cpb_size )
- + if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) /* test the signed field: the unsigned copy can never be < 0 */
- {
- x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
- - cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
- + rct->buffer_fill_final < 0 ? "underflow" : "overflow", (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); /* divide by time_scale to print plain bits */
- }
- - h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
- -
- - return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
- + h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom); /* 90000 * cpb_state / (bitrate * time_scale), rounded to nearest */
- + h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay;
- }
- // provisionally update VBV according to the planned size of all frames currently in progress
- static void update_vbv_plan( x264_t *h, int overhead )
- {
- x264_ratecontrol_t *rcc = h->rc;
- - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
- + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
- if( h->i_thread_frames > 1 )
- {
- int j = h->rc - h->thread[0]->rc;
- diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
- index dd139eb..f39c070 100644
- --- a/encoder/ratecontrol.h
- +++ b/encoder/ratecontrol.h
- @@ -47,6 +47,6 @@ int x264_rc_analyse_slice( x264_t *h );
- int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
- void x264_threads_distribute_ratecontrol( x264_t *h );
- void x264_threads_merge_ratecontrol( x264_t *h );
- -int x264_hrd_fullness( x264_t *h );
- +void x264_hrd_fullness( x264_t *h );
- #endif
- --
- 1.7.0.4
- From ef0c268a2b1a42ff234ef27a535c16b825452ae0 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 18 Jun 2010 13:58:11 -0700
- Subject: [PATCH 05/10] SSE4 and SSSE3 versions of some intra_sad functions
- Primarily Nehalem-optimized.
- ---
- common/pixel.c | 3 +
- common/x86/pixel.h | 2 +
- common/x86/sad-a.asm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
- 3 files changed, 112 insertions(+), 0 deletions(-)
- diff --git a/common/pixel.c b/common/pixel.c
- index a8cb1df..8441c7a 100644
- --- a/common/pixel.c
- +++ b/common/pixel.c
- @@ -856,6 +856,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
- }
- pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
- pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
- + pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
- + /* Slower on Conroe, so only enable under SSE4 */
- + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
- }
- #endif //HAVE_MMX
- diff --git a/common/x86/pixel.h b/common/x86/pixel.h
- index 9bba683..b1b916d 100644
- --- a/common/x86/pixel.h
- +++ b/common/x86/pixel.h
- @@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4, ( uint8_t *pix, int i_stride ))
- void x264_intra_satd_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_4x4_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
- +void x264_intra_sad_x3_4x4_sse4 ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
- @@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
- +void x264_intra_sad_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
- void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
- void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
- diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
- index 72c1789..10a365c 100644
- --- a/common/x86/sad-a.asm
- +++ b/common/x86/sad-a.asm
- @@ -26,6 +26,19 @@
- %include "x86inc.asm"
- %include "x86util.asm"
- +SECTION_RODATA
- +
- +h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
- +h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- +h8x8_pred_shuf: times 8 db 1
- + times 8 db 0
- + times 8 db 3
- + times 8 db 2
- + times 8 db 5
- + times 8 db 4
- + times 8 db 7
- + times 8 db 6
- +
- SECTION .text
- cextern pb_3
- @@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
- movd [r2+4], mm1 ;H prediction cost
- RET
- +cglobal intra_sad_x3_4x4_sse4, 3,3
- + movd xmm4, [r1+FDEC_STRIDE*0-4]
- + pinsrd xmm4, [r1+FDEC_STRIDE*1-4], 1
- + pinsrd xmm4, [r1+FDEC_STRIDE*2-4], 2
- + pinsrd xmm4, [r1+FDEC_STRIDE*3-4], 3
- + movd xmm2, [r1-FDEC_STRIDE]
- + pxor xmm3, xmm3
- + movdqa xmm5, xmm4
- + pshufb xmm4, [h4x4_pred_shuf2] ; EFGH
- + pshufb xmm5, [h4x4_pred_shuf] ; EEEEFFFFGGGGHHHH
- + pshufd xmm0, xmm2, 0 ; ABCDABCDABCDABCD
- + punpckldq xmm2, xmm4 ; ABCDEFGH
- + psadbw xmm2, xmm3
- + movd xmm1, [r0+FENC_STRIDE*0]
- + pinsrd xmm1, [r0+FENC_STRIDE*1], 1
- + pinsrd xmm1, [r0+FENC_STRIDE*2], 2
- + pinsrd xmm1, [r0+FENC_STRIDE*3], 3
- + psadbw xmm0, xmm1
- + psadbw xmm5, xmm1
- + psraw xmm2, 2
- + pavgw xmm2, xmm3
- + pshufb xmm2, xmm3 ; DC prediction
- + movdqa xmm3, xmm0
- + punpcklqdq xmm0, xmm5
- + punpckhqdq xmm3, xmm5
- + psadbw xmm2, xmm1
- + paddw xmm0, xmm3
- + movhlps xmm4, xmm2
- + packusdw xmm0, xmm0
- + paddw xmm2, xmm4
- + movq [r2], xmm0 ; V/H prediction costs
- + movd [r2+8], xmm2 ; DC prediction cost
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
- ;-----------------------------------------------------------------------------
- @@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
- movd [r2+8], m1
- RET
- +INIT_XMM
- +cglobal intra_sad_x3_8x8_ssse3, 3,4,9
- +%ifdef PIC
- + lea r11, [h8x8_pred_shuf]
- +%define shuf r11
- +%else
- +%define shuf h8x8_pred_shuf
- +%endif
- + movq m0, [r1+7] ; left pixels
- + movq m1, [r1+16] ; top pixels
- + pxor m2, m2
- + pxor m3, m3
- + psadbw m2, m0
- + psadbw m3, m1
- + paddw m2, m3
- + pxor m3, m3 ; V score accumulator
- + psraw m2, 3
- + pavgw m2, m3
- + punpcklqdq m1, m1 ; V prediction
- + pshufb m2, m3 ; DC prediction
- + pxor m4, m4 ; H score accumulator
- + pxor m5, m5 ; DC score accumulator
- + mov r3d, 6
- +.loop:
- + movq m6, [r0+FENC_STRIDE*0]
- + movhps m6, [r0+FENC_STRIDE*1]
- + movdqa m7, m0
- + pshufb m7, [shuf+r3*8] ; H prediction
- +%ifdef ARCH_X86_64
- + movdqa m8, m1
- + psadbw m7, m6
- + psadbw m8, m6
- + psadbw m6, m2
- + paddw m4, m7
- + paddw m3, m8
- + paddw m5, m6
- +%else
- + psadbw m7, m6
- + paddw m4, m7
- + movdqa m7, m1
- + psadbw m7, m6
- + psadbw m6, m2
- + paddw m3, m7
- + paddw m5, m6
- +%endif
- + add r0, FENC_STRIDE*2
- + sub r3d, 2
- + jge .loop
- +
- + movhlps m0, m3
- + movhlps m1, m4
- + movhlps m2, m5
- + paddw m3, m0
- + paddw m4, m1
- + paddw m5, m2
- + movd [r2+0], m3
- + movd [r2+4], m4
- + movd [r2+8], m5
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
- ;-----------------------------------------------------------------------------
- --
- 1.7.0.4
- From f7ca49910a4663fd743734f288ea88079e09deeb Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 19 Jun 2010 01:41:07 -0700
- Subject: [PATCH 06/10] Improve 2-pass bitrate prediction
- Adapt based on distance to the end in bits, not in frames.
- Helps in videos with absurdly simple end sections, e.g. black frames.
- ---
- encoder/ratecontrol.c | 12 +++++++++---
- 1 files changed, 9 insertions(+), 3 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index aef5083..16afbf0 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -2034,9 +2034,6 @@ static float rate_estimate_qscale( x264_t *h )
- double lmax = rcc->lmax[pict_type];
- int64_t diff;
- int64_t predicted_bits = total_bits;
- - /* Adjust ABR buffer based on distance to the end of the video. */
- - if( rcc->num_entries > h->i_frame )
- - abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
- if( rcc->b_vbv )
- {
- @@ -2062,6 +2059,15 @@ static float rate_estimate_qscale( x264_t *h )
- predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
- }
- + /* Adjust ABR buffer based on distance to the end of the video. */
- + if( rcc->num_entries > h->i_frame )
- + {
- + double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
- + double video_pos = rce.expected_bits / final_bits;
- + double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
- + abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
- + }
- +
- diff = predicted_bits - (int64_t)rce.expected_bits;
- q = rce.new_qscale;
- q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
- --
- 1.7.0.4
- From ccb8391f68b1657d9d4155ad4f12e2b98660b0c0 Mon Sep 17 00:00:00 2001
- From: Steven Walters <kemuri9@gmail.com>
- Date: Wed, 9 Jun 2010 18:14:52 -0400
- Subject: [PATCH 07/10] Use threadpools to avoid unnecessary thread creation
- Tiny performance improvement with fast settings and lots of threads.
- May help more on some OSs with slow thread creation, like OS X.
- Unify inconsistent synchronized abbreviations to sync.
- ---
- Makefile | 3 +-
- common/common.h | 10 ++-
- common/frame.c | 19 +++++-
- common/frame.h | 9 ++-
- common/threadpool.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
- common/threadpool.h | 39 ++++++++++++
- encoder/encoder.c | 79 ++++++++++++-------------
- encoder/lookahead.c | 22 ++++----
- input/thread.c | 17 ++---
- 9 files changed, 288 insertions(+), 73 deletions(-)
- create mode 100644 common/threadpool.c
- create mode 100644 common/threadpool.h
- diff --git a/Makefile b/Makefile
- index 8074ce5..9837821 100644
- --- a/Makefile
- +++ b/Makefile
- @@ -22,13 +22,14 @@ SRCSO =
- CONFIG := $(shell cat config.h)
- -# Optional muxer module sources
- +# Optional module sources
- ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
- SRCCLI += input/avs.c
- endif
- ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
- SRCCLI += input/thread.c
- +SRCS += common/threadpool.c
- endif
- ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
- diff --git a/common/common.h b/common/common.h
- index 3d522eb..60899fe 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
- #include "cabac.h"
- #include "quant.h"
- #include "cpu.h"
- +#include "threadpool.h"
- /****************************************************************************
- * General functions
- @@ -365,9 +366,10 @@ typedef struct x264_lookahead_t
- int i_last_keyframe;
- int i_slicetype_length;
- x264_frame_t *last_nonb;
- - x264_synch_frame_list_t ifbuf;
- - x264_synch_frame_list_t next;
- - x264_synch_frame_list_t ofbuf;
- + x264_pthread_t thread_handle;
- + x264_sync_frame_list_t ifbuf;
- + x264_sync_frame_list_t next;
- + x264_sync_frame_list_t ofbuf;
- } x264_lookahead_t;
- typedef struct x264_ratecontrol_t x264_ratecontrol_t;
- @@ -378,11 +380,11 @@ struct x264_t
- x264_param_t param;
- x264_t *thread[X264_THREAD_MAX+1];
- - x264_pthread_t thread_handle;
- int b_thread_active;
- int i_thread_phase; /* which thread to use for the next frame */
- int i_threadslice_start; /* first row in this thread slice */
- int i_threadslice_end; /* row after the end of this thread slice */
- + x264_threadpool_t *threadpool;
- /* bitstream output */
- struct
- diff --git a/common/frame.c b/common/frame.c
- index c5c573f..7c2fce0 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
- x264_free( list );
- }
- -int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
- +int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
- {
- if( max_size < 0 )
- return -1;
- @@ -533,7 +533,7 @@ fail:
- return -1;
- }
- -void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
- +void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
- {
- x264_pthread_mutex_destroy( &slist->mutex );
- x264_pthread_cond_destroy( &slist->cv_fill );
- @@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
- x264_frame_delete_list( slist->list );
- }
- -void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
- +void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
- {
- x264_pthread_mutex_lock( &slist->mutex );
- while( slist->i_size == slist->i_max_size )
- @@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
- x264_pthread_mutex_unlock( &slist->mutex );
- x264_pthread_cond_broadcast( &slist->cv_fill );
- }
- +
- +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist ) /* blocking pop: waits until the list is non-empty, then removes and returns its last entry */
- +{
- + x264_frame_t *frame;
- + x264_pthread_mutex_lock( &slist->mutex );
- + while( !slist->i_size ) /* re-checked after every wakeup */
- + x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
- + frame = slist->list[ --slist->i_size ]; /* take the entry at the tail */
- + slist->list[ slist->i_size ] = NULL;
- + x264_pthread_cond_broadcast( &slist->cv_empty ); /* signal that the list became emptier */
- + x264_pthread_mutex_unlock( &slist->mutex );
- + return frame;
- +}
- diff --git a/common/frame.h b/common/frame.h
- index 7d252c3..26529ce 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -154,7 +154,7 @@ typedef struct
- x264_pthread_mutex_t mutex;
- x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
- x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
- -} x264_synch_frame_list_t;
- +} x264_sync_frame_list_t;
- typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
- typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
- @@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
- void x264_frame_sort( x264_frame_t **list, int b_dts );
- void x264_frame_delete_list( x264_frame_t **list );
- -int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
- -void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
- -void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
- +int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
- +void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
- +void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
- +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
- #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
- #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
- diff --git a/common/threadpool.c b/common/threadpool.c
- new file mode 100644
- index 0000000..4448ea2
- --- /dev/null
- +++ b/common/threadpool.c
- @@ -0,0 +1,163 @@
- +/*****************************************************************************
- + * threadpool.c: x264 threadpool module
- + *****************************************************************************
- + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#include "common.h"
- +
- +typedef struct
- +{
- + void *(*func)(void *); /* work function; a pool thread calls func( arg ) */
- + void *arg; /* argument passed to func; x264_threadpool_wait matches finished jobs on this pointer */
- + void *ret; /* value returned by func, handed back by x264_threadpool_wait */
- +} x264_threadpool_job_t;
- +
- +struct x264_threadpool_t
- +{
- + int exit; /* set to 1 by x264_threadpool_delete; worker loops stop once it is nonzero */
- + int threads; /* number of worker threads (X264_MIN of the request and X264_THREAD_MAX) */
- + x264_pthread_t *thread_handle; /* array of `threads` handles, joined in x264_threadpool_delete */
- + void (*init_func)(void *); /* optional; each worker calls it once before taking jobs */
- + void *init_arg; /* argument passed to init_func */
- +
- + /* requires a synchronized list structure and associated methods,
- + so use what is already implemented for frames */
- + x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
- + x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
- + x264_sync_frame_list_t done; /* list of jobs that have finished processing */
- +};
- +
- +static void *x264_threadpool_thread( x264_threadpool_t *pool ) /* worker loop; returns void * because it is started through x264_pthread_create */
- +{
- + if( pool->init_func )
- + pool->init_func( pool->init_arg );
- + while( !pool->exit )
- + {
- + x264_threadpool_job_t *job = NULL;
- + x264_pthread_mutex_lock( &pool->run.mutex );
- + while( !pool->exit && !pool->run.i_size ) /* sleep until a job is queued or shutdown is requested */
- + x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
- + if( pool->run.i_size )
- + {
- + job = (void*)x264_frame_shift( pool->run.list ); /* presumably takes the head entry; x264_frame_shift is defined elsewhere */
- + pool->run.i_size--;
- + }
- + x264_pthread_mutex_unlock( &pool->run.mutex );
- + if( !job ) /* woken for shutdown with nothing queued: re-test pool->exit */
- + continue;
- + job->ret = job->func( job->arg ); /* execute the function */
- + x264_sync_frame_list_push( &pool->done, (void*)job );
- + }
- + return NULL; /* exit value is unused: x264_threadpool_delete joins with a NULL result pointer */
- +}
- +
- +int x264_threadpool_init( x264_threadpool_t **p_pool, int threads, /* creates the pool and starts its workers; returns 0 on success, -1 on failure */
- + void (*init_func)(void *), void *init_arg )
- +{
- + if( threads <= 0 )
- + return -1;
- +
- + x264_threadpool_t *pool;
- + CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) ); /* presumably jumps to fail on allocation failure; macro is defined elsewhere */
- + *p_pool = pool; /* published before setup completes, so the caller sees it even if a later step fails */
- +
- + pool->init_func = init_func;
- + pool->init_arg = init_arg;
- + pool->threads = X264_MIN( threads, X264_THREAD_MAX );
- +
- + CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
- +
- + if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
- + x264_sync_frame_list_init( &pool->run, pool->threads ) ||
- + x264_sync_frame_list_init( &pool->done, pool->threads ) )
- + goto fail;
- +
- + for( int i = 0; i < pool->threads; i++ ) /* pre-allocate one job slot per worker */
- + {
- + x264_threadpool_job_t *job;
- + CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
- + x264_sync_frame_list_push( &pool->uninit, (void*)job );
- + }
- + for( int i = 0; i < pool->threads; i++ )
- + if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
- + goto fail;
- +
- + return 0;
- +fail: /* NOTE(review): nothing allocated or started above is released on this path */
- + return -1;
- +}
- +
- +void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ) /* queues func( arg ) for execution by a pool thread */
- +{
- + x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit ); /* blocks until a free job slot is available */
- + job->func = func;
- + job->arg = arg;
- + x264_sync_frame_list_push( &pool->run, (void*)job );
- +}
- +
- +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ) /* blocks until a job queued with this arg has finished; returns its func's result */
- +{
- + x264_threadpool_job_t *job = NULL;
- +
- + x264_pthread_mutex_lock( &pool->done.mutex );
- + while( !job )
- + {
- + for( int i = 0; !job && i < pool->done.i_size; i++ ) /* stop at the first match so a later entry cannot overwrite job */
- + {
- + x264_threadpool_job_t *t = (void*)pool->done.list[i];
- + if( t->arg == arg )
- + {
- + job = (void*)x264_frame_shift( pool->done.list+i ); /* presumably removes entry i and closes the gap; x264_frame_shift is defined elsewhere */
- + pool->done.i_size--;
- + }
- + }
- + if( !job ) /* not finished yet: sleep until another job is pushed to done */
- + x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
- + }
- + x264_pthread_mutex_unlock( &pool->done.mutex );
- +
- + void *ret = job->ret;
- + x264_sync_frame_list_push( &pool->uninit, (void*)job ); /* recycle the job slot */
- + return ret;
- +}
- +
- +static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
- +{ /* Free every job record still stored in slist, then destroy the list itself. */
- + for( int i = 0; slist->list[i]; i++ ) /* walks until the first NULL slot, so the array is assumed to be NULL-terminated -- TODO confirm against x264_sync_frame_list_init */
- + {
- + x264_free( slist->list[i] );
- + slist->list[i] = NULL; /* clear the slot so no dangling pointer is left for x264_sync_frame_list_delete */
- + }
- + x264_sync_frame_list_delete( slist );
- +}
- +
- +void x264_threadpool_delete( x264_threadpool_t *pool )
- +{ /* Shut the pool down: flag exit, join every worker thread, then free the job lists, the handle array and the pool. */
- + x264_pthread_mutex_lock( &pool->run.mutex );
- + pool->exit = 1; /* written under run.mutex; NOTE(review): presumably the worker loop checks it under the same lock -- not visible here */
- + x264_pthread_cond_broadcast( &pool->run.cv_fill ); /* wake everything waiting on run.cv_fill so the flag gets noticed */
- + x264_pthread_mutex_unlock( &pool->run.mutex );
- + for( int i = 0; i < pool->threads; i++ )
- + x264_pthread_join( pool->thread_handle[i], NULL ); /* worker return values are discarded */
- +
- + x264_threadpool_list_delete( &pool->uninit ); /* all workers have been joined by now, so the lists are torn down without locking */
- + x264_threadpool_list_delete( &pool->run );
- + x264_threadpool_list_delete( &pool->done );
- + x264_free( pool->thread_handle );
- + x264_free( pool );
- +}
- diff --git a/common/threadpool.h b/common/threadpool.h
- new file mode 100644
- index 0000000..519737c
- --- /dev/null
- +++ b/common/threadpool.h
- @@ -0,0 +1,39 @@
- +/*****************************************************************************
- + * threadpool.h: x264 threadpool module
- + *****************************************************************************
- + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#ifndef X264_THREADPOOL_H
- +#define X264_THREADPOOL_H
- +
- +typedef struct x264_threadpool_t x264_threadpool_t; /* opaque here; only the typedef is exposed by this header */
- +
- +#if HAVE_PTHREAD
- +int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
- + void (*init_func)(void *), void *init_arg ); /* returns 0 on success, -1 on failure or when threads <= 0; threads is capped at X264_THREAD_MAX */
- +void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); /* queue func(arg) as a job */
- +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); /* block until the job queued with this arg is done; returns the job's ret value */
- +void x264_threadpool_delete( x264_threadpool_t *pool ); /* join all worker threads and free the pool */
- +#else
- +#define x264_threadpool_init(p,t,f,a) -1 /* no pthreads: creating a pool always reports failure */
- +#define x264_threadpool_run(p,f,a) /* no pthreads: expands to nothing */
- +#define x264_threadpool_wait(p,a) NULL /* no pthreads: no job result to return */
- +#define x264_threadpool_delete(p) /* no pthreads: expands to nothing */
- +#endif
- +
- +#endif
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index aebb536..283783d 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -349,6 +349,20 @@ fail:
- return -1;
- }
- +#if HAVE_PTHREAD
- +static void x264_encoder_thread_init( x264_t *h )
- +{ /* Per-thread setup; passed as init_func (with h as init_arg) to x264_threadpool_init() in x264_encoder_open(). */
- + if( h->param.i_sync_lookahead )
- + x264_lower_thread_priority( 10 ); /* only when a sync lookahead is configured; NOTE(review): presumably so encoder workers yield to the lookahead thread -- confirm */
- +
- +#if HAVE_MMX
- + /* Misalign mask has to be set separately for each thread. */
- + if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- + x264_cpu_mask_misalign_sse();
- +#endif
- +}
- +#endif
- +
- /****************************************************************************
- *
- ****************************************************************************
- @@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
- CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
- h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
- + if( h->param.i_threads > 1 &&
- + x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
- + goto fail;
- +
- h->thread[0] = h;
- for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
- CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
- @@ -2044,14 +2062,6 @@ static void *x264_slices_write( x264_t *h )
- {
- int i_slice_num = 0;
- int last_thread_mb = h->sh.i_last_mb;
- - if( h->param.i_sync_lookahead )
- - x264_lower_thread_priority( 10 );
- -
- -#if HAVE_MMX
- - /* Misalign mask has to be set separately for each thread. */
- - if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- - x264_cpu_mask_misalign_sse();
- -#endif
- #if HAVE_VISUALIZE
- if( h->param.b_visualize )
- @@ -2093,11 +2103,6 @@ static void *x264_slices_write( x264_t *h )
- static int x264_threaded_slices_write( x264_t *h )
- {
- - void *ret = NULL;
- -#if HAVE_MMX
- - if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- - x264_cpu_mask_misalign_sse();
- -#endif
- /* set first/last mb and sync contexts */
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- @@ -2121,16 +2126,14 @@ static int x264_threaded_slices_write( x264_t *h )
- /* dispatch */
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- - if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
- - return -1;
- + x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
- h->thread[i]->b_thread_active = 1;
- }
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- - x264_pthread_join( h->thread[i]->thread_handle, &ret );
- h->thread[i]->b_thread_active = 0;
- - if( (intptr_t)ret )
- - return (intptr_t)ret;
- + if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
- + return -1;
- }
- /* Go back and fix up the hpel on the borders between slices. */
- @@ -2206,6 +2209,10 @@ int x264_encoder_encode( x264_t *h,
- thread_current =
- thread_oldest = h;
- }
- +#if HAVE_MMX
- + if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
- + x264_cpu_mask_misalign_sse();
- +#endif
- // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
- if( x264_reference_update( h ) )
- @@ -2529,8 +2536,7 @@ int x264_encoder_encode( x264_t *h,
- h->i_threadslice_end = h->mb.i_mb_height;
- if( h->i_thread_frames > 1 )
- {
- - if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
- - return -1;
- + x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
- h->b_thread_active = 1;
- }
- else if( h->param.b_sliced_threads )
- @@ -2553,11 +2559,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- if( h->b_thread_active )
- {
- - void *ret = NULL;
- - x264_pthread_join( h->thread_handle, &ret );
- h->b_thread_active = 0;
- - if( (intptr_t)ret )
- - return (intptr_t)ret;
- + if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
- + return -1;
- }
- if( !h->out.i_nal )
- {
- @@ -2821,25 +2825,20 @@ void x264_encoder_close ( x264_t *h )
- x264_lookahead_delete( h );
- if( h->param.i_threads > 1 )
- + x264_threadpool_delete( h->threadpool );
- + if( h->i_thread_frames > 1 )
- {
- - // don't strictly have to wait for the other threads, but it's simpler than canceling them
- - for( int i = 0; i < h->param.i_threads; i++ )
- + for( int i = 0; i < h->i_thread_frames; i++ )
- if( h->thread[i]->b_thread_active )
- - x264_pthread_join( h->thread[i]->thread_handle, NULL );
- - if( h->i_thread_frames > 1 )
- - {
- - for( int i = 0; i < h->i_thread_frames; i++ )
- - if( h->thread[i]->b_thread_active )
- - {
- - assert( h->thread[i]->fenc->i_reference_count == 1 );
- - x264_frame_delete( h->thread[i]->fenc );
- - }
- + {
- + assert( h->thread[i]->fenc->i_reference_count == 1 );
- + x264_frame_delete( h->thread[i]->fenc );
- + }
- - x264_t *thread_prev = h->thread[h->i_thread_phase];
- - x264_thread_sync_ratecontrol( h, thread_prev, h );
- - x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
- - h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
- - }
- + x264_t *thread_prev = h->thread[h->i_thread_phase];
- + x264_thread_sync_ratecontrol( h, thread_prev, h );
- + x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
- + h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
- }
- h->i_frame++;
- diff --git a/encoder/lookahead.c b/encoder/lookahead.c
- index a79d4b1..f0af216 100644
- --- a/encoder/lookahead.c
- +++ b/encoder/lookahead.c
- @@ -37,7 +37,7 @@
- #include "common/common.h"
- #include "analyse.h"
- -static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
- +static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
- {
- int i = count;
- while( i-- )
- @@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
- look->i_slicetype_length = i_slicetype_length;
- /* init frame lists */
- - if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
- - x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
- - x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
- + if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
- + x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
- + x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
- goto fail;
- if( !h->param.i_sync_lookahead )
- @@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
- if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
- goto fail;
- - if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
- + if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
- goto fail;
- look->b_thread_active = 1;
- @@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
- h->lookahead->b_exit_thread = 1;
- x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
- x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
- - x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
- + x264_pthread_join( h->lookahead->thread_handle, NULL );
- x264_macroblock_cache_free( h->thread[h->param.i_threads] );
- x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
- x264_free( h->thread[h->param.i_threads] );
- }
- - x264_synch_frame_list_delete( &h->lookahead->ifbuf );
- - x264_synch_frame_list_delete( &h->lookahead->next );
- + x264_sync_frame_list_delete( &h->lookahead->ifbuf );
- + x264_sync_frame_list_delete( &h->lookahead->next );
- if( h->lookahead->last_nonb )
- x264_frame_push_unused( h, h->lookahead->last_nonb );
- - x264_synch_frame_list_delete( &h->lookahead->ofbuf );
- + x264_sync_frame_list_delete( &h->lookahead->ofbuf );
- x264_free( h->lookahead );
- }
- void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
- {
- if( h->param.i_sync_lookahead )
- - x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
- + x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
- else
- - x264_synch_frame_list_push( &h->lookahead->next, frame );
- + x264_sync_frame_list_push( &h->lookahead->next, frame );
- }
- int x264_lookahead_is_empty( x264_t *h )
- diff --git a/input/thread.c b/input/thread.c
- index a88cfae..c4b07fa 100644
- --- a/input/thread.c
- +++ b/input/thread.c
- @@ -30,10 +30,9 @@ typedef struct
- cli_input_t input;
- hnd_t p_handle;
- x264_picture_t pic;
- - x264_pthread_t tid;
- + x264_threadpool_t *pool;
- int next_frame;
- int frame_total;
- - int in_progress;
- struct thread_input_arg_t *next_args;
- } thread_hnd_t;
- @@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- }
- h->input = input;
- h->p_handle = *p_handle;
- - h->in_progress = 0;
- h->next_frame = -1;
- h->next_args = malloc( sizeof(thread_input_arg_t) );
- if( !h->next_args )
- @@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- thread_input.picture_alloc = h->input.picture_alloc;
- thread_input.picture_clean = h->input.picture_clean;
- + if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
- + return -1;
- +
- *p_handle = h;
- return 0;
- }
- @@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- if( h->next_frame >= 0 )
- {
- - x264_pthread_join( h->tid, NULL );
- + x264_threadpool_wait( h->pool, h->next_args );
- ret |= h->next_args->status;
- - h->in_progress = 0;
- }
- if( h->next_frame == i_frame )
- @@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- h->next_frame =
- h->next_args->i_frame = i_frame+1;
- h->next_args->pic = &h->pic;
- - if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
- - return -1;
- - h->in_progress = 1;
- + x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
- }
- else
- h->next_frame = -1;
- @@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
- static int close_file( hnd_t handle )
- {
- thread_hnd_t *h = handle;
- - if( h->in_progress )
- - x264_pthread_join( h->tid, NULL );
- + x264_threadpool_delete( h->pool );
- h->input.close_file( h->p_handle );
- h->input.picture_clean( &h->pic );
- free( h->next_args );
- --
- 1.7.0.4
- From c567d621e3b19cade993c570e04b077277ba1db7 Mon Sep 17 00:00:00 2001
- From: Lamont Alston <wewk584@gmail.com>
- Date: Wed, 16 Jun 2010 10:05:17 -0700
- Subject: [PATCH 08/10] Add open-GOP support
- ---
- common/common.c | 13 +++++++++-
- common/common.h | 6 ++++-
- encoder/encoder.c | 45 +++++++++++++++++++++++++-------------
- encoder/ratecontrol.c | 1 +
- encoder/slicetype.c | 57 +++++++++++++++++++++++++++++++++++++------------
- x264.c | 11 ++++++++-
- x264.h | 8 ++++++-
- 7 files changed, 107 insertions(+), 34 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 2a9c76e..4612bb5 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -676,6 +676,15 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- p->i_bframe_pyramid = atoi(value);
- }
- }
- + OPT("open-gop")
- + {
- + b_error |= parse_enum( value, x264_open_gop_names, &p->i_open_gop );
- + if( b_error )
- + {
- + b_error = 0;
- + p->i_open_gop = atoi(value);
- + }
- + }
- OPT("nf")
- p->b_deblocking_filter = !atobool(value);
- OPT2("filter", "deblock")
- @@ -1190,9 +1199,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " bframes=%d", p->i_bframe );
- if( p->i_bframe )
- {
- - s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
- + s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
- p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
- - p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
- + p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->i_open_gop );
- }
- s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
- diff --git a/common/common.h b/common/common.h
- index 60899fe..dfa1121 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -471,7 +471,11 @@ struct x264_t
- /* frames used for reference + sentinels */
- x264_frame_t *reference[16+2];
- - int i_last_keyframe; /* Frame number of the last keyframe */
- + int i_last_keyframe; /* Frame number of the last keyframe */
- + int i_last_idr; /* Frame number of the last IDR (not RP)*/
- + int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
- + * is only assigned during the period between that
- + * I frame and the next P or I frame, else -1 */
- int i_input; /* Number of input frames already accepted */
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 283783d..aa84192 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -573,12 +573,10 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
- h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
- }
- - h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
- + h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
- + h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
- if( h->param.i_keyint_max == 1 )
- - {
- - h->param.i_bframe = 0;
- h->param.b_intra_refresh = 0;
- - }
- h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
- if( h->param.i_bframe <= 1 )
- h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
- @@ -588,6 +586,7 @@ static int x264_validate_parameters( x264_t *h )
- h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
- h->param.analyse.i_direct_mv_pred = 0;
- h->param.analyse.b_weighted_bipred = 0;
- + h->param.i_open_gop = X264_OPEN_GOP_NONE;
- }
- if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
- {
- @@ -599,6 +598,11 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
- h->param.i_frame_reference = 1;
- }
- + if( h->param.b_intra_refresh && h->param.i_open_gop )
- + {
- + x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
- + h->param.i_open_gop = X264_OPEN_GOP_NONE;
- + }
- if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
- h->param.i_keyint_min = h->param.i_keyint_max / 10;
- h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
- @@ -978,9 +982,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
- h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
- h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
- + h->frames.i_last_idr =
- h->frames.i_last_keyframe = - h->param.i_keyint_max;
- h->frames.i_input = 0;
- h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
- + h->frames.i_poc_last_open_gop = -1;
- CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
- /* Allocate room for max refs plus a few extra just in case. */
- @@ -1688,35 +1694,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
- {
- int ref;
- int b_hasdelayframe = 0;
- - if( !h->param.i_bframe_pyramid )
- - return;
- /* look for delay frames -- chain must only contain frames that are disposable */
- for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
- b_hasdelayframe |= h->frames.current[i]->i_coded
- != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
- - if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
- + /* This function must handle b-pyramid and clear frames for open-gop */
- + if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
- return;
- /* Remove last BREF. There will never be old BREFs in the
- * dpb during a BREF decode when pyramid == STRICT */
- for( ref = 0; h->frames.reference[ref]; ref++ )
- {
- - if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
- + if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
- && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
- + || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
- + && h->sh.i_type != SLICE_TYPE_B ) )
- {
- int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
- h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
- h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
- - x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
- + x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
- h->b_ref_reorder[0] = 1;
- - break;
- + ref--;
- }
- }
- - /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
- - h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
- + /* Prepare room in the dpb for the delayed display time of the later b-frames */
- + if( h->param.i_bframe_pyramid )
- + h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
- }
- static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
- @@ -2321,12 +2329,17 @@ int x264_encoder_encode( x264_t *h,
- {
- h->frames.i_last_keyframe = h->fenc->i_frame;
- if( h->fenc->i_type == X264_TYPE_IDR )
- + {
- h->i_frame_num = 0;
- + h->frames.i_last_idr = h->fenc->i_frame;
- + }
- }
- h->sh.i_mmco_command_count =
- h->sh.i_mmco_remove_from_end = 0;
- h->b_ref_reorder[0] =
- h->b_ref_reorder[1] = 0;
- + h->fdec->i_poc =
- + h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
- /* ------------------- Setup frame context ----------------------------- */
- /* 5: Init data dependent of frame type */
- @@ -2337,6 +2350,7 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
- h->sh.i_type = SLICE_TYPE_I;
- x264_reference_reset( h );
- + h->frames.i_poc_last_open_gop = -1;
- }
- else if( h->fenc->i_type == X264_TYPE_I )
- {
- @@ -2344,6 +2358,8 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
- h->sh.i_type = SLICE_TYPE_I;
- x264_reference_hierarchy_reset( h );
- + if( h->param.i_open_gop )
- + h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
- }
- else if( h->fenc->i_type == X264_TYPE_P )
- {
- @@ -2351,6 +2367,7 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
- h->sh.i_type = SLICE_TYPE_P;
- x264_reference_hierarchy_reset( h );
- + h->frames.i_poc_last_open_gop = -1;
- }
- else if( h->fenc->i_type == X264_TYPE_BREF )
- {
- @@ -2366,8 +2383,6 @@ int x264_encoder_encode( x264_t *h,
- h->sh.i_type = SLICE_TYPE_B;
- }
- - h->fdec->i_poc =
- - h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
- h->fdec->i_type = h->fenc->i_type;
- h->fdec->i_frame = h->fenc->i_frame;
- h->fenc->b_kept_as_ref =
- @@ -2484,7 +2499,7 @@ int x264_encoder_encode( x264_t *h,
- if( h->fenc->i_type != X264_TYPE_IDR )
- {
- - int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- + int time_to_recovery = h->param.i_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
- x264_nal_end( h );
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 16afbf0..1030ef2 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -724,6 +724,7 @@ int x264_ratecontrol_new( x264_t *h )
- CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
- CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
- CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
- + CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
- if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
- x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 60f3a24..abd4979 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- {
- x264_mb_analysis_t a;
- x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
- - int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
- + int num_frames, orig_num_frames, keyint_limit, framecnt;
- int i_mb_count = NUM_MBS;
- int cost1p0, cost2p0, cost1b1, cost2p1;
- int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
- @@ -1080,7 +1080,6 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
- x264_lowres_context_init( h, &a );
- - idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
- /* This is important psy-wise: if we have a non-scenecut keyframe,
- * there will be significant visual artifacts if the frames just before
- @@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- {
- frames[1]->i_type = X264_TYPE_P;
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- else if( num_frames == 0 )
- {
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- @@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- int reset_start;
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- {
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- @@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- /* Enforce keyframe limit. */
- if( !h->param.b_intra_refresh )
- - for( int j = 0; j < num_frames; j++ )
- + for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
- {
- - if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
- + int j = i;
- + if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- {
- - if( j && h->param.i_keyint_max > 1 )
- - frames[j]->i_type = X264_TYPE_P;
- - frames[j+1]->i_type = X264_TYPE_IDR;
- - reset_start = X264_MIN( reset_start, j+2 );
- + while( IS_X264_TYPE_B( frames[i]->i_type ) )
- + i++;
- + while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
- + j--;
- }
- + frames[i]->i_type = X264_TYPE_I;
- + reset_start = X264_MIN( reset_start, i+1 );
- + i = j;
- }
- if( h->param.rc.i_vbv_buffer_size )
- @@ -1303,13 +1306,39 @@ void x264_slicetype_decide( x264_t *h )
- frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
- }
- + if( frm->i_type == X264_TYPE_KEYFRAME )
- + frm->i_type = h->param.i_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
- +
- /* Limit GOP size */
- if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
- {
- - if( frm->i_type == X264_TYPE_AUTO )
- + if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
- + frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
- + int warn = frm->i_type != X264_TYPE_IDR;
- + if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
- + warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME;
- + if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- + {
- + /* if this minigop ends with i, it's not a violation */
- + int j = bframes;
- + while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
- + j++;
- + warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
- + }
- + if( warn )
- + x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
- + }
- + if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min )
- + {
- + if( h->param.i_open_gop )
- + {
- + h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
- + if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- + h->lookahead->i_last_keyframe -= bframes; // Use coded order
- + frm->b_keyframe = 1;
- + }
- + else
- frm->i_type = X264_TYPE_IDR;
- - if( frm->i_type != X264_TYPE_IDR )
- - x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
- }
- if( frm->i_type == X264_TYPE_IDR )
- {
- diff --git a/x264.c b/x264.c
- index 09bad61..8722565 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -380,6 +380,12 @@ static void Help( x264_param_t *defaults, int longhelp )
- " - strict: Strictly hierarchical pyramid\n"
- " - normal: Non-strict (not Blu-ray compatible)\n",
- strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
- + H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
- + " - none: Use standard closed GOPs\n"
- + " - display: Base GOP length on display order\n"
- + " (not Blu-ray compatible)\n"
- + " - coded: Base GOP length on coded order\n"
- + " Only available with b-frames\n" );
- H1( " --no-cabac Disable CABAC\n" );
- H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
- H1( " --no-deblock Disable loop filter\n" );
- @@ -441,7 +447,8 @@ static void Help( x264_param_t *defaults, int longhelp )
- " or b=<float> (bitrate multiplier)\n" );
- H2( " --qpfile <string> Force frametypes and QPs for some or all frames\n"
- " Format of each line: framenumber frametype QP\n"
- - " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
- + " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
- + " K=<I or i> depending on open-gop setting\n"
- " QPs are restricted by qpmin/qpmax.\n" );
- H1( "\n" );
- H1( "Analysis:\n" );
- @@ -627,6 +634,7 @@ static struct option long_options[] =
- { "no-b-adapt", no_argument, NULL, 0 },
- { "b-bias", required_argument, NULL, 0 },
- { "b-pyramid", required_argument, NULL, 0 },
- + { "open-gop", required_argument, NULL, 0 },
- { "min-keyint", required_argument, NULL, 'i' },
- { "keyint", required_argument, NULL, 'I' },
- { "intra-refresh", no_argument, NULL, 0 },
- @@ -1305,6 +1313,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
- pic->i_qpplus1 = qp+1;
- if ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
- else if( type == 'i' ) pic->i_type = X264_TYPE_I;
- + else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
- else if( type == 'P' ) pic->i_type = X264_TYPE_P;
- else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
- else if( type == 'b' ) pic->i_type = X264_TYPE_B;
- diff --git a/x264.h b/x264.h
- index 9cd4600..09183fd 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 98
- +#define X264_BUILD 99
- /* x264_t:
- * opaque handler for encoder */
- @@ -104,6 +104,9 @@ typedef struct x264_t x264_t;
- #define X264_B_PYRAMID_STRICT 1
- #define X264_B_PYRAMID_NORMAL 2
- #define X264_KEYINT_MIN_AUTO 0
- +#define X264_OPEN_GOP_NONE 0
- +#define X264_OPEN_GOP_DISPLAY_ORDER 1
- +#define X264_OPEN_GOP_CODED_ORDER 2
- static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
- static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
- @@ -115,6 +118,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
- static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
- static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
- static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
- +static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
- /* Colorspace type
- * legacy only; nothing other than I420 is really supported. */
- @@ -138,6 +142,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
- #define X264_TYPE_P 0x0003
- #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
- #define X264_TYPE_B 0x0005
- +#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on i_open_gop option */
- #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
- #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
- @@ -221,6 +226,7 @@ typedef struct x264_param_t
- int i_bframe_adaptive;
- int i_bframe_bias;
- int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
- + int i_open_gop; /* Open gop: 1=display order, 2=coded order to determine gop size */
- int b_deblocking_filter;
- int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
- --
- 1.7.0.4
- From 2410f2b66ef0a91dfad39e17c44412a0c49ad0d7 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Thu, 17 Jun 2010 14:50:07 -0700
- Subject: [PATCH 09/10] Lookaheadless MB-tree support
- Uses past motion information instead of future data from the lookahead.
- Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
- Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
- Not on by default with any preset/tune combination; must be enabled explicitly if --tune zerolatency is used.
- Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
- Enable MB-tree in "veryfast", albeit with a very short lookahead.
- ---
- common/common.c | 5 ++++-
- encoder/encoder.c | 7 ++++++-
- encoder/slicetype.c | 48 ++++++++++++++++++++++++++++++++++--------------
- x264.c | 19 ++++++++++---------
- 4 files changed, 54 insertions(+), 25 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 4612bb5..1ed983f 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- param->analyse.b_weighted_bipred = 0;
- + param->rc.i_lookahead = 0;
- }
- else if( !strcasecmp( preset, "superfast" ) )
- {
- @@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->analyse.i_trellis = 0;
- param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + param->rc.i_lookahead = 0;
- }
- else if( !strcasecmp( preset, "veryfast" ) )
- {
- @@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->i_frame_reference = 1;
- param->analyse.b_mixed_references = 0;
- param->analyse.i_trellis = 0;
- - param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + param->rc.i_lookahead = 10;
- }
- else if( !strcasecmp( preset, "faster" ) )
- {
- @@ -355,6 +357,7 @@ static int x264_param_apply_tune( x264_param_t *param, const char *tune )
- param->i_bframe = 0;
- param->b_sliced_threads = 1;
- param->b_vfr_input = 0;
- + param->rc.b_mb_tree = 0;
- }
- else if( !strncasecmp( s, "touhou", 6 ) )
- {
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index aa84192..b4c63c9 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -621,8 +621,13 @@ static int x264_validate_parameters( x264_t *h )
- }
- h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
- - if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
- + if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
- h->param.rc.b_mb_tree = 0;
- + if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
- + {
- + x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
- + h->param.rc.b_mb_tree = 0;
- + }
- if( h->param.rc.b_stat_read )
- h->param.rc.i_lookahead = 0;
- #if HAVE_PTHREAD
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index abd4979..ab2a8b9 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
- }
- }
- - if( h->param.rc.i_vbv_buffer_size && referenced )
- + if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
- x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
- }
- @@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- int idx = !b_intra;
- int last_nonb, cur_nonb = 1;
- int bframes = 0;
- - int i = num_frames - 1;
- + int i = num_frames;
- +
- if( b_intra )
- x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
- @@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- i--;
- last_nonb = i;
- - if( last_nonb < idx )
- - return;
- - memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + if( !h->param.rc.i_lookahead )
- + {
- + if( b_intra )
- + {
- + memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
- + return;
- + }
- + XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
- + memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + }
- + else
- + {
- + if( last_nonb < idx )
- + return;
- + memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + }
- +
- while( i-- > idx )
- {
- cur_nonb = i;
- @@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- last_nonb = cur_nonb;
- }
- + if( !h->param.rc.i_lookahead )
- + {
- + x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
- + XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
- + }
- +
- x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
- if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
- x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
- @@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- int i_mb_count = NUM_MBS;
- int cost1p0, cost2p0, cost1b1, cost2p1;
- int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
- + int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
- if( h->param.b_deterministic )
- i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
- @@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- frames[framecnt+1] = h->lookahead->next.list[framecnt];
- if( !framecnt )
- + {
- + if( h->param.rc.b_mb_tree )
- + x264_macroblock_tree( h, &a, frames, 0, keyframe );
- return;
- + }
- keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
- orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
- @@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- * there will be significant visual artifacts if the frames just before
- * go down in quality due to being referenced less, despite it being
- * more RD-optimal. */
- - if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
- + if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
- num_frames = framecnt;
- - else if( num_frames == 1 )
- - {
- - frames[1]->i_type = X264_TYPE_P;
- - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- - frames[1]->i_type = X264_TYPE_I;
- - return;
- - }
- else if( num_frames == 0 )
- {
- frames[1]->i_type = X264_TYPE_I;
- @@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- i = j;
- }
- - if( h->param.rc.i_vbv_buffer_size )
- + if( vbv_lookahead )
- x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
- /* Restore frametypes for all frames that haven't actually been decided yet. */
- diff --git a/x264.c b/x264.c
- index 8722565..df04385 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
- " --no-8x8dct --aq-mode 0 --b-adapt 0\n"
- " --bframes 0 --no-cabac --no-deblock\n"
- " --no-mbtree --me dia --no-mixed-refs\n"
- - " --partitions none --ref 1 --scenecut 0\n"
- - " --subme 0 --trellis 0 --no-weightb\n"
- - " --weightp 0\n"
- + " --partitions none --rc-lookahead 0 --ref 1\n"
- + " --scenecut 0 --subme 0 --trellis 0\n"
- + " --no-weightb --weightp 0\n"
- " - superfast:\n"
- " --no-mbtree --me dia --no-mixed-refs\n"
- - " --partitions i8x8,i4x4 --ref 1\n"
- - " --subme 1 --trellis 0 --weightp 0\n"
- + " --partitions i8x8,i4x4 --rc-lookahead 0\n"
- + " --ref 1 --subme 1 --trellis 0 --weightp 0\n"
- " - veryfast:\n"
- - " --no-mbtree --no-mixed-refs --ref 1\n"
- - " --subme 2 --trellis 0 --weightp 0\n"
- + " --no-mixed-refs --rc-lookahead 10\n"
- + " --ref 1 --subme 2 --trellis 0 --weightp 0\n"
- " - faster:\n"
- " --no-mixed-refs --rc-lookahead 20\n"
- " --ref 2 --subme 4 --weightp 1\n"
- @@ -350,8 +350,9 @@ static void Help( x264_param_t *defaults, int longhelp )
- " --no-cabac --no-deblock --no-weightb\n"
- " --weightp 0\n"
- " - zerolatency:\n"
- - " --bframes 0 --force-cfr --rc-lookahead 0\n"
- - " --sync-lookahead 0 --sliced-threads\n" );
- + " --bframes 0 --force-cfr --no-mbtree\n"
- + " --sync-lookahead 0 --sliced-threads\n"
- + " --rc-lookahead 0\n" );
- else H0( " - psy tunings: film,animation,grain,\n"
- " stillimage,psnr,ssim\n"
- " - other tunings: fastdecode,zerolatency\n" );
- --
- 1.7.0.4
- From cdc1679a01738af72fbab54a39a1da770b44f26a Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 23 Jun 2010 17:29:34 -0700
- Subject: [PATCH 10/10] Interactive encoder control: error resilience
- In low-latency streaming with few clients, it is often feasible to modify encoder behavior in some fashion based on feedback from clients.
- One possible application of this is error resilience: if a packet is lost, mark the associated frame (and any referenced from it) as lost.
- This allows quick recovery from errors with minimal expense bit-wise.
- The new i_dpb_size parameter allows a calling application to tell x264 to use a larger DPB size than required by the number of reference frames.
- This lets x264 and the client keep a large buffer of old references to fall back to in case of lost frames.
- If no recovery is possible even with the available buffer, x264 will force a keyframe.
- This initial version does not support B-frames or intra refresh.
- Recommended usage is to set keyint to a very large value, so that keyframes do not occur except as necessary for extreme error recovery.
- Full documentation is in x264.h.
- Move DTS/PTS calculation to before encoding each frame instead of after.
- Improve documentation of x264_encoder_intra_refresh.
- ---
- common/common.c | 2 +
- common/common.h | 2 +
- common/frame.c | 1 +
- common/frame.h | 4 ++
- common/mvpred.c | 14 +++++---
- encoder/encoder.c | 96 +++++++++++++++++++++++++++++++++++++++-------------
- encoder/set.c | 4 +-
- x264.h | 35 ++++++++++++++++++-
- 8 files changed, 125 insertions(+), 33 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 1ed983f..8c7cf3c 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -634,6 +634,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- }
- OPT2("ref", "frameref")
- p->i_frame_reference = atoi(value);
- + OPT("dpb-size")
- + p->i_dpb_size = atoi(value);
- OPT("keyint")
- {
- p->i_keyint_max = atoi(value);
- diff --git a/common/common.h b/common/common.h
- index dfa1121..7b60811 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -421,6 +421,8 @@ struct x264_t
- int i_cpb_delay_lookahead;
- int b_queued_intra_refresh;
- + int64_t i_reference_invalidate_pts;
- + int64_t i_last_idr_pts;
- /* We use only one SPS and one PPS */
- x264_sps_t sps_array[1];
- diff --git a/common/frame.c b/common/frame.c
- index 7c2fce0..d862468 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -443,6 +443,7 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec )
- frame->b_intra_calculated = 0;
- frame->b_scenecut = 1;
- frame->b_keyframe = 0;
- + frame->b_corrupt = 0;
- memset( frame->weight, 0, sizeof(frame->weight) );
- memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) );
- diff --git a/common/frame.h b/common/frame.h
- index 26529ce..904ba4e 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -35,6 +35,7 @@ typedef struct x264_frame
- int i_type;
- int i_qpplus1;
- int64_t i_pts;
- + int64_t i_dts;
- int64_t i_reordered_pts;
- int i_duration; /* in SPS time_scale units (i.e 2 * timebase units) used for vfr */
- int i_cpb_duration;
- @@ -143,6 +144,9 @@ typedef struct x264_frame
- int i_pir_start_col;
- int i_pir_end_col;
- int i_frames_since_pir;
- +
- + /* interactive encoder control */
- + int b_corrupt;
- } x264_frame_t;
- /* synchronized frame list */
- diff --git a/common/mvpred.c b/common/mvpred.c
- index 10a18b3..03dfe9f 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -409,12 +409,16 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- if( i_ref == 0 && h->frames.b_have_lowres )
- {
- - int16_t (*lowres_mv)[2] = i_list ? h->fenc->lowres_mvs[1][h->fref1[0]->i_frame-h->fenc->i_frame-1]
- - : h->fenc->lowres_mvs[0][h->fenc->i_frame-h->fref0[0]->i_frame-1];
- - if( lowres_mv[0][0] != 0x7fff )
- + int idx = i_list ? h->fref1[0]->i_frame-h->fenc->i_frame-1
- + : h->fenc->i_frame-h->fref0[0]->i_frame-1;
- + if( idx <= h->param.i_bframe )
- {
- - M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
- - i++;
- + int16_t (*lowres_mv)[2] = h->fenc->lowres_mvs[i_list][idx];
- + if( lowres_mv[0][0] != 0x7fff )
- + {
- + M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
- + i++;
- + }
- }
- }
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index b4c63c9..f54fe85 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -564,6 +564,7 @@ static int x264_validate_parameters( x264_t *h )
- }
- h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
- + h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
- if( h->param.i_keyint_max <= 0 )
- h->param.i_keyint_max = 1;
- if( h->param.i_scenecut_threshold < 0 )
- @@ -593,10 +594,11 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "b-pyramid normal + intra-refresh is not supported\n" );
- h->param.i_bframe_pyramid = X264_B_PYRAMID_STRICT;
- }
- - if( h->param.b_intra_refresh && h->param.i_frame_reference > 1 )
- + if( h->param.b_intra_refresh && (h->param.i_frame_reference > 1 || h->param.i_dpb_size > 1) )
- {
- x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
- h->param.i_frame_reference = 1;
- + h->param.i_dpb_size = 1;
- }
- if( h->param.b_intra_refresh && h->param.i_open_gop )
- {
- @@ -1481,6 +1483,8 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
- for( int i = 0; h->frames.reference[i]; i++ )
- {
- + if( h->frames.reference[i]->b_corrupt )
- + continue;
- if( h->frames.reference[i]->i_poc < i_poc )
- h->fref0[h->i_ref0++] = h->frames.reference[i];
- else if( h->frames.reference[i]->i_poc > i_poc )
- @@ -2185,6 +2189,23 @@ void x264_encoder_intra_refresh( x264_t *h )
- h->b_queued_intra_refresh = 1;
- }
- +int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
- +{
- + if( h->param.i_bframe )
- + {
- + x264_log( h, X264_LOG_ERROR, "x264_encoder_invalidate_reference is not supported with B-frames enabled\n" );
- + return -1;
- + }
- + if( h->param.b_intra_refresh )
- + {
- + x264_log( h, X264_LOG_ERROR, "x264_encoder_invalidate_reference is not supported with intra refresh enabled\n" );
- + return -1;
- + }
- + h = h->thread[h->i_thread_phase];
- + h->i_reference_invalidate_pts = pts;
- + return 0;
- +}
- +
- /****************************************************************************
- * x264_encoder_encode:
- * XXX: i_poc : is the poc of the current given picture
- @@ -2330,6 +2351,29 @@ int x264_encoder_encode( x264_t *h,
- h->fenc->param->param_free( h->fenc->param );
- }
- + if( h->i_reference_invalidate_pts )
- + {
- + if( h->i_reference_invalidate_pts >= h->i_last_idr_pts )
- + for( int i = 0; h->frames.reference[i]; i++ )
- + if( h->i_reference_invalidate_pts <= h->frames.reference[i]->i_pts )
- + h->frames.reference[i]->b_corrupt = 1;
- + h->i_reference_invalidate_pts = 0;
- + }
- +
- + if( !IS_X264_TYPE_I( h->fenc->i_type ) )
- + {
- + int valid_refs_left = 0;
- + for( int i = 0; h->frames.reference[i]; i++ )
- + if( !h->frames.reference[i]->b_corrupt )
- + valid_refs_left++;
- + /* No valid reference frames left: force an IDR. */
- + if( !valid_refs_left )
- + {
- + h->fenc->b_keyframe = 1;
- + h->fenc->i_type = X264_TYPE_IDR;
- + }
- + }
- +
- if( h->fenc->b_keyframe )
- {
- h->frames.i_last_keyframe = h->fenc->i_frame;
- @@ -2393,7 +2437,30 @@ int x264_encoder_encode( x264_t *h,
- h->fenc->b_kept_as_ref =
- h->fdec->b_kept_as_ref = i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE && h->param.i_keyint_max > 1;
- -
- + h->fdec->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
- + if( h->frames.i_bframe_delay )
- + {
- + int64_t *prev_reordered_pts = thread_current->frames.i_prev_reordered_pts;
- + if( h->i_frame <= h->frames.i_bframe_delay )
- + {
- + if( h->i_dts_compress_multiplier == 1 )
- + h->fdec->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
- + else
- + {
- + /* DTS compression */
- + if( h->i_frame == 1 )
- + thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
- + h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
- + }
- + }
- + else
- + h->fdec->i_dts = prev_reordered_pts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
- + prev_reordered_pts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
- + }
- + else
- + h->fdec->i_dts = h->fenc->i_reordered_pts;
- + if( h->fenc->i_type == X264_TYPE_IDR )
- + h->i_last_idr_pts = h->fdec->i_pts;
- /* ------------------- Init ----------------------------- */
- /* build ref list 0/1 */
- @@ -2616,28 +2683,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- pic_out->b_keyframe = h->fenc->b_keyframe;
- - pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
- - if( h->frames.i_bframe_delay )
- - {
- - int64_t *prev_reordered_pts = thread_current->frames.i_prev_reordered_pts;
- - if( h->i_frame <= h->frames.i_bframe_delay )
- - {
- - if( h->i_dts_compress_multiplier == 1 )
- - pic_out->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
- - else
- - {
- - /* DTS compression */
- - if( h->i_frame == 1 )
- - thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
- - pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
- - }
- - }
- - else
- - pic_out->i_dts = prev_reordered_pts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
- - prev_reordered_pts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
- - }
- - else
- - pic_out->i_dts = h->fenc->i_reordered_pts;
- + pic_out->i_pts = h->fdec->i_pts;
- + pic_out->i_dts = h->fdec->i_dts;
- +
- if( pic_out->i_pts < pic_out->i_dts )
- x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
- diff --git a/encoder/set.c b/encoder/set.c
- index 86b4a30..883388b 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -223,8 +223,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
- /* extra slot with pyramid so that we don't have to override the
- * order of forgetting old pictures */
- sps->vui.i_max_dec_frame_buffering =
- - sps->i_num_ref_frames = X264_MIN(16, X264_MAX3(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
- - param->i_bframe_pyramid ? 4 : 1 ));
- + sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
- + param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
- sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
- sps->vui.b_bitstream_restriction = 1;
- diff --git a/x264.h b/x264.h
- index 09183fd..1138a8b 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 99
- +#define X264_BUILD 100
- /* x264_t:
- * opaque handler for encoder */
- @@ -217,6 +217,8 @@ typedef struct x264_param_t
- /* Bitstream parameters */
- int i_frame_reference; /* Maximum number of reference frames */
- + int i_dpb_size; /* Force a DPB size larger than that implied by B-frames and reference frames.
- + * Useful in combination with interactive error resilience. */
- int i_keyint_max; /* Force an IDR keyframe at this interval */
- int i_keyint_min; /* Scenecuts closer together than this are coded as I, not IDR. */
- int i_scenecut_threshold; /* how aggressively to insert extra I frames */
- @@ -682,9 +684,38 @@ int x264_encoder_delayed_frames( x264_t * );
- * If an intra refresh is not in progress, begin one with the next P-frame.
- * If an intra refresh is in progress, begin one as soon as the current one finishes.
- * Requires that b_intra_refresh be set.
- + *
- * Useful for interactive streaming where the client can tell the server that packet loss has
- * occurred. In this case, keyint can be set to an extremely high value so that intra refreshes
- - * only occur when calling x264_encoder_intra_refresh. */
- + * only occur when calling x264_encoder_intra_refresh.
- + *
- + * In multi-pass encoding, if x264_encoder_intra_refresh is called differently in each pass,
- + * behavior is undefined.
- + *
- + * Should not be called during an x264_encoder_encode. */
- void x264_encoder_intra_refresh( x264_t * );
- +/* x264_encoder_invalidate_reference:
- + * An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients
- + * system. When the client has packet loss or otherwise incorrectly decodes a frame, the encoder
- + * can be told with this command to "forget" the frame and all frames that depend on it, referencing
- + * only frames that occurred before the loss. This will force a keyframe if no frames are left to
- + * reference after the aforementioned "forgetting".
- + *
- + * It is strongly recommended to use a large i_dpb_size in this case, which allows the encoder to
- + * keep around extra, older frames to fall back on in case more recent frames are all invalidated.
- + * Unlike increasing i_frame_reference, this does not increase the number of frames used for motion
- + * estimation and thus has no speed impact. It is also recommended to set a very large keyframe
- + * interval, so that keyframes are not used except as necessary for error recovery.
- + *
- + * x264_encoder_invalidate_reference is not currently compatible with the use of B-frames or intra
- + * refresh.
- + *
- + * In multi-pass encoding, if x264_encoder_invalidate_reference is called differently in each pass,
- + * behavior is undefined.
- + *
- + * Should not be called during an x264_encoder_encode.
- + *
- + * Returns 0 on success, negative on failure. */
- +int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
- #endif
- --
- 1.7.0.4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement