Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 83d2ad004ccf51fb20a1c571e022586e40ca941c Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 29 Jan 2010 02:40:41 -0800
- Subject: [PATCH 01/14] Add ability to adjust ratecontrol parameters on the fly
- encoder_reconfig and x264_picture_t->param can now be used to change ratecontrol parameters.
- This is extraordinarily useful in certain streaming situations where the encoder needs to adapt the bitrate to network circumstances.
- What can be changed:
- 1) CRF can be adjusted if in CRF mode.
- 2) VBV maxrate and bufsize can be adjusted if in VBV mode.
- 3) Bitrate can be adjusted if in CBR mode.
- However, x264 cannot switch between modes and cannot change bitrate in ABR mode.
- Also fix a bug where x264_picture_t->param reconfig method would not always be frame-exact.
- Commit sponsored by SayMama video calling.
- ---
- encoder/encoder.c | 55 +++++++++++++++++++-
- encoder/ratecontrol.c | 137 +++++++++++++++++++++++-------------------------
- encoder/ratecontrol.h | 2 +
- x264.h | 7 ++-
- 4 files changed, 125 insertions(+), 76 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index d873cd0..8e9c118 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -507,6 +507,39 @@ static int x264_validate_parameters( x264_t *h )
- }
- h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
- h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
- + if( h->param.rc.i_vbv_buffer_size )
- + {
- + if( h->param.rc.i_rc_method == X264_RC_CQP )
- + {
- + x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
- + h->param.rc.i_vbv_max_bitrate = 0;
- + h->param.rc.i_vbv_buffer_size = 0;
- + }
- + else if( h->param.rc.i_vbv_max_bitrate == 0 )
- + {
- + if( h->param.rc.i_rc_method == X264_RC_ABR )
- + {
- + x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
- + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- + }
- + else
- + {
- + x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
- + h->param.rc.i_vbv_buffer_size = 0;
- + }
- + }
- + else if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
- + h->param.rc.i_vbv_max_bitrate > 0 )
- + {
- + x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
- + h->param.rc.i_vbv_max_bitrate = 0;
- + }
- + }
- + else if( h->param.rc.i_vbv_max_bitrate )
- + {
- + x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
- + h->param.rc.i_vbv_max_bitrate = 0;
- + }
- int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced);
- if( h->param.b_sliced_threads )
- @@ -1071,7 +1104,7 @@ fail:
- ****************************************************************************/
- int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
- {
- - h = h->thread[h->i_thread_phase];
- + h = h->thread[h->thread[0]->i_thread_phase];
- x264_set_aspect_ratio( h, param, 0 );
- #define COPY(var) h->param.var = param->var
- COPY( i_frame_reference ); // but never uses more refs than initially specified
- @@ -1110,11 +1143,29 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
- COPY( i_slice_max_size );
- COPY( i_slice_max_mbs );
- COPY( i_slice_count );
- + /* VBV can't be turned on if it wasn't on to begin with */
- + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
- + {
- + COPY( rc.i_vbv_max_bitrate );
- + COPY( rc.i_vbv_buffer_size );
- + COPY( rc.i_bitrate );
- + }
- + COPY( rc.f_rf_constant );
- #undef COPY
- mbcmp_init( h );
- - return x264_validate_parameters( h );
- + int failure = x264_validate_parameters( h );
- +
- + /* Supported reconfiguration options (1-pass only):
- + * vbv-maxrate
- + * vbv-bufsize
- + * crf
- + * bitrate (CBR only) */
- + if( !failure )
- + x264_ratecontrol_init_reconfigurable( h, 0 );
- +
- + return failure;
- }
- /****************************************************************************
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 63b3be6..52196e7 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -388,6 +388,53 @@ static char *x264_strcat_filename( char *input, char *suffix )
- return output;
- }
- +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- +{
- + x264_ratecontrol_t *rc = h->rc;
- + if( !b_init && rc->b_2pass )
- + return;
- +
- + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
- + {
- + if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
- + {
- + h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
- + x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
- + h->param.rc.i_vbv_buffer_size );
- + }
- +
- + /* We don't support changing the ABR bitrate right now,
- + so if the stream starts as CBR, keep it CBR. */
- + if( rc->b_vbv_min_rate )
- + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- + rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
- + rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
- + rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
- + rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
- + * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
- + if( b_init )
- + {
- + if( h->param.rc.f_vbv_buffer_init > 1. )
- + h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- + h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
- + rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- + rc->b_vbv = 1;
- + rc->b_vbv_min_rate = !rc->b_2pass
- + && h->param.rc.i_rc_method == X264_RC_ABR
- + && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
- + }
- + }
- + if( h->param.rc.i_rc_method == X264_RC_CRF )
- + {
- + /* Arbitrary rescaling to make CRF somewhat similar to QP.
- + * Try to compensate for MB-tree's effects as well. */
- + double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
- + double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
- + rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
- + / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
- + }
- +}
- +
- int x264_ratecontrol_new( x264_t *h )
- {
- x264_ratecontrol_t *rc;
- @@ -426,60 +473,10 @@ int x264_ratecontrol_new( x264_t *h )
- x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
- return -1;
- }
- - if( h->param.rc.i_vbv_buffer_size )
- - {
- - if( h->param.rc.i_rc_method == X264_RC_CQP )
- - {
- - x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
- - h->param.rc.i_vbv_max_bitrate = 0;
- - h->param.rc.i_vbv_buffer_size = 0;
- - }
- - else if( h->param.rc.i_vbv_max_bitrate == 0 )
- - {
- - if( h->param.rc.i_rc_method == X264_RC_ABR )
- - {
- - x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
- - h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- - }
- - else
- - {
- - x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
- - h->param.rc.i_vbv_buffer_size = 0;
- - }
- - }
- - }
- - if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
- - h->param.rc.i_vbv_max_bitrate > 0)
- - x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
- - else if( h->param.rc.i_vbv_max_bitrate > 0 &&
- - h->param.rc.i_vbv_buffer_size > 0 )
- - {
- - if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
- - {
- - h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
- - x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
- - h->param.rc.i_vbv_buffer_size );
- - }
- - if( h->param.rc.f_vbv_buffer_init > 1. )
- - h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- - rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
- - rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
- - rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
- - h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
- - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- - rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
- - * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
- - rc->b_vbv = 1;
- - rc->b_vbv_min_rate = !rc->b_2pass
- - && h->param.rc.i_rc_method == X264_RC_ABR
- - && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
- - }
- - else if( h->param.rc.i_vbv_max_bitrate )
- - {
- - x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
- - h->param.rc.i_vbv_max_bitrate = 0;
- - }
- - if(rc->rate_tolerance < 0.01)
- +
- + x264_ratecontrol_init_reconfigurable( h, 1 );
- +
- + if( rc->rate_tolerance < 0.01 )
- {
- x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
- rc->rate_tolerance = 0.01;
- @@ -499,16 +496,6 @@ int x264_ratecontrol_new( x264_t *h )
- rc->last_non_b_pict_type = SLICE_TYPE_I;
- }
- - if( h->param.rc.i_rc_method == X264_RC_CRF )
- - {
- - /* Arbitrary rescaling to make CRF somewhat similar to QP.
- - * Try to compensate for MB-tree's effects as well. */
- - double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
- - double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
- - rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
- - / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
- - }
- -
- rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
- rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
- rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
- @@ -1577,15 +1564,15 @@ static void update_vbv( x264_t *h, int bits )
- if( rct->buffer_fill_final < 0 )
- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
- rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
- - rct->buffer_fill_final += rct->buffer_rate;
- - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rct->buffer_size );
- + rct->buffer_fill_final += rcc->buffer_rate;
- + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
- }
- // provisionally update VBV according to the planned size of all frames currently in progress
- static void update_vbv_plan( x264_t *h, int overhead )
- {
- x264_ratecontrol_t *rcc = h->rc;
- - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
- + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
- if( h->i_thread_frames > 1 )
- {
- int j = h->rc - h->thread[0]->rc;
- @@ -1603,6 +1590,8 @@ static void update_vbv_plan( x264_t *h, int overhead )
- rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
- }
- }
- + rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
- + rcc->buffer_fill -= overhead;
- }
- // apply VBV constraints and clip qscale to between lmin and lmax
- @@ -2027,8 +2016,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
- /* these vars are updated in x264_ratecontrol_start()
- * so copy them from the context that most recently started (prev)
- - * to the context that's about to start (cur).
- - */
- + * to the context that's about to start (cur). */
- COPY(accum_p_qp);
- COPY(accum_p_norm);
- COPY(last_satd);
- @@ -2040,6 +2028,14 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- COPY(bframes);
- COPY(prev_zone);
- COPY(qpbuf_pos);
- + /* these vars can be updated by x264_ratecontrol_init_reconfigurable */
- + COPY(buffer_rate);
- + COPY(buffer_size);
- + COPY(single_frame_vbv);
- + COPY(cbr_decay);
- + COPY(b_vbv_min_rate);
- + COPY(rate_factor_constant);
- + COPY(bitrate);
- #undef COPY
- }
- if( cur != next )
- @@ -2047,8 +2043,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- #define COPY(var) next->rc->var = cur->rc->var
- /* these vars are updated in x264_ratecontrol_end()
- * so copy them from the context that most recently ended (cur)
- - * to the context that's about to end (next)
- - */
- + * to the context that's about to end (next) */
- COPY(cplxr_sum);
- COPY(expected_bits_sum);
- COPY(wanted_bits_window);
- diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
- index 5a8d088..2767866 100644
- --- a/encoder/ratecontrol.h
- +++ b/encoder/ratecontrol.h
- @@ -27,6 +27,8 @@
- int x264_ratecontrol_new ( x264_t * );
- void x264_ratecontrol_delete( x264_t * );
- +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
- +
- void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
- void x264_adaptive_quant( x264_t * );
- int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
- diff --git a/x264.h b/x264.h
- index 2550864..e7d19b7 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 84
- +#define X264_BUILD 85
- /* x264_t:
- * opaque handler for encoder */
- @@ -480,11 +480,12 @@ typedef struct
- x264_t *x264_encoder_open( x264_param_t * );
- /* x264_encoder_reconfig:
- - * analysis-related parameters from x264_param_t are copied.
- + * various parameters from x264_param_t are copied.
- * this takes effect immediately, on whichever frame is encoded next;
- * due to delay, this may not be the next frame passed to encoder_encode.
- * if the change should apply to some particular frame, use x264_picture_t->param instead.
- - * returns 0 on success, negative on parameter validation error. */
- + * returns 0 on success, negative on parameter validation error.
- + * not all parameters can be changed; see the actual function for a detailed breakdown. */
- int x264_encoder_reconfig( x264_t *, x264_param_t * );
- /* x264_encoder_parameters:
- * copies the current internal set of parameters to the pointer provided
- --
- 1.6.1.2
- From 3a4ddc546cd2a368f22ba0a26da093129d6f6772 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 1 Feb 2010 13:04:47 -0800
- Subject: [PATCH 02/14] Slightly faster predictor_difference_mmxext
- ---
- common/x86/util.h | 17 ++++++++++-------
- 1 files changed, 10 insertions(+), 7 deletions(-)
- diff --git a/common/x86/util.h b/common/x86/util.h
- index efc700a..c8bcf4b 100644
- --- a/common/x86/util.h
- +++ b/common/x86/util.h
- @@ -45,8 +45,9 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
- #define x264_predictor_difference x264_predictor_difference_mmxext
- static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t i_mvc )
- {
- - int sum = 0;
- - uint16_t output[4];
- + int sum;
- + static const uint64_t pw_1 = 0x0001000100010001ULL;
- +
- asm(
- "pxor %%mm4, %%mm4 \n"
- "test $1, %1 \n"
- @@ -56,7 +57,7 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
- "psubw %%mm3, %%mm0 \n"
- "jmp 2f \n"
- "3: \n"
- - "sub $1, %1 \n"
- + "dec %1 \n"
- "1: \n"
- "movq -8(%2,%1,4), %%mm0 \n"
- "psubw -4(%2,%1,4), %%mm0 \n"
- @@ -67,11 +68,13 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
- "pmaxsw %%mm2, %%mm0 \n"
- "paddusw %%mm0, %%mm4 \n"
- "jg 1b \n"
- - "movq %%mm4, %0 \n"
- - :"=m"(output), "+r"(i_mvc)
- - :"r"(mvc), "m"(M64( mvc ))
- + "pmaddwd %4, %%mm4 \n"
- + "pshufw $14, %%mm4, %%mm0 \n"
- + "paddd %%mm0, %%mm4 \n"
- + "movd %%mm4, %0 \n"
- + :"=r"(sum), "+r"(i_mvc)
- + :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
- );
- - sum += output[0] + output[1] + output[2] + output[3];
- return sum;
- }
- #define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext
- --
- 1.6.1.2
- From 8d417bd03fcc4f3a8058b2db9063b0d967600536 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 2 Feb 2010 03:15:18 -0800
- Subject: [PATCH 03/14] Improve bidir search, fix some artifacts in fades
- Modify analysis to allow bidir to use different motion vectors than L0/L1.
- Always try the <0,0,0,0> motion vector for bidir.
- Eliminates almost all errant motion vectors in fades.
- Slightly improves PSNR as well (~0.015 dB).
- ---
- encoder/analyse.c | 50 ++++++++++++++++++++++++++++++++++++++------------
- 1 files changed, 38 insertions(+), 12 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 666596b..1fb2206 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -40,6 +40,7 @@ typedef struct
- int i_ref;
- int i_rd16x16;
- x264_me_t me16x16;
- + x264_me_t bi16x16; /* for b16x16 BI mode, since MVs can differ from l0/l1 */
- /* 8x8 */
- int i_cost8x8;
- @@ -1722,20 +1723,45 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
- a->l1.me16x16.i_ref = a->l1.i_ref;
- /* get cost of BI mode */
- + int ref_costs = REF_COST( 0, a->l0.i_ref ) + REF_COST( 1, a->l1.i_ref );
- + h->mc.memcpy_aligned( &a->l0.bi16x16, &a->l0.me16x16, sizeof(x264_me_t) );
- + h->mc.memcpy_aligned( &a->l1.bi16x16, &a->l1.me16x16, sizeof(x264_me_t) );
- src0 = h->mc.get_ref( pix0, &stride0,
- h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
- - a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16, weight_none );
- + a->l0.bi16x16.mv[0], a->l0.bi16x16.mv[1], 16, 16, weight_none );
- src1 = h->mc.get_ref( pix1, &stride1,
- h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
- - a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16, weight_none );
- + a->l1.bi16x16.mv[0], a->l1.bi16x16.mv[1], 16, 16, weight_none );
- h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
- a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
- - + REF_COST( 0, a->l0.i_ref )
- - + REF_COST( 1, a->l1.i_ref )
- - + a->l0.me16x16.cost_mv
- - + a->l1.me16x16.cost_mv;
- + + ref_costs
- + + a->l0.bi16x16.cost_mv
- + + a->l1.bi16x16.cost_mv;
- +
- +
- + /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
- + if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
- + {
- + int l0_mv_cost = a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[0]]
- + + a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[1]];
- + int l1_mv_cost = a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[0]]
- + + a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[1]];
- + h->mc.avg[PIXEL_16x16]( pix0, 16, h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
- + h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
- + h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
- + int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
- + + ref_costs + l0_mv_cost + l1_mv_cost;
- + if( cost00 < a->i_cost16x16bi )
- + {
- + M32( a->l0.bi16x16.mv ) = 0;
- + M32( a->l1.bi16x16.mv ) = 0;
- + a->l0.bi16x16.cost_mv = l0_mv_cost;
- + a->l1.bi16x16.cost_mv = l1_mv_cost;
- + a->i_cost16x16bi = cost00;
- + }
- + }
- /* mb type cost */
- a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
- @@ -2205,7 +2231,7 @@ static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a )
- {
- case D_16x16:
- if( h->mb.i_type == B_BI_BI )
- - x264_me_refine_bidir_satd( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );
- + x264_me_refine_bidir_satd( h, &a->l0.bi16x16, &a->l1.bi16x16, i_biweight );
- break;
- case D_16x8:
- for( i=0; i<2; i++ )
- @@ -2819,8 +2845,8 @@ intra_analysis:
- }
- else if( i_type == B_BI_BI )
- {
- - x264_me_refine_qpel( h, &analysis.l0.me16x16 );
- - x264_me_refine_qpel( h, &analysis.l1.me16x16 );
- + x264_me_refine_qpel( h, &analysis.l0.bi16x16 );
- + x264_me_refine_qpel( h, &analysis.l1.bi16x16 );
- }
- }
- else if( i_partition == D_16x8 )
- @@ -2938,7 +2964,7 @@ intra_analysis:
- x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );
- }
- else if( i_type == B_BI_BI )
- - x264_me_refine_bidir_rd( h, &analysis.l0.me16x16, &analysis.l1.me16x16, i_biweight, 0, analysis.i_lambda2 );
- + x264_me_refine_bidir_rd( h, &analysis.l0.bi16x16, &analysis.l1.bi16x16, i_biweight, 0, analysis.i_lambda2 );
- }
- else if( i_partition == D_16x8 )
- {
- @@ -3121,10 +3147,10 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
- break;
- case B_BI_BI:
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
- + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.bi16x16.mv );
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
- + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.bi16x16.mv );
- break;
- }
- break;
- --
- 1.6.1.2
- From 65f2eb0783b325bddee87b171b904bc54e5ceacc Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 14:22:05 -0800
- Subject: [PATCH 04/14] Faster CABAC MB header writing
- Reorganize the header writing to merge mb type and mb mode info (mv, pred, etc.).
- Reduces redundant branches and better splits the code between frame types (for better code cache usage).
- Also slightly simplify qp delta calculation.
- Also make CAVLC and CABAC a bit more consistent in structure and function names.
- ---
- encoder/cabac.c | 573 ++++++++++++++++++++++++++-----------------------------
- encoder/cavlc.c | 118 ++++++------
- 2 files changed, 334 insertions(+), 357 deletions(-)
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 271f527..6ff2aed 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -29,151 +29,6 @@
- #define RDO_SKIP_BS 0
- #endif
- -static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
- - int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
- -{
- - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, ctx0, 0 );
- - }
- -#if !RDO_SKIP_BS
- - else if( i_mb_type == I_PCM )
- - {
- - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- - x264_cabac_encode_flush( h, cb );
- - }
- -#endif
- - else
- - {
- - int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
- -
- - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- - x264_cabac_encode_terminal( cb );
- -
- - x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
- - if( h->mb.i_cbp_chroma == 0 )
- - x264_cabac_encode_decision_noup( cb, ctx2, 0 );
- - else
- - {
- - x264_cabac_encode_decision( cb, ctx2, 1 );
- - x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
- - }
- - x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
- - x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
- - }
- -}
- -
- -static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- -{
- - const int i_mb_type = h->mb.i_type;
- -
- - if( h->sh.b_mbaff &&
- - (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
- - {
- - x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
- - }
- -
- - if( h->sh.i_type == SLICE_TYPE_I )
- - {
- - int ctx = 0;
- - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
- - ctx++;
- - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
- - ctx++;
- -
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
- - }
- - else if( h->sh.i_type == SLICE_TYPE_P )
- - {
- - /* prefix: 14, suffix: 17 */
- - if( i_mb_type == P_L0 )
- - {
- - x264_cabac_encode_decision_noup( cb, 14, 0 );
- - x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
- - x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, 14, 0 );
- - x264_cabac_encode_decision_noup( cb, 15, 0 );
- - x264_cabac_encode_decision_noup( cb, 16, 1 );
- - }
- - else /* intra */
- - {
- - /* prefix */
- - x264_cabac_encode_decision_noup( cb, 14, 1 );
- -
- - /* suffix */
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
- - }
- - }
- - else //if( h->sh.i_type == SLICE_TYPE_B )
- - {
- - int ctx = 0;
- - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- - ctx++;
- - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- - ctx++;
- -
- - if( i_mb_type == B_DIRECT )
- - {
- - x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
- - return;
- - }
- - x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
- -
- - if( i_mb_type == B_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+5, 1 );
- - }
- - else if( IS_INTRA( i_mb_type ) )
- - {
- - /* prefix */
- - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 0 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- -
- - /* suffix */
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
- - }
- - else
- - {
- - static const uint8_t i_mb_bits[9*3] =
- - {
- - 0x31, 0x29, 0x4, /* L0 L0 */
- - 0x35, 0x2d, 0, /* L0 L1 */
- - 0x43, 0x63, 0, /* L0 BI */
- - 0x3d, 0x2f, 0, /* L1 L0 */
- - 0x39, 0x25, 0x6, /* L1 L1 */
- - 0x53, 0x73, 0, /* L1 BI */
- - 0x4b, 0x6b, 0, /* BI L0 */
- - 0x5b, 0x7b, 0, /* BI L1 */
- - 0x47, 0x67, 0x21 /* BI BI */
- - };
- -
- - const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
- - int bits = i_mb_bits[idx];
- -
- - x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
- - x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
- - if( bits != 1 )
- - {
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - if( bits != 1 )
- - x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
- - }
- - }
- - }
- -}
- -
- static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
- {
- if( i_pred == i_mode )
- @@ -209,6 +64,12 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
- }
- }
- +static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
- +{
- + int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
- + x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
- +}
- +
- static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
- {
- int cbp = h->mb.i_cbp_luma;
- @@ -244,7 +105,6 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
- static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- {
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- - int ctx;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
- @@ -257,7 +117,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
- * we don't have to check for them. */
- - ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
- + int ctx = !!h->mb.i_last_dqp;
- if( i_dqp != 0 )
- {
- @@ -321,12 +181,6 @@ static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub )
- x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
- }
- -static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
- -{
- - int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
- - x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
- -}
- -
- static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
- {
- const int i8 = x264_scan8[idx];
- @@ -463,6 +317,267 @@ static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
- }
- }
- +static void x264_cabac_mb_header_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
- + int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
- +{
- + if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- + {
- + int i, di = h->mb.b_transform_8x8 ? 4 : 1;
- + x264_cabac_encode_decision_noup( cb, ctx0, 0 );
- +
- + if( h->pps->b_transform_8x8_mode )
- + x264_cabac_mb_transform_size( h, cb );
- +
- + for( i = 0; i < 16; i += di )
- + {
- + const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- + const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- + x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- + }
- + }
- +#if !RDO_SKIP_BS
- + else if( i_mb_type == I_PCM )
- + {
- + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- + x264_cabac_encode_flush( h, cb );
- + return;
- + }
- +#endif
- + else
- + {
- + int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
- +
- + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- + x264_cabac_encode_terminal( cb );
- +
- + x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
- + if( h->mb.i_cbp_chroma == 0 )
- + x264_cabac_encode_decision_noup( cb, ctx2, 0 );
- + else
- + {
- + x264_cabac_encode_decision( cb, ctx2, 1 );
- + x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
- + }
- + x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
- + x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
- + }
- + x264_cabac_mb_intra_chroma_pred_mode( h, cb );
- +}
- +
- +static inline void x264_cabac_mb_header( x264_t *h, x264_cabac_t *cb )
- +{
- + const int i_mb_type = h->mb.i_type;
- + int i_list, i;
- +
- + if( h->sh.b_mbaff &&
- + (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
- + {
- + x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
- + }
- +
- + if( h->sh.i_type == SLICE_TYPE_I )
- + {
- + int ctx = 0;
- + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
- + ctx++;
- + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
- + ctx++;
- +
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
- + }
- + else if( h->sh.i_type == SLICE_TYPE_P )
- + {
- + /* prefix: 14, suffix: 17 */
- + if( i_mb_type == P_L0 )
- + {
- + x264_cabac_encode_decision_noup( cb, 14, 0 );
- + if( h->mb.i_partition == D_16x16 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 0 );
- + x264_cabac_encode_decision_noup( cb, 16, 0 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
- + }
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 1 );
- + x264_cabac_encode_decision_noup( cb, 17, 1 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_ref( h, cb, 0, 8 );
- + }
- + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
- + x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 1 );
- + x264_cabac_encode_decision_noup( cb, 17, 0 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_ref( h, cb, 0, 4 );
- + }
- + x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
- + x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
- + }
- + }
- + else if( i_mb_type == P_8x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 14, 0 );
- + x264_cabac_encode_decision_noup( cb, 15, 0 );
- + x264_cabac_encode_decision_noup( cb, 16, 1 );
- +
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
- +
- + /* ref 0 */
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_ref( h, cb, 0, 4 );
- + x264_cabac_mb_ref( h, cb, 0, 8 );
- + x264_cabac_mb_ref( h, cb, 0, 12 );
- + }
- +
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb8x8_mvd( h, cb, i );
- + }
- + else /* intra */
- + {
- + /* prefix */
- + x264_cabac_encode_decision_noup( cb, 14, 1 );
- +
- + /* suffix */
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
- + }
- + }
- + else //if( h->sh.i_type == SLICE_TYPE_B )
- + {
- + int ctx = 0;
- + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- + ctx++;
- + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- + ctx++;
- +
- + if( i_mb_type == B_DIRECT )
- + {
- + x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
- + return;
- + }
- + x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
- +
- + if( i_mb_type == B_8x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision_noup( cb, 27+5, 1 );
- +
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
- +
- + /* ref */
- + if( h->mb.pic.i_fref[0] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_ref( h, cb, 0, 4*i );
- +
- + if( h->mb.pic.i_fref[1] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_ref( h, cb, 1, 4*i );
- +
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
- +
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
- + }
- + else if( IS_INTRA( i_mb_type ) )
- + {
- + /* prefix */
- + x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 0 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- +
- + /* suffix */
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
- + }
- + else
- + {
- + static const uint8_t i_mb_bits[9*3] =
- + {
- + 0x31, 0x29, 0x4, /* L0 L0 */
- + 0x35, 0x2d, 0, /* L0 L1 */
- + 0x43, 0x63, 0, /* L0 BI */
- + 0x3d, 0x2f, 0, /* L1 L0 */
- + 0x39, 0x25, 0x6, /* L1 L1 */
- + 0x53, 0x73, 0, /* L1 BI */
- + 0x4b, 0x6b, 0, /* BI L0 */
- + 0x5b, 0x7b, 0, /* BI L1 */
- + 0x47, 0x67, 0x21 /* BI BI */
- + };
- +
- + const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
- + int bits = i_mb_bits[idx];
- +
- + x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
- + x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
- + if( bits != 1 )
- + {
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- + if( bits != 1 )
- + x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
- + }
- +
- + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + if( b_list[0][0] )
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + if( b_list[0][1] && h->mb.i_partition != D_16x16 )
- + x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
- + }
- + if( h->mb.pic.i_fref[1] > 1 )
- + {
- + if( b_list[1][0] )
- + x264_cabac_mb_ref( h, cb, 1, 0 );
- + if( b_list[1][1] && h->mb.i_partition != D_16x16 )
- + x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
- + }
- + for( i_list = 0; i_list < 2; i_list++ )
- + {
- + if( h->mb.i_partition == D_16x16 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
- + }
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
- + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
- + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
- + }
- + }
- + }
- + }
- +}
- +
- /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
- * 1-> AC 16x16 i_idx = luma4x4idx
- * 2-> Luma4x4 i_idx = luma4x4idx
- @@ -752,7 +867,6 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
- void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- {
- const int i_mb_type = h->mb.i_type;
- - int i_list;
- int i;
- #if !RDO_SKIP_BS
- @@ -760,15 +874,14 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- int i_mb_pos_tex;
- #endif
- - /* Write the MB type */
- - x264_cabac_mb_type( h, cb );
- + x264_cabac_mb_header( h, cb );
- #if !RDO_SKIP_BS
- + i_mb_pos_tex = x264_cabac_pos( cb );
- + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- +
- if( i_mb_type == I_PCM )
- {
- - i_mb_pos_tex = x264_cabac_pos( cb );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -
- memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
- cb->p += 256;
- for( i = 0; i < 8; i++ )
- @@ -793,140 +906,6 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- }
- #endif
- - if( IS_INTRA( i_mb_type ) )
- - {
- - if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
- - x264_cabac_mb_transform_size( h, cb );
- -
- - if( i_mb_type != I_16x16 )
- - {
- - int di = h->mb.b_transform_8x8 ? 4 : 1;
- - for( i = 0; i < 16; i += di )
- - {
- - const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- - const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- - x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- - }
- - }
- -
- - x264_cabac_mb_intra_chroma_pred_mode( h, cb );
- - }
- - else if( i_mb_type == P_L0 )
- - {
- - if( h->mb.i_partition == D_16x16 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
- - }
- - else if( h->mb.i_partition == D_16x8 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 8 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
- - x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
- - }
- - else //if( h->mb.i_partition == D_8x16 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 4 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
- - x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
- - }
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
- -
- - /* ref 0 */
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 4 );
- - x264_cabac_mb_ref( h, cb, 0, 8 );
- - x264_cabac_mb_ref( h, cb, 0, 12 );
- - }
- -
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb8x8_mvd( h, cb, i );
- - }
- - else if( i_mb_type == B_8x8 )
- - {
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
- -
- - /* ref */
- - if( h->mb.pic.i_fref[0] > 1 )
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_ref( h, cb, 0, 4*i );
- -
- - if( h->mb.pic.i_fref[1] > 1 )
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_ref( h, cb, 1, 4*i );
- -
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
- -
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
- - }
- - else if( i_mb_type != B_DIRECT )
- - {
- - /* All B mode */
- - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - if( b_list[0][0] )
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - if( b_list[0][1] && h->mb.i_partition != D_16x16 )
- - x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
- - }
- - if( h->mb.pic.i_fref[1] > 1 )
- - {
- - if( b_list[1][0] )
- - x264_cabac_mb_ref( h, cb, 1, 0 );
- - if( b_list[1][1] && h->mb.i_partition != D_16x16 )
- - x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
- - }
- - for( i_list = 0; i_list < 2; i_list++ )
- - {
- - if( h->mb.i_partition == D_16x16 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
- - }
- - else if( h->mb.i_partition == D_16x8 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
- - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
- - }
- - else //if( h->mb.i_partition == D_8x16 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
- - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
- - }
- - }
- - }
- -
- -#if !RDO_SKIP_BS
- - i_mb_pos_tex = x264_cabac_pos( cb );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -#endif
- -
- if( i_mb_type != I_16x16 )
- {
- x264_cabac_mb_cbp_luma( h, cb );
- @@ -934,11 +913,9 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- }
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- - {
- x264_cabac_mb_transform_size( h, cb );
- - }
- - if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- {
- const int b_intra = IS_INTRA( i_mb_type );
- x264_cabac_mb_qp_delta( h, cb );
- @@ -950,7 +927,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
- /* AC Luma */
- - if( h->mb.i_cbp_luma != 0 )
- + if( h->mb.i_cbp_luma )
- for( i = 0; i < 16; i++ )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
- }
- @@ -967,7 +944,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
- }
- - if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */
- + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
- {
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index c65c9bd..d18408b 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -203,7 +203,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
- *nnz = block_residual_write_cavlc(h,cat,l,nC);\
- }
- -static void cavlc_qp_delta( x264_t *h )
- +static void x264_cavlc_mb_qp_delta( x264_t *h )
- {
- bs_t *s = &h->out.bs;
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- @@ -228,7 +228,7 @@ static void cavlc_qp_delta( x264_t *h )
- bs_write_se( s, i_dqp );
- }
- -static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- +static void x264_cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- {
- bs_t *s = &h->out.bs;
- ALIGNED_4( int16_t mvp[2] );
- @@ -237,26 +237,26 @@ static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
- }
- -static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
- +static inline void x264_cavlc_mb8x8_mvd( x264_t *h, int i )
- {
- switch( h->mb.i_sub_partition[i] )
- {
- case D_L0_8x8:
- - cavlc_mb_mvd( h, 0, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- break;
- case D_L0_8x4:
- - cavlc_mb_mvd( h, 0, 4*i+0, 2 );
- - cavlc_mb_mvd( h, 0, 4*i+2, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+2, 2 );
- break;
- case D_L0_4x8:
- - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- break;
- case D_L0_4x4:
- - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+2, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+3, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+2, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+3, 1 );
- break;
- }
- }
- @@ -372,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
- if( h->mb.pic.i_fref[0] > 1 )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- }
- else if( h->mb.i_partition == D_16x8 )
- {
- @@ -382,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- }
- - cavlc_mb_mvd( h, 0, 0, 4 );
- - cavlc_mb_mvd( h, 0, 8, 4 );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 8, 4 );
- }
- else if( h->mb.i_partition == D_8x16 )
- {
- @@ -393,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- }
- - cavlc_mb_mvd( h, 0, 0, 2 );
- - cavlc_mb_mvd( h, 0, 4, 2 );
- + x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4, 2 );
- }
- }
- else if( i_mb_type == P_8x8 )
- @@ -429,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
- }
- for( i = 0; i < 4; i++ )
- - cavlc_mb8x8_mvd( h, i );
- + x264_cavlc_mb8x8_mvd( h, i );
- }
- else if( i_mb_type == B_8x8 )
- {
- @@ -452,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h )
- /* mvd */
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - cavlc_mb_mvd( h, 0, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - cavlc_mb_mvd( h, 1, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- }
- else if( i_mb_type != B_DIRECT )
- {
- @@ -470,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- {
- if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- }
- else
- {
- @@ -481,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h )
- if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- if( h->mb.i_partition == D_16x8 )
- {
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
- - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- }
- else //if( h->mb.i_partition == D_8x16 )
- {
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
- - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
- - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- }
- }
- }
- @@ -514,31 +514,31 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write1( s, h->mb.b_transform_8x8 );
- /* write residual */
- - if( i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- {
- - cavlc_qp_delta( h );
- + x264_cavlc_mb_qp_delta( h );
- - /* DC Luma */
- - block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
- + if( i_mb_type == I_16x16 )
- + {
- + /* DC Luma */
- + block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
- - /* AC Luma */
- - if( h->mb.i_cbp_luma )
- - for( i = 0; i < 16; i++ )
- - block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
- - }
- - else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
- - {
- - cavlc_qp_delta( h );
- - x264_macroblock_luma_write_cavlc( h, 0, 3 );
- - }
- - if( h->mb.i_cbp_chroma )
- - {
- - /* Chroma DC residual present */
- - block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- - block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
- - if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- - for( i = 16; i < 24; i++ )
- - block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
- + /* AC Luma */
- + if( h->mb.i_cbp_luma )
- + for( i = 0; i < 16; i++ )
- + block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
- + }
- + else
- + x264_macroblock_luma_write_cavlc( h, 0, 3 );
- +
- + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
- + {
- + block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- + block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
- + if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- + for( i = 16; i < 24; i++ )
- + block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
- + }
- }
- #if !RDO_SKIP_BS
- @@ -563,22 +563,22 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
- if( i_mb_type == P_8x8 )
- {
- - cavlc_mb8x8_mvd( h, i8 );
- + x264_cavlc_mb8x8_mvd( h, i8 );
- bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- }
- else if( i_mb_type == P_L0 )
- - cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- }
- else //if( i_mb_type == B_8x8 )
- {
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - cavlc_mb_mvd( h, 0, 4*i8, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -596,7 +596,7 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
- {
- int b_8x4 = i_pixel == PIXEL_8x4;
- h->out.bs.i_bits_encoded = 0;
- - cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
- + x264_cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
- block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
- if( i_pixel != PIXEL_4x4 )
- {
- --
- 1.6.1.2
- From c8db72d3e9af6f5850a2e94904657910a77c5103 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 18:19:29 -0800
- Subject: [PATCH 05/14] Simplify decimate checks in macroblock_encode
- Also fix a misleading comment.
- ---
- common/common.h | 1 +
- encoder/analyse.c | 1 +
- encoder/macroblock.c | 12 +++++-------
- 3 files changed, 7 insertions(+), 7 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 950f48f..8b1b05a 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -484,6 +484,7 @@ struct x264_t
- int b_chroma_me;
- int b_trellis;
- int b_noise_reduction;
- + int b_dct_decimate;
- int i_psy_rd; /* Psy RD strength--fixed point value*/
- int i_psy_trellis; /* Psy trellis strength--fixed point value*/
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 1fb2206..b8710dc 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -364,6 +364,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
- h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
- && h->mb.i_subpel_refine >= 5;
- + h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I); /* B-slices always decimate */
- h->mb.b_transform_8x8 = 0;
- h->mb.b_noise_reduction = 0;
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index e4edb8a..fa7942d 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -208,8 +208,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] );
- int i, nz;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
- - int decimate_score = b_decimate ? 0 : 9;
- + int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
- if( h->mb.b_lossless )
- {
- @@ -342,7 +341,7 @@ static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp,
- void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- {
- int i, ch, nz, nz_dc;
- - int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
- + int b_decimate = b_inter && h->mb.b_dct_decimate;
- ALIGNED_ARRAY_16( int16_t, dct2x2,[4] );
- h->mb.i_cbp_chroma = 0;
- @@ -607,7 +606,7 @@ void x264_macroblock_encode( x264_t *h )
- {
- int i_cbp_dc = 0;
- int i_qp = h->mb.i_qp;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
- + int b_decimate = h->mb.b_dct_decimate;
- int b_force_no_skip = 0;
- int i,idx,nz;
- h->mb.i_cbp_luma = 0;
- @@ -914,8 +913,7 @@ void x264_macroblock_encode( x264_t *h )
- /*****************************************************************************
- * x264_macroblock_probe_skip:
- - * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
- - * the previous QP
- + * Check if the current MB could be encoded as a [PB]_SKIP
- *****************************************************************************/
- int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
- {
- @@ -1052,7 +1050,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- int i_qp = h->mb.i_qp;
- uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
- uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
- + int b_decimate = h->mb.b_dct_decimate;
- int nnz8x8 = 0;
- int ch, nz;
- --
- 1.6.1.2
- From 69cba07228fce5004ad526aac68887e43fcfcacc Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 18:36:44 -0800
- Subject: [PATCH 06/14] Fix subpel iteration counts with B-frame analysis and subme 6/8
- Since subme 6 means "like subme 5, except RD on P-frames", B-frame analysis
- shouldn't use the RD subpel counts at subme 6. Similarly with subme 8.
- Slightly faster (and very marginally worse) compression at subme 6 and 8.
- ---
- encoder/analyse.c | 2 ++
- 1 files changed, 2 insertions(+), 0 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index b8710dc..35d39d5 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -362,6 +362,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- h->mb.i_me_method = h->param.analyse.i_me_method;
- h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
- + if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
- + h->mb.i_subpel_refine--;
- h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
- && h->mb.i_subpel_refine >= 5;
- h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I); /* B-slices always decimate */
- --
- 1.6.1.2
- From 6561e6ff5091f773facb9b1f3bf57bb037fe0267 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 20:01:16 -0800
- Subject: [PATCH 07/14] Smarter QPRD
- Catch some cases in which RD checks can be avoided; reduces QPRD RD calls by 10-20%.
- ---
- encoder/analyse.c | 42 ++++++++++++++++++++++++++++++++++++++----
- 1 files changed, 38 insertions(+), 4 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 35d39d5..e30b849 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -2306,9 +2306,10 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
- int last_qp_tried = 0;
- origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
- + int origcbp = h->mb.cbp[h->mb.i_mb_xy];
- /* If CBP is already zero, don't raise the quantizer any higher. */
- - for( direction = h->mb.cbp[h->mb.i_mb_xy] ? 1 : -1; direction >= -1; direction-=2 )
- + for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
- {
- /* Without psy-RD, require monotonicity when moving quant away from previous
- * macroblock's quant; allow 1 failure when moving quant towards previous quant.
- @@ -2323,14 +2324,47 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_qp = orig_qp;
- failures = 0;
- prevcost = origcost;
- +
- + /* If the current QP results in an empty CBP, it's highly likely that lower QPs
- + * (up to a point) will too. So, jump down to where the threshold will kick in
- + * and check the QP there. If the CBP is still empty, skip the main loop.
- + * If it isn't empty, we would have ended up having to check this QP anyways,
- + * so as long as we store it for later lookup, we lose nothing. */
- + int already_checked_qp = -1;
- + int already_checked_cost = COST_MAX;
- + if( direction == -1 )
- + {
- + if( !origcbp )
- + {
- + h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
- + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- + already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
- + if( !h->mb.cbp[h->mb.i_mb_xy] )
- + {
- + /* If our empty-CBP block is lower QP than the last QP,
- + * the last QP cannot possibly have a CBP either. */
- + if( h->mb.i_last_qp > h->mb.i_qp )
- + last_qp_tried = 1;
- + break;
- + }
- + already_checked_qp = h->mb.i_qp;
- + h->mb.i_qp = orig_qp;
- + }
- + }
- +
- h->mb.i_qp += direction;
- while( h->mb.i_qp >= h->param.rc.i_qp_min && h->mb.i_qp <= h->param.rc.i_qp_max )
- {
- if( h->mb.i_last_qp == h->mb.i_qp )
- last_qp_tried = 1;
- - h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- - cost = x264_rd_cost_mb( h, a->i_lambda2 );
- - COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
- + if( h->mb.i_qp == already_checked_qp )
- + cost = already_checked_cost;
- + else
- + {
- + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- + cost = x264_rd_cost_mb( h, a->i_lambda2 );
- + COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
- + }
- /* We can't assume that the costs are monotonic over QPs.
- * Tie case-as-failure seems to give better results. */
- --
- 1.6.1.2
- From 44499e11c37c2eda2438e7d346c24f9c66008363 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 20:27:57 -0800
- Subject: [PATCH 08/14] Fix 2-pass ratecontrol continuation in case of missing statsfile
- Didn't work properly if MB-tree was enabled.
- ---
- encoder/ratecontrol.c | 1 +
- 1 files changed, 1 insertions(+), 0 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 52196e7..e314ba2 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -1280,6 +1280,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
- h->thread[i]->param.rc.b_stat_read = 0;
- h->thread[i]->param.i_bframe_adaptive = 0;
- h->thread[i]->param.i_scenecut_threshold = 0;
- + h->thread[i]->param.rc.b_mb_tree = 0;
- if( h->thread[i]->param.i_bframe > 1 )
- h->thread[i]->param.i_bframe = 1;
- }
- --
- 1.6.1.2
- From 95fa057ad69e497b7adf0391ef8e63b0c3d24d17 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 5 Feb 2010 16:15:23 -0800
- Subject: [PATCH 09/14] Various CABAC/CAVLC cleanups/speedups
- Make some if/else chains into switch statements.
- Store CBP data in x264_t and only move it to frame storage later.
- This saves a wasted cache line and some unnecessary dereferences in RDO.
- ---
- common/common.h | 1 +
- common/macroblock.c | 3 +-
- encoder/analyse.c | 8 +-
- encoder/cabac.c | 40 +++---
- encoder/cavlc.c | 365 ++++++++++++++++++++++++++------------------------
- encoder/macroblock.c | 19 +--
- 6 files changed, 219 insertions(+), 217 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 8b1b05a..d4a8dd9 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -542,6 +542,7 @@ struct x264_t
- ALIGNED_4( uint8_t i_sub_partition[4] );
- int b_transform_8x8;
- + int i_cbp_combined;
- int i_cbp_luma;
- int i_cbp_chroma;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 10f09ac..d86f3af 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1343,11 +1343,12 @@ void x264_macroblock_cache_save( x264_t *h )
- M16( &non_zero_count[16+2*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] ) >> 8;
- M16( &non_zero_count[16+3*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] ) >> 8;
- - if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
- + if( h->mb.i_type != I_16x16 && !h->mb.i_cbp_combined )
- h->mb.i_qp = h->mb.i_last_qp;
- h->mb.qp[i_mb_xy] = h->mb.i_qp;
- h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
- h->mb.i_last_qp = h->mb.i_qp;
- + h->mb.cbp[i_mb_xy] = h->mb.i_cbp_combined;
- }
- if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index e30b849..dba85b4 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -1198,7 +1198,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_partition = D_16x16;
- x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
- a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
- - if( !(h->mb.i_cbp_luma|h->mb.i_cbp_chroma) )
- + if( !h->mb.i_cbp_combined )
- h->mb.i_type = P_SKIP;
- }
- }
- @@ -2306,7 +2306,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
- int last_qp_tried = 0;
- origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
- - int origcbp = h->mb.cbp[h->mb.i_mb_xy];
- + int origcbp = h->mb.i_cbp_combined;
- /* If CBP is already zero, don't raise the quantizer any higher. */
- for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
- @@ -2339,7 +2339,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
- h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
- - if( !h->mb.cbp[h->mb.i_mb_xy] )
- + if( !h->mb.i_cbp_combined )
- {
- /* If our empty-CBP block is lower QP than the last QP,
- * the last QP cannot possibly have a CBP either. */
- @@ -2376,7 +2376,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- if( failures > threshold )
- break;
- - if( direction == 1 && !h->mb.cbp[h->mb.i_mb_xy] )
- + if( direction == 1 && !h->mb.i_cbp_combined )
- break;
- h->mb.i_qp += direction;
- }
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 6ff2aed..6c14722 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -107,7 +107,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- - if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
- + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
- {
- #if !RDO_SKIP_BS
- h->mb.i_qp = h->mb.i_last_qp;
- @@ -915,7 +915,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- x264_cabac_mb_transform_size( h, cb );
- - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_combined || i_mb_type == I_16x16 )
- {
- const int b_intra = IS_INTRA( i_mb_type );
- x264_cabac_mb_qp_delta( h, cb );
- @@ -973,24 +973,24 @@ static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int
- int b_8x16 = h->mb.i_partition == D_8x16;
- int j;
- - if( i_mb_type == P_8x8 )
- + switch( i_mb_type )
- {
- - x264_cabac_mb8x8_mvd( h, cb, i8 );
- - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
- - }
- - else if( i_mb_type == P_L0 )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- - {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - }
- - else //if( i_mb_type == B_8x8 )
- - {
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
- + case P_L0:
- + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- + break;
- + case P_8x8:
- + x264_cabac_mb8x8_mvd( h, cb, i8 );
- + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
- + break;
- + case B_8x8:
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- + x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
- + break;
- + default: /* Rest of the B types */
- + if( x264_mb_type_list_table[i_mb_type][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- + if( x264_mb_type_list_table[i_mb_type][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -1019,9 +1019,7 @@ static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, i
- int b_8x4 = i_pixel == PIXEL_8x4;
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
- if( i_pixel == PIXEL_4x4 )
- - {
- x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
- - }
- else
- {
- x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index d18408b..45b55fe 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -209,8 +209,7 @@ static void x264_cavlc_mb_qp_delta( x264_t *h )
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- - if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
- - && !h->mb.cache.non_zero_count[x264_scan8[24]] )
- + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
- {
- #if !RDO_SKIP_BS
- h->mb.i_qp = h->mb.i_last_qp;
- @@ -302,201 +301,209 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write1( s, h->mb.b_interlaced );
- }
- -#if !RDO_SKIP_BS
- - if( i_mb_type == I_PCM )
- - {
- - uint8_t *p_start = s->p_start;
- - bs_write_ue( s, i_mb_i_offset + 25 );
- - i_mb_pos_tex = bs_pos( s );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -
- - bs_align_0( s );
- -
- - memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- - s->p += 256;
- - for( i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- - for( i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- -
- - bs_init( s, s->p, s->p_end - s->p );
- - s->p_start = p_start;
- -
- - /* if PCM is chosen, we need to store reconstructed frame data */
- - h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
- - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
- - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
- -
- - h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
- - return;
- - }
- -#endif
- -
- /* Write:
- - type
- - prediction
- - mv */
- - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- + switch( i_mb_type )
- {
- - int di = i_mb_type == I_8x8 ? 4 : 1;
- - bs_write_ue( s, i_mb_i_offset + 0 );
- - if( h->pps->b_transform_8x8_mode )
- - bs_write1( s, h->mb.b_transform_8x8 );
- -
- - /* Prediction: Luma */
- - for( i = 0; i < 16; i += di )
- + case I_4x4:
- + case I_8x8:
- {
- - int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- - int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- + int di = i_mb_type == I_8x8 ? 4 : 1;
- + bs_write_ue( s, i_mb_i_offset + 0 );
- + if( h->pps->b_transform_8x8_mode )
- + bs_write1( s, h->mb.b_transform_8x8 );
- - if( i_pred == i_mode )
- - bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
- - else
- - bs_write( s, 4, i_mode - (i_mode > i_pred) );
- + /* Prediction: Luma */
- + for( i = 0; i < 16; i += di )
- + {
- + int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- + int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- +
- + if( i_pred == i_mode )
- + bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
- + else
- + bs_write( s, 4, i_mode - (i_mode > i_pred) );
- + }
- + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- + break;
- + case I_16x16:
- + bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
- + h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
- + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- + break;
- }
- - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- - }
- - else if( i_mb_type == I_16x16 )
- - {
- - bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
- - h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
- - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- - }
- - else if( i_mb_type == P_L0 )
- - {
- - if( h->mb.i_partition == D_16x16 )
- +#if !RDO_SKIP_BS
- + case I_PCM:
- {
- - bs_write1( s, 1 );
- -
- - if( h->mb.pic.i_fref[0] > 1 )
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + uint8_t *p_start = s->p_start;
- + bs_write_ue( s, i_mb_i_offset + 25 );
- + i_mb_pos_tex = bs_pos( s );
- + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- +
- + bs_align_0( s );
- +
- + memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- + s->p += 256;
- + for( i = 0; i < 8; i++ )
- + memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- + s->p += 64;
- + for( i = 0; i < 8; i++ )
- + memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- + s->p += 64;
- +
- + bs_init( s, s->p, s->p_end - s->p );
- + s->p_start = p_start;
- +
- + /* if PCM is chosen, we need to store reconstructed frame data */
- + h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
- + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
- + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
- +
- + h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
- + return;
- }
- - else if( h->mb.i_partition == D_16x8 )
- +#endif
- + case P_L0:
- {
- - bs_write_ue( s, 1 );
- - if( h->mb.pic.i_fref[0] > 1 )
- + if( h->mb.i_partition == D_16x16 )
- {
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + bs_write1( s, 1 );
- +
- + if( h->mb.pic.i_fref[0] > 1 )
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- }
- - x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + bs_write_ue( s, 1 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + }
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + }
- + else if( h->mb.i_partition == D_8x16 )
- + {
- + bs_write_ue( s, 2 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- + }
- + x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + }
- + break;
- }
- - else if( h->mb.i_partition == D_8x16 )
- + case P_8x8:
- {
- - bs_write_ue( s, 2 );
- - if( h->mb.pic.i_fref[0] > 1 )
- + int b_sub_ref;
- + if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
- + h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
- + {
- + bs_write_ue( s, 4 );
- + b_sub_ref = 0;
- + }
- + else
- + {
- + bs_write_ue( s, 3 );
- + b_sub_ref = 1;
- + }
- +
- + /* sub mb type */
- + if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
- + for( i = 0; i < 4; i++ )
- + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
- + else
- + bs_write( s, 4, 0xf );
- +
- + /* ref0 */
- + if( b_sub_ref )
- {
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
- }
- - x264_cavlc_mb_mvd( h, 0, 0, 2 );
- - x264_cavlc_mb_mvd( h, 0, 4, 2 );
- - }
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - int b_sub_ref;
- - if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
- - h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
- - {
- - bs_write_ue( s, 4 );
- - b_sub_ref = 0;
- - }
- - else
- - {
- - bs_write_ue( s, 3 );
- - b_sub_ref = 1;
- - }
- - /* sub mb type */
- - if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
- for( i = 0; i < 4; i++ )
- - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
- - else
- - bs_write( s, 4, 0xf );
- -
- - /* ref0 */
- - if( b_sub_ref )
- - {
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
- + x264_cavlc_mb8x8_mvd( h, i );
- + break;
- }
- + case B_8x8:
- + {
- + bs_write_ue( s, 22 );
- - for( i = 0; i < 4; i++ )
- - x264_cavlc_mb8x8_mvd( h, i );
- - }
- - else if( i_mb_type == B_8x8 )
- - {
- - bs_write_ue( s, 22 );
- -
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
- - /* ref */
- - if( h->mb.pic.i_fref[0] > 1 )
- + /* ref */
- + if( h->mb.pic.i_fref[0] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
- + if( h->mb.pic.i_fref[1] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
- +
- + /* mvd */
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
- - if( h->mb.pic.i_fref[1] > 1 )
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
- -
- - /* mvd */
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- - }
- - else if( i_mb_type != B_DIRECT )
- - {
- - /* All B mode */
- - /* Motion Vector */
- - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- - const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
- - const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
- -
- - bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
- - if( h->mb.i_partition == D_16x16 )
- + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- + break;
- + }
- + case B_DIRECT:
- {
- - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + bs_write1( s, 1 );
- + break;
- }
- - else
- + default: /* Rest of the B types */
- {
- - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
- - if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
- - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
- - if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- - if( h->mb.i_partition == D_16x8 )
- + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- + const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
- + const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
- +
- + bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
- + if( h->mb.i_partition == D_16x16 )
- {
- + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- }
- - else //if( h->mb.i_partition == D_8x16 )
- + else
- {
- - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
- + if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
- + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
- + if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- + if( h->mb.i_partition == D_16x8 )
- + {
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- + }
- }
- + break;
- }
- }
- - else //if( i_mb_type == B_DIRECT )
- - bs_write1( s, 1 );
- #if !RDO_SKIP_BS
- i_mb_pos_tex = bs_pos( s );
- @@ -505,16 +512,16 @@ void x264_macroblock_write_cavlc( x264_t *h )
- /* Coded block patern */
- if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- - bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- + bs_write_ue( s, intra4x4_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
- else if( i_mb_type != I_16x16 )
- - bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- + bs_write_ue( s, inter_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
- /* transform size 8x8 flag */
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- bs_write1( s, h->mb.b_transform_8x8 );
- /* write residual */
- - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_combined&0x3f || i_mb_type == I_16x16 )
- {
- x264_cavlc_mb_qp_delta( h );
- @@ -561,24 +568,24 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
- int b_8x16 = h->mb.i_partition == D_8x16;
- int j;
- - if( i_mb_type == P_8x8 )
- - {
- - x264_cavlc_mb8x8_mvd( h, i8 );
- - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- - }
- - else if( i_mb_type == P_L0 )
- - x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- + switch( i_mb_type )
- {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- - }
- - else //if( i_mb_type == B_8x8 )
- - {
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + case P_L0:
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + break;
- + case P_8x8:
- + x264_cavlc_mb8x8_mvd( h, i8 );
- + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- + break;
- + case B_8x8:
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + break;
- + default: /* Rest of the B types */
- + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -618,6 +625,8 @@ static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
- static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
- {
- h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
- + /* We can't use h->mb.i_cbp_combined here because it's only calculated at the end of
- + * x264_macroblock_encode(), which hasn't been called at this point. */
- bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- x264_macroblock_luma_write_cavlc( h, i8, i8 );
- return h->out.bs.i_bits_encoded;
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index fa7942d..f5f6267 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -488,7 +488,7 @@ static void x264_macroblock_encode_skip( x264_t *h )
- h->mb.i_cbp_chroma = 0x00;
- memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
- /* store cbp */
- - h->mb.cbp[h->mb.i_mb_xy] = 0;
- + h->mb.i_cbp_combined = 0;
- }
- /*****************************************************************************
- @@ -604,7 +604,6 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode )
- *****************************************************************************/
- void x264_macroblock_encode( x264_t *h )
- {
- - int i_cbp_dc = 0;
- int i_qp = h->mb.i_qp;
- int b_decimate = h->mb.b_dct_decimate;
- int b_force_no_skip = 0;
- @@ -880,34 +879,28 @@ void x264_macroblock_encode( x264_t *h )
- /* encode the 8x8 blocks */
- x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
- - if( h->param.b_cabac )
- - {
- - i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
- + int i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
- | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
- | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
- - }
- /* store cbp */
- - h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
- + h->mb.i_cbp_combined = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
- /* Check for P_SKIP
- * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
- * (if multiple mv give same result)*/
- if( !b_force_no_skip )
- {
- - if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
- - !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
- - M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
- + if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 && !h->mb.i_cbp_combined
- + && M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
- && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
- {
- h->mb.i_type = P_SKIP;
- }
- /* Check for B_SKIP */
- - if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
- - {
- + if( h->mb.i_type == B_DIRECT && !h->mb.i_cbp_combined )
- h->mb.i_type = B_SKIP;
- - }
- }
- }
- --
- 1.6.1.2
- From c20ab8386e321ef2f5b159055a902b43b40913ec Mon Sep 17 00:00:00 2001
- From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
- Date: Mon, 8 Feb 2010 01:48:38 -0800
- Subject: [PATCH 10/14] Write PASP atom in mp4 muxing
- Adds container-level aspect ratio support.
- ---
- output/mp4.c | 3 ++-
- 1 files changed, 2 insertions(+), 1 deletions(-)
- diff --git a/output/mp4.c b/output/mp4.c
- index e3ad9c6..b817c82 100644
- --- a/output/mp4.c
- +++ b/output/mp4.c
- @@ -121,7 +121,7 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
- if( mdhd_duration != total_duration )
- {
- uint64_t last_dts = gf_isom_get_sample_dts( p_mp4->p_file, p_mp4->i_track, p_mp4->i_numframe );
- - uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
- + uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
- gf_isom_set_last_sample_duration( p_mp4->p_file, p_mp4->i_track, last_duration );
- total_duration = gf_isom_get_media_duration( p_mp4->p_file, p_mp4->i_track );
- }
- @@ -212,6 +212,7 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
- dw *= sar ;
- else
- dh /= sar;
- + gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height );
- gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 );
- }
- --
- 1.6.1.2
- From 83ac6d576d252c91d96006cf73b57748e7bac537 Mon Sep 17 00:00:00 2001
- From: Henrik Gramner <hengar-6@student.ltu.se>
- Date: Mon, 8 Feb 2010 15:53:52 -0800
- Subject: [PATCH 11/14] Faster 2x2 chroma DC dequant
- ---
- doc/standards.txt | 1 +
- encoder/macroblock.c | 24 +++++++++---------------
- 2 files changed, 10 insertions(+), 15 deletions(-)
- diff --git a/doc/standards.txt b/doc/standards.txt
- index db9a691..7474d8f 100644
- --- a/doc/standards.txt
- +++ b/doc/standards.txt
- @@ -4,6 +4,7 @@ checkasm is written in gcc, with no attempt at compatibility with anything else.
- We make the following additional assumptions which are true of real systems but not guaranteed by C99:
- * Two's complement.
- * Signed right-shifts are sign-extended.
- +* int is 32-bit or larger.
- x86-specific assumptions:
- * The stack is 16-byte aligned. We align it on entry to libx264 and on entry to any thread, but the compiler must preserve alignment after that.
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index f5f6267..3d859de 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -42,30 +42,24 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
- int d1 = dct[2] + dct[3]; \
- int d2 = dct[0] - dct[1]; \
- int d3 = dct[2] - dct[3]; \
- - int dmf = dequant_mf[i_qp%6][0]; \
- - int qbits = i_qp/6 - 5; \
- - if( qbits > 0 ) \
- - { \
- - dmf <<= qbits; \
- - qbits = 0; \
- - }
- + int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
- static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
- {
- IDCT_DEQUANT_START
- - dct4x4[0][0] = (d0 + d1) * dmf >> -qbits;
- - dct4x4[1][0] = (d0 - d1) * dmf >> -qbits;
- - dct4x4[2][0] = (d2 + d3) * dmf >> -qbits;
- - dct4x4[3][0] = (d2 - d3) * dmf >> -qbits;
- + dct4x4[0][0] = (d0 + d1) * dmf >> 5;
- + dct4x4[1][0] = (d0 - d1) * dmf >> 5;
- + dct4x4[2][0] = (d2 + d3) * dmf >> 5;
- + dct4x4[3][0] = (d2 - d3) * dmf >> 5;
- }
- static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
- {
- IDCT_DEQUANT_START
- - out[0] = (d0 + d1) * dmf >> -qbits;
- - out[1] = (d0 - d1) * dmf >> -qbits;
- - out[2] = (d2 + d3) * dmf >> -qbits;
- - out[3] = (d2 - d3) * dmf >> -qbits;
- + out[0] = (d0 + d1) * dmf >> 5;
- + out[1] = (d0 - d1) * dmf >> 5;
- + out[2] = (d2 + d3) * dmf >> 5;
- + out[3] = (d2 - d3) * dmf >> 5;
- }
- static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
- --
- 1.6.1.2
- From 7b8fd33d747b99a338ab04ed923c3cf83ad3e134 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 9 Feb 2010 15:08:31 -0800
- Subject: [PATCH 12/14] Make psy-(rd|trellis) use more precision in userdata SEI
- ---
- common/common.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 6d1d7f0..aaccdf2 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -886,7 +886,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
- s += sprintf( s, " psy=%d", p->analyse.b_psy );
- if( p->analyse.b_psy )
- - s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
- + s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
- s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
- s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
- s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
- --
- 1.6.1.2
- From 75480be89f05681f5b7f3b66a46057771f17e2a8 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 10 Feb 2010 12:12:29 -0800
- Subject: [PATCH 13/14] Overhaul sliced-threads VBV
- Make predictors thread-local and allow each thread to poll the others to get their predicted sizes.
- Many, many other tweaks to improve quality with small VBV and sliced threads.
- ---
- encoder/encoder.c | 4 +-
- encoder/ratecontrol.c | 145 ++++++++++++++++++++++++++++++-------------------
- encoder/slicetype.c | 4 +-
- 3 files changed, 93 insertions(+), 60 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 8e9c118..b3da30e 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2061,6 +2061,8 @@ static int x264_threaded_slices_write( x264_t *h )
- for( i = 0; i <= h->sps->i_mb_height; i++ )
- x264_fdec_filter_row( h, i );
- + x264_threads_merge_ratecontrol( h );
- +
- for( i = 1; i < h->param.i_threads; i++ )
- {
- x264_t *t = h->thread[i];
- @@ -2076,8 +2078,6 @@ static int x264_threaded_slices_write( x264_t *h )
- ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];
- }
- - x264_threads_merge_ratecontrol( h );
- -
- return 0;
- }
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index e314ba2..57439c2 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -137,6 +137,7 @@ struct x264_ratecontrol_t
- double frame_size_estimated;
- double frame_size_planned;
- double slice_size_planned;
- + double max_frame_error;
- predictor_t (*row_pred)[2];
- predictor_t row_preds[5][2];
- predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
- @@ -505,17 +506,21 @@ int x264_ratecontrol_new( x264_t *h )
- rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
- rc->last_qscale = qp2qscale(26);
- - CHECKED_MALLOC( rc->pred, 5*sizeof(predictor_t) );
- + int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1;
- + CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds );
- CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) );
- for( i = 0; i < 5; i++ )
- {
- rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
- rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
- rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
- - rc->pred[i].coeff= 2.0;
- - rc->pred[i].count= 1.0;
- - rc->pred[i].decay= 0.5;
- - rc->pred[i].offset= 0.0;
- + for( j = 0; j < num_preds; j++ )
- + {
- + rc->pred[i+j*5].coeff= 2.0;
- + rc->pred[i+j*5].count= 1.0;
- + rc->pred[i+j*5].decay= 0.5;
- + rc->pred[i+j*5].offset= 0.0;
- + }
- for( j = 0; j < 2; j++ )
- {
- rc->row_preds[i][j].coeff= .25;
- @@ -988,18 +993,12 @@ void x264_ratecontrol_delete( x264_t *h )
- void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
- {
- - x264_pthread_mutex_lock( &h->fenc->mutex );
- h->rc->frame_size_estimated = bits;
- - x264_pthread_mutex_unlock( &h->fenc->mutex );
- }
- -int x264_ratecontrol_get_estimated_size( x264_t const *h)
- +int x264_ratecontrol_get_estimated_size( x264_t const *h )
- {
- - int size;
- - x264_pthread_mutex_lock( &h->fenc->mutex );
- - size = h->rc->frame_size_estimated;
- - x264_pthread_mutex_unlock( &h->fenc->mutex );
- - return size;
- + return h->rc->frame_size_estimated;
- }
- static void accum_p_qp_update( x264_t *h, float qp )
- @@ -1173,6 +1172,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- /* tweak quality based on difference from predicted size */
- if( y < h->i_threadslice_end-1 )
- {
- + int i;
- int prev_row_qp = h->fdec->i_row_qp[y];
- int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
- int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
- @@ -1186,19 +1186,23 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
- float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
- - float size_of_other_slices = rc->frame_size_planned - slice_size_planned;
- + float size_of_other_slices = 0;
- + if( h->param.b_sliced_threads )
- + {
- + for( i = 0; i < h->param.i_threads; i++ )
- + if( h != h->thread[i] )
- + size_of_other_slices += x264_ratecontrol_get_estimated_size( h->thread[i] );
- + }
- + else
- + rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->sps->i_mb_width) );
- +
- /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
- float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
- - float max_frame_error = X264_MAX( 0.05, 1.0 / h->sps->i_mb_height );
- - int b1 = predict_row_size_sum( h, y, rc->qpm );
- -
- - /* Assume that if this slice has become larger than expected,
- - * the other slices will have gotten equally larger. */
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
- /* area at the top of the frame was measured inaccurately. */
- - if( row_bits_so_far(h,y) < 0.05 * (rc->frame_size_planned-size_of_other_slices) )
- + if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned )
- return;
- if( h->sh.i_type != SLICE_TYPE_I )
- @@ -1213,8 +1217,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
- {
- rc->qpm ++;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- while( rc->qpm > i_qp_min
- @@ -1223,20 +1226,18 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
- {
- rc->qpm --;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- /* avoid VBV underflow */
- while( (rc->qpm < h->param.rc.i_qp_max)
- - && (rc->buffer_fill - b1 < rc->buffer_rate * max_frame_error) )
- + && (rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) )
- {
- rc->qpm ++;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- - x264_ratecontrol_set_estimated_size(h, b1);
- + x264_ratecontrol_set_estimated_size( h, predict_row_size_sum( h, y, rc->qpm ) );
- }
- /* loses the fractional part of the frame-wise qp */
- @@ -1958,56 +1959,88 @@ static float rate_estimate_qscale( x264_t *h )
- }
- }
- +void x264_threads_normalize_predictors( x264_t *h )
- +{
- + int i;
- + double totalsize = 0;
- + for( i = 0; i < h->param.i_threads; i++ )
- + totalsize += h->thread[i]->rc->slice_size_planned;
- + double factor = h->rc->frame_size_planned / totalsize;
- + for( i = 0; i < h->param.i_threads; i++ )
- + h->thread[i]->rc->slice_size_planned *= factor;
- +}
- +
- void x264_threads_distribute_ratecontrol( x264_t *h )
- {
- - int i, row, totalsize = 0;
- - if( h->rc->b_vbv )
- - for( row = 0; row < h->sps->i_mb_height; row++ )
- - totalsize += h->fdec->i_row_satd[row];
- + int i, row;
- + x264_ratecontrol_t *rc = h->rc;
- +
- + /* Initialize row predictors */
- + if( h->i_frame == 0 )
- + for( i = 0; i < h->param.i_threads; i++ )
- + {
- + x264_ratecontrol_t *t = h->thread[i]->rc;
- + memcpy( t->row_preds, rc->row_preds, sizeof(rc->row_preds) );
- + }
- +
- for( i = 0; i < h->param.i_threads; i++ )
- {
- x264_t *t = h->thread[i];
- - x264_ratecontrol_t *rc = h->rc;
- - memcpy( t->rc, rc, sizeof(x264_ratecontrol_t) );
- + memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) );
- + t->rc->row_pred = &t->rc->row_preds[h->sh.i_type];
- /* Calculate the planned slice size. */
- - if( h->rc->b_vbv && rc->frame_size_planned )
- + if( rc->b_vbv && rc->frame_size_planned )
- {
- int size = 0;
- for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
- size += h->fdec->i_row_satd[row];
- - t->rc->slice_size_planned = size * rc->frame_size_planned / totalsize;
- + t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], rc->qpm, size );
- }
- else
- t->rc->slice_size_planned = 0;
- }
- + if( rc->b_vbv && rc->frame_size_planned )
- + {
- + x264_threads_normalize_predictors( h );
- +
- + if( rc->single_frame_vbv )
- + {
- + /* Compensate for our max frame error threshold: give more bits (proportionally) to smaller slices. */
- + for( i = 0; i < h->param.i_threads; i++ )
- + {
- + x264_t *t = h->thread[i];
- + t->rc->max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) );
- + t->rc->slice_size_planned += 2 * t->rc->max_frame_error * rc->frame_size_planned;
- + }
- + x264_threads_normalize_predictors( h );
- + }
- +
- + for( i = 0; i < h->param.i_threads; i++ )
- + h->thread[i]->rc->frame_size_estimated = h->thread[i]->rc->slice_size_planned;
- + }
- }
- void x264_threads_merge_ratecontrol( x264_t *h )
- {
- - int i, j, k;
- + int i, row;
- x264_ratecontrol_t *rc = h->rc;
- x264_emms();
- - for( i = 1; i < h->param.i_threads; i++ )
- + for( i = 0; i < h->param.i_threads; i++ )
- {
- - x264_ratecontrol_t *t = h->thread[i]->rc;
- - rc->qpa_rc += t->qpa_rc;
- - rc->qpa_aq += t->qpa_aq;
- - for( j = 0; j < 5; j++ )
- - for( k = 0; k < 2; k++ )
- - {
- - rc->row_preds[j][k].coeff += t->row_preds[j][k].coeff;
- - rc->row_preds[j][k].offset += t->row_preds[j][k].offset;
- - rc->row_preds[j][k].count += t->row_preds[j][k].count;
- - }
- + x264_t *t = h->thread[i];
- + x264_ratecontrol_t *rct = h->thread[i]->rc;
- + int size = 0;
- + for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
- + size += h->fdec->i_row_satd[row];
- + int bits = t->stat.frame.i_mv_bits + t->stat.frame.i_tex_bits + t->stat.frame.i_misc_bits;
- + int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->sps->i_mb_width;
- + update_predictor( &rc->pred[h->sh.i_type+5*i], qp2qscale(rct->qpa_rc/mb_count), size, bits );
- + if( !i )
- + continue;
- + rc->qpa_rc += rct->qpa_rc;
- + rc->qpa_aq += rct->qpa_aq;
- }
- - for( j = 0; j < 5; j++ )
- - for( k = 0; k < 2; k++ )
- - {
- - rc->row_preds[j][k].coeff /= h->param.i_threads;
- - rc->row_preds[j][k].offset /= h->param.i_threads;
- - rc->row_preds[j][k].count /= h->param.i_threads;
- - }
- }
- void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 057f6a6..bb2ed64 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -1394,10 +1394,10 @@ int x264_rc_analyse_slice( x264_t *h )
- int mb_xy = y * h->mb.i_mb_stride;
- for( x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
- {
- - int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor) >> 8;
- + int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
- int inter_cost = h->fenc->lowres_costs[b-p0][p1-b][mb_xy];
- int diff = intra_cost - inter_cost;
- - h->fdec->i_row_satd[y] += diff;
- + h->fdec->i_row_satd[y] += (diff * frames[b]->i_inv_qscale_factor[mb_xy] + 128) >> 8;
- cost += diff;
- }
- }
- --
- 1.6.1.2
- From 293ae5edfae553a5c00ebb854b579994f7010a9a Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 10 Feb 2010 13:44:28 -0800
- Subject: [PATCH 14/14] Allow longer keyints with intra refresh
- If a long keyint is specified (longer than macroblock width-1), the refresh will no longer run continuously; it will occupy only part of each keyint interval.
- In other words, a refresh will take place, and then x264 will wait until keyint is over to start another refresh.
- ---
- encoder/encoder.c | 9 ++++-----
- 1 files changed, 4 insertions(+), 5 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index b3da30e..64f41fb 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -599,8 +599,6 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
- h->param.i_frame_reference = 1;
- }
- - if( h->param.b_intra_refresh )
- - h->param.i_keyint_max = X264_MIN( h->param.i_keyint_max, (h->param.i_width+15)/16 - 1 );
- h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
- h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
- {
- @@ -2306,12 +2304,12 @@ int x264_encoder_encode( x264_t *h,
- if( h->param.b_intra_refresh && h->fenc->i_type == X264_TYPE_P )
- {
- int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
- - float increment = ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max;
- + float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
- if( IS_X264_TYPE_I( h->fref0[0]->i_type ) )
- h->fdec->f_pir_position = 0;
- else
- {
- - if( h->fref0[0]->i_pir_end_col == h->sps->i_mb_width - 1 )
- + if( h->fdec->f_pir_position >= h->param.i_keyint_max )
- {
- h->fdec->f_pir_position = 0;
- h->fenc->b_keyframe = 1;
- @@ -2357,8 +2355,9 @@ int x264_encoder_encode( x264_t *h,
- if( h->fenc->i_type != X264_TYPE_IDR )
- {
- + int time_to_recovery = X264_MIN( h->sps->i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- - x264_sei_recovery_point_write( h, &h->out.bs, h->param.i_keyint_max );
- + x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
- x264_nal_end( h );
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
- }
- --
- 1.6.1.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement