Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 6cbaa59fc4c4a0af3b67bd1a776a2e63a2b11746 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 29 Jan 2010 02:40:41 -0800
- Subject: [PATCH 01/24] Add ability to adjust ratecontrol parameters on the fly
- x264_encoder_reconfig and x264_picture_t->param can now be used to change ratecontrol parameters.
- This is extraordinarily useful in certain streaming situations where the encoder needs to adapt the bitrate to network circumstances.
- What can be changed:
- 1) CRF can be adjusted if in CRF mode.
- 2) VBV maxrate and bufsize can be adjusted if in VBV mode.
- 3) Bitrate can be adjusted if in CBR mode.
- However, x264 cannot switch between modes and cannot change bitrate in ABR mode.
- Also fix a bug where the x264_picture_t->param reconfig method would not always be frame-exact.
- Commit sponsored by SayMama video calling.
- ---
- encoder/encoder.c | 55 +++++++++++++++++++-
- encoder/ratecontrol.c | 137 +++++++++++++++++++++++-------------------------
- encoder/ratecontrol.h | 2 +
- x264.h | 7 ++-
- 4 files changed, 125 insertions(+), 76 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index d873cd0..e266a1a 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -507,6 +507,39 @@ static int x264_validate_parameters( x264_t *h )
- }
- h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
- h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
- + if( h->param.rc.i_vbv_buffer_size )
- + {
- + if( h->param.rc.i_rc_method == X264_RC_CQP )
- + {
- + x264_log( h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n" );
- + h->param.rc.i_vbv_max_bitrate = 0;
- + h->param.rc.i_vbv_buffer_size = 0;
- + }
- + else if( h->param.rc.i_vbv_max_bitrate == 0 )
- + {
- + if( h->param.rc.i_rc_method == X264_RC_ABR )
- + {
- + x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
- + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- + }
- + else
- + {
- + x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
- + h->param.rc.i_vbv_buffer_size = 0;
- + }
- + }
- + else if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
- + h->param.rc.i_rc_method == X264_RC_ABR )
- + {
- + x264_log( h, X264_LOG_WARNING, "max bitrate less than average bitrate, assuming CBR.\n" );
- + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- + }
- + }
- + else if( h->param.rc.i_vbv_max_bitrate )
- + {
- + x264_log( h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n" );
- + h->param.rc.i_vbv_max_bitrate = 0;
- + }
- int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced);
- if( h->param.b_sliced_threads )
- @@ -1071,7 +1104,7 @@ fail:
- ****************************************************************************/
- int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
- {
- - h = h->thread[h->i_thread_phase];
- + h = h->thread[h->thread[0]->i_thread_phase];
- x264_set_aspect_ratio( h, param, 0 );
- #define COPY(var) h->param.var = param->var
- COPY( i_frame_reference ); // but never uses more refs than initially specified
- @@ -1110,11 +1143,29 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
- COPY( i_slice_max_size );
- COPY( i_slice_max_mbs );
- COPY( i_slice_count );
- + /* VBV can't be turned on if it wasn't on to begin with */
- + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
- + {
- + COPY( rc.i_vbv_max_bitrate );
- + COPY( rc.i_vbv_buffer_size );
- + COPY( rc.i_bitrate );
- + }
- + COPY( rc.f_rf_constant );
- #undef COPY
- mbcmp_init( h );
- - return x264_validate_parameters( h );
- + int failure = x264_validate_parameters( h );
- +
- + /* Supported reconfiguration options (1-pass only):
- + * vbv-maxrate
- + * vbv-bufsize
- + * crf
- + * bitrate (CBR only) */
- + if( !failure )
- + x264_ratecontrol_init_reconfigurable( h, 0 );
- +
- + return failure;
- }
- /****************************************************************************
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 63b3be6..52196e7 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -388,6 +388,53 @@ static char *x264_strcat_filename( char *input, char *suffix )
- return output;
- }
- +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- +{
- + x264_ratecontrol_t *rc = h->rc;
- + if( !b_init && rc->b_2pass )
- + return;
- +
- + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
- + {
- + if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
- + {
- + h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
- + x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
- + h->param.rc.i_vbv_buffer_size );
- + }
- +
- + /* We don't support changing the ABR bitrate right now,
- + so if the stream starts as CBR, keep it CBR. */
- + if( rc->b_vbv_min_rate )
- + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- + rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
- + rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
- + rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
- + rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
- + * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
- + if( b_init )
- + {
- + if( h->param.rc.f_vbv_buffer_init > 1. )
- + h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- + h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
- + rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- + rc->b_vbv = 1;
- + rc->b_vbv_min_rate = !rc->b_2pass
- + && h->param.rc.i_rc_method == X264_RC_ABR
- + && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
- + }
- + }
- + if( h->param.rc.i_rc_method == X264_RC_CRF )
- + {
- + /* Arbitrary rescaling to make CRF somewhat similar to QP.
- + * Try to compensate for MB-tree's effects as well. */
- + double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
- + double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
- + rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
- + / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
- + }
- +}
- +
- int x264_ratecontrol_new( x264_t *h )
- {
- x264_ratecontrol_t *rc;
- @@ -426,60 +473,10 @@ int x264_ratecontrol_new( x264_t *h )
- x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
- return -1;
- }
- - if( h->param.rc.i_vbv_buffer_size )
- - {
- - if( h->param.rc.i_rc_method == X264_RC_CQP )
- - {
- - x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
- - h->param.rc.i_vbv_max_bitrate = 0;
- - h->param.rc.i_vbv_buffer_size = 0;
- - }
- - else if( h->param.rc.i_vbv_max_bitrate == 0 )
- - {
- - if( h->param.rc.i_rc_method == X264_RC_ABR )
- - {
- - x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
- - h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
- - }
- - else
- - {
- - x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
- - h->param.rc.i_vbv_buffer_size = 0;
- - }
- - }
- - }
- - if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
- - h->param.rc.i_vbv_max_bitrate > 0)
- - x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
- - else if( h->param.rc.i_vbv_max_bitrate > 0 &&
- - h->param.rc.i_vbv_buffer_size > 0 )
- - {
- - if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
- - {
- - h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
- - x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
- - h->param.rc.i_vbv_buffer_size );
- - }
- - if( h->param.rc.f_vbv_buffer_init > 1. )
- - h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- - rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
- - rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
- - rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
- - h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
- - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- - rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
- - * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
- - rc->b_vbv = 1;
- - rc->b_vbv_min_rate = !rc->b_2pass
- - && h->param.rc.i_rc_method == X264_RC_ABR
- - && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
- - }
- - else if( h->param.rc.i_vbv_max_bitrate )
- - {
- - x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
- - h->param.rc.i_vbv_max_bitrate = 0;
- - }
- - if(rc->rate_tolerance < 0.01)
- +
- + x264_ratecontrol_init_reconfigurable( h, 1 );
- +
- + if( rc->rate_tolerance < 0.01 )
- {
- x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
- rc->rate_tolerance = 0.01;
- @@ -499,16 +496,6 @@ int x264_ratecontrol_new( x264_t *h )
- rc->last_non_b_pict_type = SLICE_TYPE_I;
- }
- - if( h->param.rc.i_rc_method == X264_RC_CRF )
- - {
- - /* Arbitrary rescaling to make CRF somewhat similar to QP.
- - * Try to compensate for MB-tree's effects as well. */
- - double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
- - double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
- - rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
- - / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
- - }
- -
- rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
- rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
- rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
- @@ -1577,15 +1564,15 @@ static void update_vbv( x264_t *h, int bits )
- if( rct->buffer_fill_final < 0 )
- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
- rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
- - rct->buffer_fill_final += rct->buffer_rate;
- - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rct->buffer_size );
- + rct->buffer_fill_final += rcc->buffer_rate;
- + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
- }
- // provisionally update VBV according to the planned size of all frames currently in progress
- static void update_vbv_plan( x264_t *h, int overhead )
- {
- x264_ratecontrol_t *rcc = h->rc;
- - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
- + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
- if( h->i_thread_frames > 1 )
- {
- int j = h->rc - h->thread[0]->rc;
- @@ -1603,6 +1590,8 @@ static void update_vbv_plan( x264_t *h, int overhead )
- rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
- }
- }
- + rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
- + rcc->buffer_fill -= overhead;
- }
- // apply VBV constraints and clip qscale to between lmin and lmax
- @@ -2027,8 +2016,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
- /* these vars are updated in x264_ratecontrol_start()
- * so copy them from the context that most recently started (prev)
- - * to the context that's about to start (cur).
- - */
- + * to the context that's about to start (cur). */
- COPY(accum_p_qp);
- COPY(accum_p_norm);
- COPY(last_satd);
- @@ -2040,6 +2028,14 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- COPY(bframes);
- COPY(prev_zone);
- COPY(qpbuf_pos);
- + /* these vars can be updated by x264_ratecontrol_init_reconfigurable */
- + COPY(buffer_rate);
- + COPY(buffer_size);
- + COPY(single_frame_vbv);
- + COPY(cbr_decay);
- + COPY(b_vbv_min_rate);
- + COPY(rate_factor_constant);
- + COPY(bitrate);
- #undef COPY
- }
- if( cur != next )
- @@ -2047,8 +2043,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- #define COPY(var) next->rc->var = cur->rc->var
- /* these vars are updated in x264_ratecontrol_end()
- * so copy them from the context that most recently ended (cur)
- - * to the context that's about to end (next)
- - */
- + * to the context that's about to end (next) */
- COPY(cplxr_sum);
- COPY(expected_bits_sum);
- COPY(wanted_bits_window);
- diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
- index 5a8d088..2767866 100644
- --- a/encoder/ratecontrol.h
- +++ b/encoder/ratecontrol.h
- @@ -27,6 +27,8 @@
- int x264_ratecontrol_new ( x264_t * );
- void x264_ratecontrol_delete( x264_t * );
- +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
- +
- void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
- void x264_adaptive_quant( x264_t * );
- int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
- diff --git a/x264.h b/x264.h
- index 2550864..e7d19b7 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 84
- +#define X264_BUILD 85
- /* x264_t:
- * opaque handler for encoder */
- @@ -480,11 +480,12 @@ typedef struct
- x264_t *x264_encoder_open( x264_param_t * );
- /* x264_encoder_reconfig:
- - * analysis-related parameters from x264_param_t are copied.
- + * various parameters from x264_param_t are copied.
- * this takes effect immediately, on whichever frame is encoded next;
- * due to delay, this may not be the next frame passed to encoder_encode.
- * if the change should apply to some particular frame, use x264_picture_t->param instead.
- - * returns 0 on success, negative on parameter validation error. */
- + * returns 0 on success, negative on parameter validation error.
- + * not all parameters can be changed; see the actual function for a detailed breakdown. */
- int x264_encoder_reconfig( x264_t *, x264_param_t * );
- /* x264_encoder_parameters:
- * copies the current internal set of parameters to the pointer provided
- --
- 1.6.1.2
- From 6c5c82b796b48f7005426cc3f55a90b3e1f582fd Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 1 Feb 2010 13:04:47 -0800
- Subject: [PATCH 02/24] Slightly faster predictor_difference_mmxext
- ---
- common/x86/util.h | 17 ++++++++++-------
- 1 files changed, 10 insertions(+), 7 deletions(-)
- diff --git a/common/x86/util.h b/common/x86/util.h
- index efc700a..c8bcf4b 100644
- --- a/common/x86/util.h
- +++ b/common/x86/util.h
- @@ -45,8 +45,9 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
- #define x264_predictor_difference x264_predictor_difference_mmxext
- static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t i_mvc )
- {
- - int sum = 0;
- - uint16_t output[4];
- + int sum;
- + static const uint64_t pw_1 = 0x0001000100010001ULL;
- +
- asm(
- "pxor %%mm4, %%mm4 \n"
- "test $1, %1 \n"
- @@ -56,7 +57,7 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
- "psubw %%mm3, %%mm0 \n"
- "jmp 2f \n"
- "3: \n"
- - "sub $1, %1 \n"
- + "dec %1 \n"
- "1: \n"
- "movq -8(%2,%1,4), %%mm0 \n"
- "psubw -4(%2,%1,4), %%mm0 \n"
- @@ -67,11 +68,13 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
- "pmaxsw %%mm2, %%mm0 \n"
- "paddusw %%mm0, %%mm4 \n"
- "jg 1b \n"
- - "movq %%mm4, %0 \n"
- - :"=m"(output), "+r"(i_mvc)
- - :"r"(mvc), "m"(M64( mvc ))
- + "pmaddwd %4, %%mm4 \n"
- + "pshufw $14, %%mm4, %%mm0 \n"
- + "paddd %%mm0, %%mm4 \n"
- + "movd %%mm4, %0 \n"
- + :"=r"(sum), "+r"(i_mvc)
- + :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
- );
- - sum += output[0] + output[1] + output[2] + output[3];
- return sum;
- }
- #define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext
- --
- 1.6.1.2
- From fabe7c83223feb254c8ff956ec934020bd2d0964 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 2 Feb 2010 03:15:18 -0800
- Subject: [PATCH 03/24] Improve bidir search, fix some artifacts in fades
- Modify analysis to allow bidir to use different motion vectors than L0/L1.
- Always try the <0,0,0,0> motion vector for bidir.
- Eliminates almost all errant motion vectors in fades.
- Slightly improves PSNR as well (~0.015 dB).
- ---
- encoder/analyse.c | 50 ++++++++++++++++++++++++++++++++++++++------------
- 1 files changed, 38 insertions(+), 12 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 666596b..1fb2206 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -40,6 +40,7 @@ typedef struct
- int i_ref;
- int i_rd16x16;
- x264_me_t me16x16;
- + x264_me_t bi16x16; /* for b16x16 BI mode, since MVs can differ from l0/l1 */
- /* 8x8 */
- int i_cost8x8;
- @@ -1722,20 +1723,45 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
- a->l1.me16x16.i_ref = a->l1.i_ref;
- /* get cost of BI mode */
- + int ref_costs = REF_COST( 0, a->l0.i_ref ) + REF_COST( 1, a->l1.i_ref );
- + h->mc.memcpy_aligned( &a->l0.bi16x16, &a->l0.me16x16, sizeof(x264_me_t) );
- + h->mc.memcpy_aligned( &a->l1.bi16x16, &a->l1.me16x16, sizeof(x264_me_t) );
- src0 = h->mc.get_ref( pix0, &stride0,
- h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
- - a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16, weight_none );
- + a->l0.bi16x16.mv[0], a->l0.bi16x16.mv[1], 16, 16, weight_none );
- src1 = h->mc.get_ref( pix1, &stride1,
- h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
- - a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16, weight_none );
- + a->l1.bi16x16.mv[0], a->l1.bi16x16.mv[1], 16, 16, weight_none );
- h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
- a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
- - + REF_COST( 0, a->l0.i_ref )
- - + REF_COST( 1, a->l1.i_ref )
- - + a->l0.me16x16.cost_mv
- - + a->l1.me16x16.cost_mv;
- + + ref_costs
- + + a->l0.bi16x16.cost_mv
- + + a->l1.bi16x16.cost_mv;
- +
- +
- + /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
- + if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
- + {
- + int l0_mv_cost = a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[0]]
- + + a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[1]];
- + int l1_mv_cost = a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[0]]
- + + a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[1]];
- + h->mc.avg[PIXEL_16x16]( pix0, 16, h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
- + h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
- + h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
- + int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
- + + ref_costs + l0_mv_cost + l1_mv_cost;
- + if( cost00 < a->i_cost16x16bi )
- + {
- + M32( a->l0.bi16x16.mv ) = 0;
- + M32( a->l1.bi16x16.mv ) = 0;
- + a->l0.bi16x16.cost_mv = l0_mv_cost;
- + a->l1.bi16x16.cost_mv = l1_mv_cost;
- + a->i_cost16x16bi = cost00;
- + }
- + }
- /* mb type cost */
- a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
- @@ -2205,7 +2231,7 @@ static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a )
- {
- case D_16x16:
- if( h->mb.i_type == B_BI_BI )
- - x264_me_refine_bidir_satd( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );
- + x264_me_refine_bidir_satd( h, &a->l0.bi16x16, &a->l1.bi16x16, i_biweight );
- break;
- case D_16x8:
- for( i=0; i<2; i++ )
- @@ -2819,8 +2845,8 @@ intra_analysis:
- }
- else if( i_type == B_BI_BI )
- {
- - x264_me_refine_qpel( h, &analysis.l0.me16x16 );
- - x264_me_refine_qpel( h, &analysis.l1.me16x16 );
- + x264_me_refine_qpel( h, &analysis.l0.bi16x16 );
- + x264_me_refine_qpel( h, &analysis.l1.bi16x16 );
- }
- }
- else if( i_partition == D_16x8 )
- @@ -2938,7 +2964,7 @@ intra_analysis:
- x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );
- }
- else if( i_type == B_BI_BI )
- - x264_me_refine_bidir_rd( h, &analysis.l0.me16x16, &analysis.l1.me16x16, i_biweight, 0, analysis.i_lambda2 );
- + x264_me_refine_bidir_rd( h, &analysis.l0.bi16x16, &analysis.l1.bi16x16, i_biweight, 0, analysis.i_lambda2 );
- }
- else if( i_partition == D_16x8 )
- {
- @@ -3121,10 +3147,10 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
- break;
- case B_BI_BI:
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
- + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.bi16x16.mv );
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
- + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.bi16x16.mv );
- break;
- }
- break;
- --
- 1.6.1.2
- From 652a7dff1d179c8bad98657bccdb42d5b2c25b81 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 14:22:05 -0800
- Subject: [PATCH 04/24] Faster CABAC MB header writing
- Reorganize the header writing to merge mb type and mb mode info (mv, pred, etc)
- Reduces redundant branches and better splits the code between frame types (for better code cache usage).
- Also slightly simplify qp delta calculation.
- Also make CAVLC and CABAC a bit more consistent in structure and function names.
- ---
- encoder/cabac.c | 573 ++++++++++++++++++++++++++-----------------------------
- encoder/cavlc.c | 118 ++++++------
- 2 files changed, 334 insertions(+), 357 deletions(-)
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 271f527..6ff2aed 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -29,151 +29,6 @@
- #define RDO_SKIP_BS 0
- #endif
- -static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
- - int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
- -{
- - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, ctx0, 0 );
- - }
- -#if !RDO_SKIP_BS
- - else if( i_mb_type == I_PCM )
- - {
- - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- - x264_cabac_encode_flush( h, cb );
- - }
- -#endif
- - else
- - {
- - int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
- -
- - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- - x264_cabac_encode_terminal( cb );
- -
- - x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
- - if( h->mb.i_cbp_chroma == 0 )
- - x264_cabac_encode_decision_noup( cb, ctx2, 0 );
- - else
- - {
- - x264_cabac_encode_decision( cb, ctx2, 1 );
- - x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
- - }
- - x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
- - x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
- - }
- -}
- -
- -static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- -{
- - const int i_mb_type = h->mb.i_type;
- -
- - if( h->sh.b_mbaff &&
- - (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
- - {
- - x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
- - }
- -
- - if( h->sh.i_type == SLICE_TYPE_I )
- - {
- - int ctx = 0;
- - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
- - ctx++;
- - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
- - ctx++;
- -
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
- - }
- - else if( h->sh.i_type == SLICE_TYPE_P )
- - {
- - /* prefix: 14, suffix: 17 */
- - if( i_mb_type == P_L0 )
- - {
- - x264_cabac_encode_decision_noup( cb, 14, 0 );
- - x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
- - x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, 14, 0 );
- - x264_cabac_encode_decision_noup( cb, 15, 0 );
- - x264_cabac_encode_decision_noup( cb, 16, 1 );
- - }
- - else /* intra */
- - {
- - /* prefix */
- - x264_cabac_encode_decision_noup( cb, 14, 1 );
- -
- - /* suffix */
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
- - }
- - }
- - else //if( h->sh.i_type == SLICE_TYPE_B )
- - {
- - int ctx = 0;
- - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- - ctx++;
- - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- - ctx++;
- -
- - if( i_mb_type == B_DIRECT )
- - {
- - x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
- - return;
- - }
- - x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
- -
- - if( i_mb_type == B_8x8 )
- - {
- - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+5, 1 );
- - }
- - else if( IS_INTRA( i_mb_type ) )
- - {
- - /* prefix */
- - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
- - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- - x264_cabac_encode_decision( cb, 27+5, 0 );
- - x264_cabac_encode_decision( cb, 27+5, 1 );
- -
- - /* suffix */
- - x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
- - }
- - else
- - {
- - static const uint8_t i_mb_bits[9*3] =
- - {
- - 0x31, 0x29, 0x4, /* L0 L0 */
- - 0x35, 0x2d, 0, /* L0 L1 */
- - 0x43, 0x63, 0, /* L0 BI */
- - 0x3d, 0x2f, 0, /* L1 L0 */
- - 0x39, 0x25, 0x6, /* L1 L1 */
- - 0x53, 0x73, 0, /* L1 BI */
- - 0x4b, 0x6b, 0, /* BI L0 */
- - 0x5b, 0x7b, 0, /* BI L1 */
- - 0x47, 0x67, 0x21 /* BI BI */
- - };
- -
- - const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
- - int bits = i_mb_bits[idx];
- -
- - x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
- - x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
- - if( bits != 1 )
- - {
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- - if( bits != 1 )
- - x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
- - }
- - }
- - }
- -}
- -
- static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
- {
- if( i_pred == i_mode )
- @@ -209,6 +64,12 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
- }
- }
- +static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
- +{
- + int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
- + x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
- +}
- +
- static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
- {
- int cbp = h->mb.i_cbp_luma;
- @@ -244,7 +105,6 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
- static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- {
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- - int ctx;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
- @@ -257,7 +117,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
- * we don't have to check for them. */
- - ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
- + int ctx = !!h->mb.i_last_dqp;
- if( i_dqp != 0 )
- {
- @@ -321,12 +181,6 @@ static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub )
- x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
- }
- -static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
- -{
- - int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
- - x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
- -}
- -
- static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
- {
- const int i8 = x264_scan8[idx];
- @@ -463,6 +317,267 @@ static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
- }
- }
- +static void x264_cabac_mb_header_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
- + int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
- +{
- + if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- + {
- + int i, di = h->mb.b_transform_8x8 ? 4 : 1;
- + x264_cabac_encode_decision_noup( cb, ctx0, 0 );
- +
- + if( h->pps->b_transform_8x8_mode )
- + x264_cabac_mb_transform_size( h, cb );
- +
- + for( i = 0; i < 16; i += di )
- + {
- + const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- + const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- + x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- + }
- + }
- +#if !RDO_SKIP_BS
- + else if( i_mb_type == I_PCM )
- + {
- + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- + x264_cabac_encode_flush( h, cb );
- + return;
- + }
- +#endif
- + else
- + {
- + int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
- +
- + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
- + x264_cabac_encode_terminal( cb );
- +
- + x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
- + if( h->mb.i_cbp_chroma == 0 )
- + x264_cabac_encode_decision_noup( cb, ctx2, 0 );
- + else
- + {
- + x264_cabac_encode_decision( cb, ctx2, 1 );
- + x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
- + }
- + x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
- + x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
- + }
- + x264_cabac_mb_intra_chroma_pred_mode( h, cb );
- +}
- +
- +static inline void x264_cabac_mb_header( x264_t *h, x264_cabac_t *cb ) /* Write the CABAC macroblock header into cb: the mb_type bins for the current slice type, then (for inter types) the reference indices and motion vector differences; intra types are delegated to x264_cabac_mb_header_intra(). */
- +{
- + const int i_mb_type = h->mb.i_type;
- + int i_list, i;
- +
- + if( h->sh.b_mbaff &&
- + (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) /* MBAFF only: even MB row, or the MB one stride above is a skip */
- + {
- + x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced ); /* interlaced flag of this MB; presumably the mb_field_decoding_flag syntax element -- TODO confirm */
- + }
- +
- + if( h->sh.i_type == SLICE_TYPE_I )
- + {
- + int ctx = 0; /* counts the left/top neighbours whose type is >= 0 and is not I_4x4 */
- + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
- + ctx++;
- + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
- + ctx++;
- +
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 ); /* only the first context depends on the neighbours; the other five are fixed */
- + }
- + else if( h->sh.i_type == SLICE_TYPE_P )
- + {
- + /* prefix: 14, suffix: 17 */
- + if( i_mb_type == P_L0 )
- + {
- + x264_cabac_encode_decision_noup( cb, 14, 0 ); /* first bin 0: inter MB (the intra branch below writes 1 here) */
- + if( h->mb.i_partition == D_16x16 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 0 ); /* 16x16 is signalled as bins 0,0 on contexts 15,16 */
- + x264_cabac_encode_decision_noup( cb, 16, 0 );
- + if( h->mb.pic.i_fref[0] > 1 ) /* a ref index is written only when list 0 holds more than one reference */
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
- + }
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 1 ); /* 16x8 is signalled as bins 1,1 on contexts 15,17 */
- + x264_cabac_encode_decision_noup( cb, 17, 1 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 ); /* both ref indices are written before any mvd */
- + x264_cabac_mb_ref( h, cb, 0, 8 );
- + }
- + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
- + x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + x264_cabac_encode_decision_noup( cb, 15, 1 ); /* 8x16 is signalled as bins 1,0 on contexts 15,17 */
- + x264_cabac_encode_decision_noup( cb, 17, 0 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + x264_cabac_mb_ref( h, cb, 0, 4 );
- + }
- + x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
- + x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
- + }
- + }
- + else if( i_mb_type == P_8x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 14, 0 ); /* P_8x8 is signalled as bins 0,0,1 on contexts 14,15,16 */
- + x264_cabac_encode_decision_noup( cb, 15, 0 );
- + x264_cabac_encode_decision_noup( cb, 16, 1 );
- +
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
- +
- + /* ref 0 */
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + x264_cabac_mb_ref( h, cb, 0, 0 ); /* one ref index per 8x8 block, at block indices 0, 4, 8 and 12 */
- + x264_cabac_mb_ref( h, cb, 0, 4 );
- + x264_cabac_mb_ref( h, cb, 0, 8 );
- + x264_cabac_mb_ref( h, cb, 0, 12 );
- + }
- +
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb8x8_mvd( h, cb, i );
- + }
- + else /* intra */
- + {
- + /* prefix */
- + x264_cabac_encode_decision_noup( cb, 14, 1 );
- +
- + /* suffix */
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 ); /* intra header written with the P-slice context set starting at 17 */
- + }
- + }
- + else //if( h->sh.i_type == SLICE_TYPE_B )
- + {
- + int ctx = 0; /* counts the left/top neighbours whose type is >= 0 and is neither B_SKIP nor B_DIRECT */
- + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- + ctx++;
- + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- + ctx++;
- +
- + if( i_mb_type == B_DIRECT )
- + {
- + x264_cabac_encode_decision_noup( cb, 27+ctx, 0 ); /* B_DIRECT is a single 0 bin; no refs or mvds follow */
- + return;
- + }
- + x264_cabac_encode_decision_noup( cb, 27+ctx, 1 ); /* every other B type starts with a 1 bin on the neighbour-dependent context */
- +
- + if( i_mb_type == B_8x8 )
- + {
- + x264_cabac_encode_decision_noup( cb, 27+3, 1 ); /* B_8x8: five further bins 1,1,1,1,1 on contexts 27+3, 27+4, 27+5, 27+5, 27+5 */
- + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision_noup( cb, 27+5, 1 );
- +
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
- +
- + /* ref */
- + if( h->mb.pic.i_fref[0] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] ) /* table entry gates whether list 0 data is written for this sub-partition type */
- + x264_cabac_mb_ref( h, cb, 0, 4*i );
- +
- + if( h->mb.pic.i_fref[1] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_ref( h, cb, 1, 4*i );
- +
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 ); /* all list 0 mvds are written before any list 1 mvd */
- +
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
- + }
- + else if( IS_INTRA( i_mb_type ) )
- + {
- + /* prefix */
- + x264_cabac_encode_decision_noup( cb, 27+3, 1 ); /* intra in a B slice: five further bins 1,1,1,0,1, then the intra suffix */
- + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- + x264_cabac_encode_decision ( cb, 27+5, 0 );
- + x264_cabac_encode_decision ( cb, 27+5, 1 );
- +
- + /* suffix */
- + x264_cabac_mb_header_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
- + }
- + else
- + {
- + static const uint8_t i_mb_bits[9*3] = /* packed bin strings, consumed LSB-first by the code below; a remaining value of 1 marks the end of the string. Rows follow the list-usage pairs named in the row comments, columns are indexed by i_partition - D_16x8. NOTE(review): the 0 entries are presumably type/partition combinations that never occur -- confirm. */
- + {
- + 0x31, 0x29, 0x4, /* L0 L0 */
- + 0x35, 0x2d, 0, /* L0 L1 */
- + 0x43, 0x63, 0, /* L0 BI */
- + 0x3d, 0x2f, 0, /* L1 L0 */
- + 0x39, 0x25, 0x6, /* L1 L1 */
- + 0x53, 0x73, 0, /* L1 BI */
- + 0x4b, 0x6b, 0, /* BI L0 */
- + 0x5b, 0x7b, 0, /* BI L1 */
- + 0x47, 0x67, 0x21 /* BI BI */
- + };
- +
- + const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8); /* row = mb type relative to B_L0_L0, column = partition relative to D_16x8 */
- + int bits = i_mb_bits[idx];
- +
- + x264_cabac_encode_decision_noup( cb, 27+3, bits&1 ); /* bit 0 goes on context 27+3 */
- + x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2; /* bit 1 goes on context 27+4 if bit 0 was set, else 27+5; both bits are then dropped */
- + if( bits != 1 ) /* only the end marker left: the two-bin string is complete */
- + {
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1; /* longer strings: three more bins on context 27+5 */
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
- + if( bits != 1 ) /* one final bin unless only the end marker remains */
- + x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
- + }
- +
- + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type]; /* b_list[list][partition]: nonzero entries gate the ref/mvd writes below */
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + if( b_list[0][0] )
- + x264_cabac_mb_ref( h, cb, 0, 0 );
- + if( b_list[0][1] && h->mb.i_partition != D_16x16 ) /* 16x16 has no second partition */
- + x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) ); /* second partition is at block index 8 for 16x8 and 4 for 8x16 */
- + }
- + if( h->mb.pic.i_fref[1] > 1 )
- + {
- + if( b_list[1][0] )
- + x264_cabac_mb_ref( h, cb, 1, 0 );
- + if( b_list[1][1] && h->mb.i_partition != D_16x16 )
- + x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
- + }
- + for( i_list = 0; i_list < 2; i_list++ ) /* mvds: every list 0 partition first, then every list 1 partition */
- + {
- + if( h->mb.i_partition == D_16x16 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
- + }
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
- + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
- + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
- + }
- + }
- + }
- + }
- +}
- +
- /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
- * 1-> AC 16x16 i_idx = luma4x4idx
- * 2-> Luma4x4 i_idx = luma4x4idx
- @@ -752,7 +867,6 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
- void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- {
- const int i_mb_type = h->mb.i_type;
- - int i_list;
- int i;
- #if !RDO_SKIP_BS
- @@ -760,15 +874,14 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- int i_mb_pos_tex;
- #endif
- - /* Write the MB type */
- - x264_cabac_mb_type( h, cb );
- + x264_cabac_mb_header( h, cb );
- #if !RDO_SKIP_BS
- + i_mb_pos_tex = x264_cabac_pos( cb );
- + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- +
- if( i_mb_type == I_PCM )
- {
- - i_mb_pos_tex = x264_cabac_pos( cb );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -
- memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
- cb->p += 256;
- for( i = 0; i < 8; i++ )
- @@ -793,140 +906,6 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- }
- #endif
- - if( IS_INTRA( i_mb_type ) )
- - {
- - if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
- - x264_cabac_mb_transform_size( h, cb );
- -
- - if( i_mb_type != I_16x16 )
- - {
- - int di = h->mb.b_transform_8x8 ? 4 : 1;
- - for( i = 0; i < 16; i += di )
- - {
- - const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- - const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- - x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- - }
- - }
- -
- - x264_cabac_mb_intra_chroma_pred_mode( h, cb );
- - }
- - else if( i_mb_type == P_L0 )
- - {
- - if( h->mb.i_partition == D_16x16 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
- - }
- - else if( h->mb.i_partition == D_16x8 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 8 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
- - x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
- - }
- - else //if( h->mb.i_partition == D_8x16 )
- - {
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 4 );
- - }
- - x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
- - x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
- - }
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
- -
- - /* ref 0 */
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - x264_cabac_mb_ref( h, cb, 0, 4 );
- - x264_cabac_mb_ref( h, cb, 0, 8 );
- - x264_cabac_mb_ref( h, cb, 0, 12 );
- - }
- -
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb8x8_mvd( h, cb, i );
- - }
- - else if( i_mb_type == B_8x8 )
- - {
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
- -
- - /* ref */
- - if( h->mb.pic.i_fref[0] > 1 )
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_ref( h, cb, 0, 4*i );
- -
- - if( h->mb.pic.i_fref[1] > 1 )
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_ref( h, cb, 1, 4*i );
- -
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
- -
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
- - }
- - else if( i_mb_type != B_DIRECT )
- - {
- - /* All B mode */
- - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- - if( h->mb.pic.i_fref[0] > 1 )
- - {
- - if( b_list[0][0] )
- - x264_cabac_mb_ref( h, cb, 0, 0 );
- - if( b_list[0][1] && h->mb.i_partition != D_16x16 )
- - x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
- - }
- - if( h->mb.pic.i_fref[1] > 1 )
- - {
- - if( b_list[1][0] )
- - x264_cabac_mb_ref( h, cb, 1, 0 );
- - if( b_list[1][1] && h->mb.i_partition != D_16x16 )
- - x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
- - }
- - for( i_list = 0; i_list < 2; i_list++ )
- - {
- - if( h->mb.i_partition == D_16x16 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
- - }
- - else if( h->mb.i_partition == D_16x8 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
- - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
- - }
- - else //if( h->mb.i_partition == D_8x16 )
- - {
- - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
- - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
- - }
- - }
- - }
- -
- -#if !RDO_SKIP_BS
- - i_mb_pos_tex = x264_cabac_pos( cb );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -#endif
- -
- if( i_mb_type != I_16x16 )
- {
- x264_cabac_mb_cbp_luma( h, cb );
- @@ -934,11 +913,9 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- }
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- - {
- x264_cabac_mb_transform_size( h, cb );
- - }
- - if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- {
- const int b_intra = IS_INTRA( i_mb_type );
- x264_cabac_mb_qp_delta( h, cb );
- @@ -950,7 +927,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
- /* AC Luma */
- - if( h->mb.i_cbp_luma != 0 )
- + if( h->mb.i_cbp_luma )
- for( i = 0; i < 16; i++ )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
- }
- @@ -967,7 +944,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
- }
- - if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */
- + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
- {
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
- block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index c65c9bd..d18408b 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -203,7 +203,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
- *nnz = block_residual_write_cavlc(h,cat,l,nC);\
- }
- -static void cavlc_qp_delta( x264_t *h )
- +static void x264_cavlc_mb_qp_delta( x264_t *h )
- {
- bs_t *s = &h->out.bs;
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- @@ -228,7 +228,7 @@ static void cavlc_qp_delta( x264_t *h )
- bs_write_se( s, i_dqp );
- }
- -static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- +static void x264_cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- {
- bs_t *s = &h->out.bs;
- ALIGNED_4( int16_t mvp[2] );
- @@ -237,26 +237,26 @@ static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
- bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
- }
- -static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
- +static inline void x264_cavlc_mb8x8_mvd( x264_t *h, int i )
- {
- switch( h->mb.i_sub_partition[i] )
- {
- case D_L0_8x8:
- - cavlc_mb_mvd( h, 0, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- break;
- case D_L0_8x4:
- - cavlc_mb_mvd( h, 0, 4*i+0, 2 );
- - cavlc_mb_mvd( h, 0, 4*i+2, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+2, 2 );
- break;
- case D_L0_4x8:
- - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- break;
- case D_L0_4x4:
- - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+2, 1 );
- - cavlc_mb_mvd( h, 0, 4*i+3, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+2, 1 );
- + x264_cavlc_mb_mvd( h, 0, 4*i+3, 1 );
- break;
- }
- }
- @@ -372,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
- if( h->mb.pic.i_fref[0] > 1 )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- }
- else if( h->mb.i_partition == D_16x8 )
- {
- @@ -382,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- }
- - cavlc_mb_mvd( h, 0, 0, 4 );
- - cavlc_mb_mvd( h, 0, 8, 4 );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 8, 4 );
- }
- else if( h->mb.i_partition == D_8x16 )
- {
- @@ -393,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- }
- - cavlc_mb_mvd( h, 0, 0, 2 );
- - cavlc_mb_mvd( h, 0, 4, 2 );
- + x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4, 2 );
- }
- }
- else if( i_mb_type == P_8x8 )
- @@ -429,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
- }
- for( i = 0; i < 4; i++ )
- - cavlc_mb8x8_mvd( h, i );
- + x264_cavlc_mb8x8_mvd( h, i );
- }
- else if( i_mb_type == B_8x8 )
- {
- @@ -452,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h )
- /* mvd */
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - cavlc_mb_mvd( h, 0, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - cavlc_mb_mvd( h, 1, 4*i, 2 );
- + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- }
- else if( i_mb_type != B_DIRECT )
- {
- @@ -470,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
- {
- if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- }
- else
- {
- @@ -481,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h )
- if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- if( h->mb.i_partition == D_16x8 )
- {
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
- - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- }
- else //if( h->mb.i_partition == D_8x16 )
- {
- - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
- - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
- - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
- - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- }
- }
- }
- @@ -514,31 +514,31 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write1( s, h->mb.b_transform_8x8 );
- /* write residual */
- - if( i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- {
- - cavlc_qp_delta( h );
- + x264_cavlc_mb_qp_delta( h );
- - /* DC Luma */
- - block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
- + if( i_mb_type == I_16x16 )
- + {
- + /* DC Luma */
- + block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
- - /* AC Luma */
- - if( h->mb.i_cbp_luma )
- - for( i = 0; i < 16; i++ )
- - block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
- - }
- - else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
- - {
- - cavlc_qp_delta( h );
- - x264_macroblock_luma_write_cavlc( h, 0, 3 );
- - }
- - if( h->mb.i_cbp_chroma )
- - {
- - /* Chroma DC residual present */
- - block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- - block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
- - if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- - for( i = 16; i < 24; i++ )
- - block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
- + /* AC Luma */
- + if( h->mb.i_cbp_luma )
- + for( i = 0; i < 16; i++ )
- + block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
- + }
- + else
- + x264_macroblock_luma_write_cavlc( h, 0, 3 );
- +
- + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
- + {
- + block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
- + block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
- + if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
- + for( i = 16; i < 24; i++ )
- + block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
- + }
- }
- #if !RDO_SKIP_BS
- @@ -563,22 +563,22 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
- if( i_mb_type == P_8x8 )
- {
- - cavlc_mb8x8_mvd( h, i8 );
- + x264_cavlc_mb8x8_mvd( h, i8 );
- bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- }
- else if( i_mb_type == P_L0 )
- - cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- }
- else //if( i_mb_type == B_8x8 )
- {
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - cavlc_mb_mvd( h, 0, 4*i8, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -596,7 +596,7 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
- {
- int b_8x4 = i_pixel == PIXEL_8x4;
- h->out.bs.i_bits_encoded = 0;
- - cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
- + x264_cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
- block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
- if( i_pixel != PIXEL_4x4 )
- {
- --
- 1.6.1.2
- From e494167d136a8a8cd044c5a555ecc1311c90effc Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 18:19:29 -0800
- Subject: [PATCH 05/24] Simplify decimate checks in macroblock_encode
- Also fix a misleading comment.
- ---
- common/common.h | 1 +
- encoder/analyse.c | 2 ++
- encoder/macroblock.c | 12 +++++-------
- 3 files changed, 8 insertions(+), 7 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 950f48f..8b1b05a 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -484,6 +484,7 @@ struct x264_t
- int b_chroma_me;
- int b_trellis;
- int b_noise_reduction;
- + int b_dct_decimate;
- int i_psy_rd; /* Psy RD strength--fixed point value*/
- int i_psy_trellis; /* Psy trellis strength--fixed point value*/
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 1fb2206..92d6584 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -364,6 +364,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
- h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
- && h->mb.i_subpel_refine >= 5;
- + h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
- + (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
- h->mb.b_transform_8x8 = 0;
- h->mb.b_noise_reduction = 0;
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index e4edb8a..fa7942d 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -208,8 +208,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] );
- int i, nz;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
- - int decimate_score = b_decimate ? 0 : 9;
- + int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
- if( h->mb.b_lossless )
- {
- @@ -342,7 +341,7 @@ static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp,
- void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- {
- int i, ch, nz, nz_dc;
- - int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
- + int b_decimate = b_inter && h->mb.b_dct_decimate;
- ALIGNED_ARRAY_16( int16_t, dct2x2,[4] );
- h->mb.i_cbp_chroma = 0;
- @@ -607,7 +606,7 @@ void x264_macroblock_encode( x264_t *h )
- {
- int i_cbp_dc = 0;
- int i_qp = h->mb.i_qp;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
- + int b_decimate = h->mb.b_dct_decimate;
- int b_force_no_skip = 0;
- int i,idx,nz;
- h->mb.i_cbp_luma = 0;
- @@ -914,8 +913,7 @@ void x264_macroblock_encode( x264_t *h )
- /*****************************************************************************
- * x264_macroblock_probe_skip:
- - * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
- - * the previous QP
- + * Check if the current MB could be encoded as a [PB]_SKIP
- *****************************************************************************/
- int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
- {
- @@ -1052,7 +1050,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- int i_qp = h->mb.i_qp;
- uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
- uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
- - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
- + int b_decimate = h->mb.b_dct_decimate;
- int nnz8x8 = 0;
- int ch, nz;
- --
- 1.6.1.2
- From d80fa482a6f99b0d0bb59fdace6ef5cdbd67b98e Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 18:36:44 -0800
- Subject: [PATCH 06/24] Fix subpel iteration counts with B-frame analysis and subme 6/8
- Since subme 6 means "like subme 5, except RD on P-frames", B-frame analysis
- shouldn't use the RD subpel counts at subme 6. Similarly with subme 8.
- Slightly faster (and very marginally worse) compression at subme 6 and 8.
- ---
- encoder/analyse.c | 2 ++
- 1 files changed, 2 insertions(+), 0 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 92d6584..c15bf8f 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -362,6 +362,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- h->mb.i_me_method = h->param.analyse.i_me_method;
- h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
- + if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
- + h->mb.i_subpel_refine--;
- h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
- && h->mb.i_subpel_refine >= 5;
- h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
- --
- 1.6.1.2
- From 34b59c92d298b0fb58130d8601d053bfea1c870a Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 20:01:16 -0800
- Subject: [PATCH 07/24] Smarter QPRD
- Catch some cases in which RD checks can be avoided; reduces QPRD RD calls by 10-20%.
- ---
- encoder/analyse.c | 42 ++++++++++++++++++++++++++++++++++++++----
- 1 files changed, 38 insertions(+), 4 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index c15bf8f..53ca025 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -2307,9 +2307,10 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
- int last_qp_tried = 0;
- origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
- + int origcbp = h->mb.cbp[h->mb.i_mb_xy];
- /* If CBP is already zero, don't raise the quantizer any higher. */
- - for( direction = h->mb.cbp[h->mb.i_mb_xy] ? 1 : -1; direction >= -1; direction-=2 )
- + for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
- {
- /* Without psy-RD, require monotonicity when moving quant away from previous
- * macroblock's quant; allow 1 failure when moving quant towards previous quant.
- @@ -2324,14 +2325,47 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_qp = orig_qp;
- failures = 0;
- prevcost = origcost;
- +
- + /* If the current QP results in an empty CBP, it's highly likely that lower QPs
- + * (up to a point) will too. So, jump down to where the threshold will kick in
- + * and check the QP there. If the CBP is still empty, skip the main loop.
- + * If it isn't empty, we would have ended up having to check this QP anyways,
- + * so as long as we store it for later lookup, we lose nothing. */
- + int already_checked_qp = -1;
- + int already_checked_cost = COST_MAX;
- + if( direction == -1 )
- + {
- + if( !origcbp )
- + {
- + h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
- + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- + already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
- + if( !h->mb.cbp[h->mb.i_mb_xy] )
- + {
- + /* If our empty-CBP block is lower QP than the last QP,
- + * the last QP cannot possibly have a CBP either. */
- + if( h->mb.i_last_qp > h->mb.i_qp )
- + last_qp_tried = 1;
- + break;
- + }
- + already_checked_qp = h->mb.i_qp;
- + h->mb.i_qp = orig_qp;
- + }
- + }
- +
- h->mb.i_qp += direction;
- while( h->mb.i_qp >= h->param.rc.i_qp_min && h->mb.i_qp <= h->param.rc.i_qp_max )
- {
- if( h->mb.i_last_qp == h->mb.i_qp )
- last_qp_tried = 1;
- - h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- - cost = x264_rd_cost_mb( h, a->i_lambda2 );
- - COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
- + if( h->mb.i_qp == already_checked_qp )
- + cost = already_checked_cost;
- + else
- + {
- + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- + cost = x264_rd_cost_mb( h, a->i_lambda2 );
- + COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
- + }
- /* We can't assume that the costs are monotonic over QPs.
- * Tie case-as-failure seems to give better results. */
- --
- 1.6.1.2
- From aa56cb41947ed2e737090b7f22ab54a323b6fa0b Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 3 Feb 2010 20:27:57 -0800
- Subject: [PATCH 08/24] Fix 2-pass ratecontrol continuation in case of missing statsfile
- Didn't work properly if MB-tree was enabled.
- ---
- encoder/ratecontrol.c | 1 +
- 1 files changed, 1 insertions(+), 0 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 52196e7..e314ba2 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -1280,6 +1280,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
- h->thread[i]->param.rc.b_stat_read = 0;
- h->thread[i]->param.i_bframe_adaptive = 0;
- h->thread[i]->param.i_scenecut_threshold = 0;
- + h->thread[i]->param.rc.b_mb_tree = 0;
- if( h->thread[i]->param.i_bframe > 1 )
- h->thread[i]->param.i_bframe = 1;
- }
- --
- 1.6.1.2
- From 479d312c2512244bf81a82b90815087a7e694f5e Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 5 Feb 2010 16:15:23 -0800
- Subject: [PATCH 09/24] Various CABAC/CAVLC cleanups/speedups
- Make some if/else chains into switch statements.
- Store CBP data in x264_t and only move it to frame storage later.
- This saves a wasted cache line and some unnecessary dereferences in RDO.
- ---
- common/common.h | 1 +
- common/macroblock.c | 3 +-
- encoder/analyse.c | 8 +-
- encoder/cabac.c | 40 +++---
- encoder/cavlc.c | 365 ++++++++++++++++++++++++++------------------------
- encoder/macroblock.c | 19 +--
- 6 files changed, 219 insertions(+), 217 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 8b1b05a..d4a8dd9 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -542,6 +542,7 @@ struct x264_t
- ALIGNED_4( uint8_t i_sub_partition[4] );
- int b_transform_8x8;
- + int i_cbp_combined;
- int i_cbp_luma;
- int i_cbp_chroma;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 10f09ac..d86f3af 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1343,11 +1343,12 @@ void x264_macroblock_cache_save( x264_t *h )
- M16( &non_zero_count[16+2*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] ) >> 8;
- M16( &non_zero_count[16+3*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] ) >> 8;
- - if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
- + if( h->mb.i_type != I_16x16 && !h->mb.i_cbp_combined )
- h->mb.i_qp = h->mb.i_last_qp;
- h->mb.qp[i_mb_xy] = h->mb.i_qp;
- h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
- h->mb.i_last_qp = h->mb.i_qp;
- + h->mb.cbp[i_mb_xy] = h->mb.i_cbp_combined;
- }
- if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 53ca025..4f3f35f 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -1199,7 +1199,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_partition = D_16x16;
- x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
- a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
- - if( !(h->mb.i_cbp_luma|h->mb.i_cbp_chroma) )
- + if( !h->mb.i_cbp_combined )
- h->mb.i_type = P_SKIP;
- }
- }
- @@ -2307,7 +2307,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
- int last_qp_tried = 0;
- origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
- - int origcbp = h->mb.cbp[h->mb.i_mb_xy];
- + int origcbp = h->mb.i_cbp_combined;
- /* If CBP is already zero, don't raise the quantizer any higher. */
- for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
- @@ -2340,7 +2340,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
- h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
- already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
- - if( !h->mb.cbp[h->mb.i_mb_xy] )
- + if( !h->mb.i_cbp_combined )
- {
- /* If our empty-CBP block is lower QP than the last QP,
- * the last QP cannot possibly have a CBP either. */
- @@ -2377,7 +2377,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
- if( failures > threshold )
- break;
- - if( direction == 1 && !h->mb.cbp[h->mb.i_mb_xy] )
- + if( direction == 1 && !h->mb.i_cbp_combined )
- break;
- h->mb.i_qp += direction;
- }
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 6ff2aed..6c14722 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -107,7 +107,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- - if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
- + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
- {
- #if !RDO_SKIP_BS
- h->mb.i_qp = h->mb.i_last_qp;
- @@ -915,7 +915,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- x264_cabac_mb_transform_size( h, cb );
- - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_combined || i_mb_type == I_16x16 )
- {
- const int b_intra = IS_INTRA( i_mb_type );
- x264_cabac_mb_qp_delta( h, cb );
- @@ -973,24 +973,24 @@ static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int
- int b_8x16 = h->mb.i_partition == D_8x16;
- int j;
- - if( i_mb_type == P_8x8 )
- + switch( i_mb_type )
- {
- - x264_cabac_mb8x8_mvd( h, cb, i8 );
- - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
- - }
- - else if( i_mb_type == P_L0 )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- - {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- - }
- - else //if( i_mb_type == B_8x8 )
- - {
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
- + case P_L0:
- + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- + break;
- + case P_8x8:
- + x264_cabac_mb8x8_mvd( h, cb, i8 );
- + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
- + break;
- + case B_8x8:
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- + x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
- + break;
- + default: /* Rest of the B types */
- + if( x264_mb_type_list_table[i_mb_type][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- + if( x264_mb_type_list_table[i_mb_type][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -1019,9 +1019,7 @@ static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, i
- int b_8x4 = i_pixel == PIXEL_8x4;
- block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
- if( i_pixel == PIXEL_4x4 )
- - {
- x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
- - }
- else
- {
- x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index d18408b..45b55fe 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -209,8 +209,7 @@ static void x264_cavlc_mb_qp_delta( x264_t *h )
- int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- - if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
- - && !h->mb.cache.non_zero_count[x264_scan8[24]] )
- + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
- {
- #if !RDO_SKIP_BS
- h->mb.i_qp = h->mb.i_last_qp;
- @@ -302,201 +301,209 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_write1( s, h->mb.b_interlaced );
- }
- -#if !RDO_SKIP_BS
- - if( i_mb_type == I_PCM )
- - {
- - uint8_t *p_start = s->p_start;
- - bs_write_ue( s, i_mb_i_offset + 25 );
- - i_mb_pos_tex = bs_pos( s );
- - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- -
- - bs_align_0( s );
- -
- - memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- - s->p += 256;
- - for( i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- - for( i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- -
- - bs_init( s, s->p, s->p_end - s->p );
- - s->p_start = p_start;
- -
- - /* if PCM is chosen, we need to store reconstructed frame data */
- - h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
- - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
- - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
- -
- - h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
- - return;
- - }
- -#endif
- -
- /* Write:
- - type
- - prediction
- - mv */
- - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- + switch( i_mb_type )
- {
- - int di = i_mb_type == I_8x8 ? 4 : 1;
- - bs_write_ue( s, i_mb_i_offset + 0 );
- - if( h->pps->b_transform_8x8_mode )
- - bs_write1( s, h->mb.b_transform_8x8 );
- -
- - /* Prediction: Luma */
- - for( i = 0; i < 16; i += di )
- + case I_4x4:
- + case I_8x8:
- {
- - int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- - int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- + int di = i_mb_type == I_8x8 ? 4 : 1;
- + bs_write_ue( s, i_mb_i_offset + 0 );
- + if( h->pps->b_transform_8x8_mode )
- + bs_write1( s, h->mb.b_transform_8x8 );
- - if( i_pred == i_mode )
- - bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
- - else
- - bs_write( s, 4, i_mode - (i_mode > i_pred) );
- + /* Prediction: Luma */
- + for( i = 0; i < 16; i += di )
- + {
- + int i_pred = x264_mb_predict_intra4x4_mode( h, i );
- + int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
- +
- + if( i_pred == i_mode )
- + bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
- + else
- + bs_write( s, 4, i_mode - (i_mode > i_pred) );
- + }
- + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- + break;
- + case I_16x16:
- + bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
- + h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
- + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- + break;
- }
- - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- - }
- - else if( i_mb_type == I_16x16 )
- - {
- - bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
- - h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
- - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
- - }
- - else if( i_mb_type == P_L0 )
- - {
- - if( h->mb.i_partition == D_16x16 )
- +#if !RDO_SKIP_BS
- + case I_PCM:
- {
- - bs_write1( s, 1 );
- -
- - if( h->mb.pic.i_fref[0] > 1 )
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + uint8_t *p_start = s->p_start;
- + bs_write_ue( s, i_mb_i_offset + 25 );
- + i_mb_pos_tex = bs_pos( s );
- + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- +
- + bs_align_0( s );
- +
- + memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- + s->p += 256;
- + for( i = 0; i < 8; i++ )
- + memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- + s->p += 64;
- + for( i = 0; i < 8; i++ )
- + memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- + s->p += 64;
- +
- + bs_init( s, s->p, s->p_end - s->p );
- + s->p_start = p_start;
- +
- + /* if PCM is chosen, we need to store reconstructed frame data */
- + h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
- + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
- + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
- +
- + h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
- + return;
- }
- - else if( h->mb.i_partition == D_16x8 )
- +#endif
- + case P_L0:
- {
- - bs_write_ue( s, 1 );
- - if( h->mb.pic.i_fref[0] > 1 )
- + if( h->mb.i_partition == D_16x16 )
- {
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + bs_write1( s, 1 );
- +
- + if( h->mb.pic.i_fref[0] > 1 )
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- }
- - x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + else if( h->mb.i_partition == D_16x8 )
- + {
- + bs_write_ue( s, 1 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + }
- + x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + }
- + else if( h->mb.i_partition == D_8x16 )
- + {
- + bs_write_ue( s, 2 );
- + if( h->mb.pic.i_fref[0] > 1 )
- + {
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- + }
- + x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + }
- + break;
- }
- - else if( h->mb.i_partition == D_8x16 )
- + case P_8x8:
- {
- - bs_write_ue( s, 2 );
- - if( h->mb.pic.i_fref[0] > 1 )
- + int b_sub_ref;
- + if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
- + h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
- + {
- + bs_write_ue( s, 4 );
- + b_sub_ref = 0;
- + }
- + else
- + {
- + bs_write_ue( s, 3 );
- + b_sub_ref = 1;
- + }
- +
- + /* sub mb type */
- + if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
- + for( i = 0; i < 4; i++ )
- + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
- + else
- + bs_write( s, 4, 0xf );
- +
- + /* ref0 */
- + if( b_sub_ref )
- {
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
- }
- - x264_cavlc_mb_mvd( h, 0, 0, 2 );
- - x264_cavlc_mb_mvd( h, 0, 4, 2 );
- - }
- - }
- - else if( i_mb_type == P_8x8 )
- - {
- - int b_sub_ref;
- - if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
- - h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
- - {
- - bs_write_ue( s, 4 );
- - b_sub_ref = 0;
- - }
- - else
- - {
- - bs_write_ue( s, 3 );
- - b_sub_ref = 1;
- - }
- - /* sub mb type */
- - if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
- for( i = 0; i < 4; i++ )
- - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
- - else
- - bs_write( s, 4, 0xf );
- -
- - /* ref0 */
- - if( b_sub_ref )
- - {
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
- + x264_cavlc_mb8x8_mvd( h, i );
- + break;
- }
- + case B_8x8:
- + {
- + bs_write_ue( s, 22 );
- - for( i = 0; i < 4; i++ )
- - x264_cavlc_mb8x8_mvd( h, i );
- - }
- - else if( i_mb_type == B_8x8 )
- - {
- - bs_write_ue( s, 22 );
- -
- - /* sub mb type */
- - for( i = 0; i < 4; i++ )
- - bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
- + /* sub mb type */
- + for( i = 0; i < 4; i++ )
- + bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
- - /* ref */
- - if( h->mb.pic.i_fref[0] > 1 )
- + /* ref */
- + if( h->mb.pic.i_fref[0] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
- + if( h->mb.pic.i_fref[1] > 1 )
- + for( i = 0; i < 4; i++ )
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- + bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
- +
- + /* mvd */
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
- - if( h->mb.pic.i_fref[1] > 1 )
- + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- for( i = 0; i < 4; i++ )
- if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
- -
- - /* mvd */
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
- - x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
- - for( i = 0; i < 4; i++ )
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
- - x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- - }
- - else if( i_mb_type != B_DIRECT )
- - {
- - /* All B mode */
- - /* Motion Vector */
- - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- - const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
- - const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
- -
- - bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
- - if( h->mb.i_partition == D_16x16 )
- + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
- + break;
- + }
- + case B_DIRECT:
- {
- - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + bs_write1( s, 1 );
- + break;
- }
- - else
- + default: /* Rest of the B types */
- {
- - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
- - if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
- - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
- - if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- - if( h->mb.i_partition == D_16x8 )
- + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
- + const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
- + const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
- +
- + bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
- + if( h->mb.i_partition == D_16x16 )
- {
- + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
- + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
- if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- }
- - else //if( h->mb.i_partition == D_8x16 )
- + else
- {
- - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
- + if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
- + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
- + if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
- + if( h->mb.i_partition == D_16x8 )
- + {
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
- + }
- + else //if( h->mb.i_partition == D_8x16 )
- + {
- + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
- + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
- + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
- + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
- + }
- }
- + break;
- }
- }
- - else //if( i_mb_type == B_DIRECT )
- - bs_write1( s, 1 );
- #if !RDO_SKIP_BS
- i_mb_pos_tex = bs_pos( s );
- @@ -505,16 +512,16 @@ void x264_macroblock_write_cavlc( x264_t *h )
- /* Coded block patern */
- if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
- - bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- + bs_write_ue( s, intra4x4_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
- else if( i_mb_type != I_16x16 )
- - bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- + bs_write_ue( s, inter_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
- /* transform size 8x8 flag */
- if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
- bs_write1( s, h->mb.b_transform_8x8 );
- /* write residual */
- - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
- + if( h->mb.i_cbp_combined&0x3f || i_mb_type == I_16x16 )
- {
- x264_cavlc_mb_qp_delta( h );
- @@ -561,24 +568,24 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
- int b_8x16 = h->mb.i_partition == D_8x16;
- int j;
- - if( i_mb_type == P_8x8 )
- - {
- - x264_cavlc_mb8x8_mvd( h, i8 );
- - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- - }
- - else if( i_mb_type == P_L0 )
- - x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
- + switch( i_mb_type )
- {
- - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- - }
- - else //if( i_mb_type == B_8x8 )
- - {
- - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- - x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- - x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + case P_L0:
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + break;
- + case P_8x8:
- + x264_cavlc_mb8x8_mvd( h, i8 );
- + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
- + break;
- + case B_8x8:
- + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
- + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
- + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
- + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
- + break;
- + default: /* Rest of the B types */
- + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
- + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
- }
- for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
- @@ -618,6 +625,8 @@ static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
- static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
- {
- h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
- + /* We can't use h->mb.i_cbp_combined here because it's only calculated at the end of
- + * x264_macroblock_encode(), which hasn't been called at this point. */
- bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
- x264_macroblock_luma_write_cavlc( h, i8, i8 );
- return h->out.bs.i_bits_encoded;
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index fa7942d..f5f6267 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -488,7 +488,7 @@ static void x264_macroblock_encode_skip( x264_t *h )
- h->mb.i_cbp_chroma = 0x00;
- memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
- /* store cbp */
- - h->mb.cbp[h->mb.i_mb_xy] = 0;
- + h->mb.i_cbp_combined = 0;
- }
- /*****************************************************************************
- @@ -604,7 +604,6 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode )
- *****************************************************************************/
- void x264_macroblock_encode( x264_t *h )
- {
- - int i_cbp_dc = 0;
- int i_qp = h->mb.i_qp;
- int b_decimate = h->mb.b_dct_decimate;
- int b_force_no_skip = 0;
- @@ -880,34 +879,28 @@ void x264_macroblock_encode( x264_t *h )
- /* encode the 8x8 blocks */
- x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
- - if( h->param.b_cabac )
- - {
- - i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
- + int i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
- | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
- | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
- - }
- /* store cbp */
- - h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
- + h->mb.i_cbp_combined = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
- /* Check for P_SKIP
- * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
- * (if multiple mv give same result)*/
- if( !b_force_no_skip )
- {
- - if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
- - !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
- - M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
- + if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 && !h->mb.i_cbp_combined
- + && M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
- && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
- {
- h->mb.i_type = P_SKIP;
- }
- /* Check for B_SKIP */
- - if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
- - {
- + if( h->mb.i_type == B_DIRECT && !h->mb.i_cbp_combined )
- h->mb.i_type = B_SKIP;
- - }
- }
- }
- --
- 1.6.1.2
- From 9fb95fea1304984d7d90b1670dcb7c5a4e261697 Mon Sep 17 00:00:00 2001
- From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
- Date: Mon, 8 Feb 2010 01:48:38 -0800
- Subject: [PATCH 10/24] Write PASP atom in mp4 muxing
- Adds container-level aspect ratio support for mp4.
- ---
- output/mp4.c | 3 ++-
- 1 files changed, 2 insertions(+), 1 deletions(-)
- diff --git a/output/mp4.c b/output/mp4.c
- index e3ad9c6..b817c82 100644
- --- a/output/mp4.c
- +++ b/output/mp4.c
- @@ -121,7 +121,7 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
- if( mdhd_duration != total_duration )
- {
- uint64_t last_dts = gf_isom_get_sample_dts( p_mp4->p_file, p_mp4->i_track, p_mp4->i_numframe );
- - uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
- + uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
- gf_isom_set_last_sample_duration( p_mp4->p_file, p_mp4->i_track, last_duration );
- total_duration = gf_isom_get_media_duration( p_mp4->p_file, p_mp4->i_track );
- }
- @@ -212,6 +212,7 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
- dw *= sar ;
- else
- dh /= sar;
- + gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height );
- gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 );
- }
- --
- 1.6.1.2
- From d0af4adb40429a5335a8b81c67f97bdf75e8dfa0 Mon Sep 17 00:00:00 2001
- From: Henrik Gramner <hengar-6@student.ltu.se>
- Date: Mon, 8 Feb 2010 15:53:52 -0800
- Subject: [PATCH 11/24] Faster 2x2 chroma DC dequant
- ---
- doc/standards.txt | 1 +
- encoder/macroblock.c | 24 +++++++++---------------
- 2 files changed, 10 insertions(+), 15 deletions(-)
- diff --git a/doc/standards.txt b/doc/standards.txt
- index db9a691..7474d8f 100644
- --- a/doc/standards.txt
- +++ b/doc/standards.txt
- @@ -4,6 +4,7 @@ checkasm is written in gcc, with no attempt at compatibility with anything else.
- We make the following additional assumptions which are true of real systems but not guaranteed by C99:
- * Two's complement.
- * Signed right-shifts are sign-extended.
- +* int is 32-bit or larger.
- x86-specific assumptions:
- * The stack is 16-byte aligned. We align it on entry to libx264 and on entry to any thread, but the compiler must preserve alignment after that.
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index f5f6267..3d859de 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -42,30 +42,24 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
- int d1 = dct[2] + dct[3]; \
- int d2 = dct[0] - dct[1]; \
- int d3 = dct[2] - dct[3]; \
- - int dmf = dequant_mf[i_qp%6][0]; \
- - int qbits = i_qp/6 - 5; \
- - if( qbits > 0 ) \
- - { \
- - dmf <<= qbits; \
- - qbits = 0; \
- - }
- + int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
- static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
- {
- IDCT_DEQUANT_START
- - dct4x4[0][0] = (d0 + d1) * dmf >> -qbits;
- - dct4x4[1][0] = (d0 - d1) * dmf >> -qbits;
- - dct4x4[2][0] = (d2 + d3) * dmf >> -qbits;
- - dct4x4[3][0] = (d2 - d3) * dmf >> -qbits;
- + dct4x4[0][0] = (d0 + d1) * dmf >> 5;
- + dct4x4[1][0] = (d0 - d1) * dmf >> 5;
- + dct4x4[2][0] = (d2 + d3) * dmf >> 5;
- + dct4x4[3][0] = (d2 - d3) * dmf >> 5;
- }
- static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
- {
- IDCT_DEQUANT_START
- - out[0] = (d0 + d1) * dmf >> -qbits;
- - out[1] = (d0 - d1) * dmf >> -qbits;
- - out[2] = (d2 + d3) * dmf >> -qbits;
- - out[3] = (d2 - d3) * dmf >> -qbits;
- + out[0] = (d0 + d1) * dmf >> 5;
- + out[1] = (d0 - d1) * dmf >> 5;
- + out[2] = (d2 + d3) * dmf >> 5;
- + out[3] = (d2 - d3) * dmf >> 5;
- }
- static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
- --
- 1.6.1.2
- From c2c3d4558253b8f4969c35be9442489363ed8902 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 9 Feb 2010 15:08:31 -0800
- Subject: [PATCH 12/24] Make psy-(rd|trellis) use more precision in userdata SEI
- ---
- common/common.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 6d1d7f0..aaccdf2 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -886,7 +886,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
- s += sprintf( s, " psy=%d", p->analyse.b_psy );
- if( p->analyse.b_psy )
- - s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
- + s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
- s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
- s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
- s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
- --
- 1.6.1.2
- From d5cc99ce2f0ddbb9b27fe14526bb06b6745de0fd Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 10 Feb 2010 12:12:29 -0800
- Subject: [PATCH 13/24] Overhaul sliced-threads VBV
- Make predictors thread-local and allow each thread to poll the others to get their predicted sizes.
- Many, many other tweaks to improve quality with small VBV and sliced threads.
- Note this may somewhat increase the risk of a VBV underflow in such extreme situations (single-frame VBV).
- This is tolerable, as most relevant use-cases are better off with a few rare underflows (even if they have to drop a slice) than consistent low quality.
- ---
- encoder/encoder.c | 4 +-
- encoder/ratecontrol.c | 150 ++++++++++++++++++++++++++++++-------------------
- encoder/slicetype.c | 4 +-
- 3 files changed, 97 insertions(+), 61 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index e266a1a..b977ec6 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2061,6 +2061,8 @@ static int x264_threaded_slices_write( x264_t *h )
- for( i = 0; i <= h->sps->i_mb_height; i++ )
- x264_fdec_filter_row( h, i );
- + x264_threads_merge_ratecontrol( h );
- +
- for( i = 1; i < h->param.i_threads; i++ )
- {
- x264_t *t = h->thread[i];
- @@ -2076,8 +2078,6 @@ static int x264_threaded_slices_write( x264_t *h )
- ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];
- }
- - x264_threads_merge_ratecontrol( h );
- -
- return 0;
- }
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index e314ba2..b2cbb26 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -134,9 +134,11 @@ struct x264_ratecontrol_t
- * This value is the current position (0 or 1). */
- /* MBRC stuff */
- - double frame_size_estimated;
- + float frame_size_estimated; /* Access to this variable must be atomic: double is
- + * not atomic on all arches we care about */
- double frame_size_planned;
- double slice_size_planned;
- + double max_frame_error;
- predictor_t (*row_pred)[2];
- predictor_t row_preds[5][2];
- predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
- @@ -505,17 +507,21 @@ int x264_ratecontrol_new( x264_t *h )
- rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
- rc->last_qscale = qp2qscale(26);
- - CHECKED_MALLOC( rc->pred, 5*sizeof(predictor_t) );
- + int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1;
- + CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds );
- CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) );
- for( i = 0; i < 5; i++ )
- {
- rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
- rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
- rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
- - rc->pred[i].coeff= 2.0;
- - rc->pred[i].count= 1.0;
- - rc->pred[i].decay= 0.5;
- - rc->pred[i].offset= 0.0;
- + for( j = 0; j < num_preds; j++ )
- + {
- + rc->pred[i+j*5].coeff= 2.0;
- + rc->pred[i+j*5].count= 1.0;
- + rc->pred[i+j*5].decay= 0.5;
- + rc->pred[i+j*5].offset= 0.0;
- + }
- for( j = 0; j < 2; j++ )
- {
- rc->row_preds[i][j].coeff= .25;
- @@ -986,20 +992,16 @@ void x264_ratecontrol_delete( x264_t *h )
- x264_free( rc );
- }
- +/* We don't actually need mutexes here: the access orders aren't deterministic
- + * to begin with, plus all operations are atomic. */
- void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
- {
- - x264_pthread_mutex_lock( &h->fenc->mutex );
- h->rc->frame_size_estimated = bits;
- - x264_pthread_mutex_unlock( &h->fenc->mutex );
- }
- -int x264_ratecontrol_get_estimated_size( x264_t const *h)
- +int x264_ratecontrol_get_estimated_size( x264_t const *h )
- {
- - int size;
- - x264_pthread_mutex_lock( &h->fenc->mutex );
- - size = h->rc->frame_size_estimated;
- - x264_pthread_mutex_unlock( &h->fenc->mutex );
- - return size;
- + return h->rc->frame_size_estimated;
- }
- static void accum_p_qp_update( x264_t *h, float qp )
- @@ -1173,6 +1175,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- /* tweak quality based on difference from predicted size */
- if( y < h->i_threadslice_end-1 )
- {
- + int i;
- int prev_row_qp = h->fdec->i_row_qp[y];
- int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
- int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
- @@ -1186,19 +1189,23 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
- float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
- - float size_of_other_slices = rc->frame_size_planned - slice_size_planned;
- + float size_of_other_slices = 0;
- + if( h->param.b_sliced_threads )
- + {
- + for( i = 0; i < h->param.i_threads; i++ )
- + if( h != h->thread[i] )
- + size_of_other_slices += x264_ratecontrol_get_estimated_size( h->thread[i] );
- + }
- + else
- + rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->sps->i_mb_width) );
- +
- /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
- float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
- - float max_frame_error = X264_MAX( 0.05, 1.0 / h->sps->i_mb_height );
- - int b1 = predict_row_size_sum( h, y, rc->qpm );
- -
- - /* Assume that if this slice has become larger than expected,
- - * the other slices will have gotten equally larger. */
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
- /* area at the top of the frame was measured inaccurately. */
- - if( row_bits_so_far(h,y) < 0.05 * (rc->frame_size_planned-size_of_other_slices) )
- + if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned )
- return;
- if( h->sh.i_type != SLICE_TYPE_I )
- @@ -1213,8 +1220,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
- {
- rc->qpm ++;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- while( rc->qpm > i_qp_min
- @@ -1223,20 +1229,18 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
- {
- rc->qpm --;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- /* avoid VBV underflow */
- while( (rc->qpm < h->param.rc.i_qp_max)
- - && (rc->buffer_fill - b1 < rc->buffer_rate * max_frame_error) )
- + && (rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) )
- {
- rc->qpm ++;
- - b1 = predict_row_size_sum( h, y, rc->qpm );
- - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
- + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- - x264_ratecontrol_set_estimated_size(h, b1);
- + x264_ratecontrol_set_estimated_size( h, predict_row_size_sum( h, y, rc->qpm ) );
- }
- /* loses the fractional part of the frame-wise qp */
- @@ -1958,56 +1962,88 @@ static float rate_estimate_qscale( x264_t *h )
- }
- }
- +void x264_threads_normalize_predictors( x264_t *h )
- +{
- + int i;
- + double totalsize = 0;
- + for( i = 0; i < h->param.i_threads; i++ )
- + totalsize += h->thread[i]->rc->slice_size_planned;
- + double factor = h->rc->frame_size_planned / totalsize;
- + for( i = 0; i < h->param.i_threads; i++ )
- + h->thread[i]->rc->slice_size_planned *= factor;
- +}
- +
- void x264_threads_distribute_ratecontrol( x264_t *h )
- {
- - int i, row, totalsize = 0;
- - if( h->rc->b_vbv )
- - for( row = 0; row < h->sps->i_mb_height; row++ )
- - totalsize += h->fdec->i_row_satd[row];
- + int i, row;
- + x264_ratecontrol_t *rc = h->rc;
- +
- + /* Initialize row predictors */
- + if( h->i_frame == 0 )
- + for( i = 0; i < h->param.i_threads; i++ )
- + {
- + x264_ratecontrol_t *t = h->thread[i]->rc;
- + memcpy( t->row_preds, rc->row_preds, sizeof(rc->row_preds) );
- + }
- +
- for( i = 0; i < h->param.i_threads; i++ )
- {
- x264_t *t = h->thread[i];
- - x264_ratecontrol_t *rc = h->rc;
- - memcpy( t->rc, rc, sizeof(x264_ratecontrol_t) );
- + memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) );
- + t->rc->row_pred = &t->rc->row_preds[h->sh.i_type];
- /* Calculate the planned slice size. */
- - if( h->rc->b_vbv && rc->frame_size_planned )
- + if( rc->b_vbv && rc->frame_size_planned )
- {
- int size = 0;
- for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
- size += h->fdec->i_row_satd[row];
- - t->rc->slice_size_planned = size * rc->frame_size_planned / totalsize;
- + t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], rc->qpm, size );
- }
- else
- t->rc->slice_size_planned = 0;
- }
- + if( rc->b_vbv && rc->frame_size_planned )
- + {
- + x264_threads_normalize_predictors( h );
- +
- + if( rc->single_frame_vbv )
- + {
- + /* Compensate for our max frame error threshold: give more bits (proportionally) to smaller slices. */
- + for( i = 0; i < h->param.i_threads; i++ )
- + {
- + x264_t *t = h->thread[i];
- + t->rc->max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) );
- + t->rc->slice_size_planned += 2 * t->rc->max_frame_error * rc->frame_size_planned;
- + }
- + x264_threads_normalize_predictors( h );
- + }
- +
- + for( i = 0; i < h->param.i_threads; i++ )
- + x264_ratecontrol_set_estimated_size( h->thread[i], h->thread[i]->rc->slice_size_planned );
- + }
- }
- void x264_threads_merge_ratecontrol( x264_t *h )
- {
- - int i, j, k;
- + int i, row;
- x264_ratecontrol_t *rc = h->rc;
- x264_emms();
- - for( i = 1; i < h->param.i_threads; i++ )
- + for( i = 0; i < h->param.i_threads; i++ )
- {
- - x264_ratecontrol_t *t = h->thread[i]->rc;
- - rc->qpa_rc += t->qpa_rc;
- - rc->qpa_aq += t->qpa_aq;
- - for( j = 0; j < 5; j++ )
- - for( k = 0; k < 2; k++ )
- - {
- - rc->row_preds[j][k].coeff += t->row_preds[j][k].coeff;
- - rc->row_preds[j][k].offset += t->row_preds[j][k].offset;
- - rc->row_preds[j][k].count += t->row_preds[j][k].count;
- - }
- + x264_t *t = h->thread[i];
- + x264_ratecontrol_t *rct = h->thread[i]->rc;
- + int size = 0;
- + for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
- + size += h->fdec->i_row_satd[row];
- + int bits = t->stat.frame.i_mv_bits + t->stat.frame.i_tex_bits + t->stat.frame.i_misc_bits;
- + int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->sps->i_mb_width;
- + update_predictor( &rc->pred[h->sh.i_type+5*i], qp2qscale(rct->qpa_rc/mb_count), size, bits );
- + if( !i )
- + continue;
- + rc->qpa_rc += rct->qpa_rc;
- + rc->qpa_aq += rct->qpa_aq;
- }
- - for( j = 0; j < 5; j++ )
- - for( k = 0; k < 2; k++ )
- - {
- - rc->row_preds[j][k].coeff /= h->param.i_threads;
- - rc->row_preds[j][k].offset /= h->param.i_threads;
- - rc->row_preds[j][k].count /= h->param.i_threads;
- - }
- }
- void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 057f6a6..bb2ed64 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -1394,10 +1394,10 @@ int x264_rc_analyse_slice( x264_t *h )
- int mb_xy = y * h->mb.i_mb_stride;
- for( x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
- {
- - int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor) >> 8;
- + int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
- int inter_cost = h->fenc->lowres_costs[b-p0][p1-b][mb_xy];
- int diff = intra_cost - inter_cost;
- - h->fdec->i_row_satd[y] += diff;
- + h->fdec->i_row_satd[y] += (diff * frames[b]->i_inv_qscale_factor[mb_xy] + 128) >> 8;
- cost += diff;
- }
- }
- --
- 1.6.1.2
- From f9012469506ff28ed869cc3518ff1ed5f252cf48 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 10 Feb 2010 13:44:28 -0800
- Subject: [PATCH 14/24] Allow longer keyints with intra refresh
- If a long keyint is specified (longer than macroblock width-1), the refresh will simply not be running continuously.
- In other words, a refresh will take place, and then x264 will wait until keyint is over to start another refresh.
- ---
- encoder/encoder.c | 9 ++++-----
- 1 files changed, 4 insertions(+), 5 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index b977ec6..6ad67d5 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -599,8 +599,6 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
- h->param.i_frame_reference = 1;
- }
- - if( h->param.b_intra_refresh )
- - h->param.i_keyint_max = X264_MIN( h->param.i_keyint_max, (h->param.i_width+15)/16 - 1 );
- h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
- h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
- {
- @@ -2306,12 +2304,12 @@ int x264_encoder_encode( x264_t *h,
- if( h->param.b_intra_refresh && h->fenc->i_type == X264_TYPE_P )
- {
- int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
- - float increment = ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max;
- + float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
- if( IS_X264_TYPE_I( h->fref0[0]->i_type ) )
- h->fdec->f_pir_position = 0;
- else
- {
- - if( h->fref0[0]->i_pir_end_col == h->sps->i_mb_width - 1 )
- + if( h->fdec->f_pir_position >= h->param.i_keyint_max )
- {
- h->fdec->f_pir_position = 0;
- h->fenc->b_keyframe = 1;
- @@ -2357,8 +2355,9 @@ int x264_encoder_encode( x264_t *h,
- if( h->fenc->i_type != X264_TYPE_IDR )
- {
- + int time_to_recovery = X264_MIN( h->sps->i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- - x264_sei_recovery_point_write( h, &h->out.bs, h->param.i_keyint_max );
- + x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
- x264_nal_end( h );
- overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
- }
- --
- 1.6.1.2
- From 6ca08ce108471bd04a199e71571f18619988d3f4 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 12 Feb 2010 03:33:54 -0800
- Subject: [PATCH 15/24] Implement direct temporal + interlaced
- This was much easier than I expected.
- It will also be basically useless until TFF/BFF support gets in, since it requires delta_poc_bottom to be set correctly to work well.
- ---
- common/common.h | 5 +++--
- common/macroblock.c | 8 ++++----
- encoder/encoder.c | 5 -----
- 3 files changed, 7 insertions(+), 11 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index d4a8dd9..6da462f 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -655,11 +655,12 @@ struct x264_t
- int i_chroma_lambda2_offset;
- /* B_direct and weighted prediction */
- - int16_t dist_scale_factor[16][2];
- + int16_t dist_scale_factor_buf[2][16][2];
- + int16_t (*dist_scale_factor)[2];
- int8_t bipred_weight_buf[2][32][4];
- int8_t (*bipred_weight)[4];
- /* maps fref1[0]'s ref indices into the current list0 */
- -#define map_col_to_list0(col) h->mb.map_col_to_list0[col+2]
- +#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
- int8_t map_col_to_list0[18];
- int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
- } mb;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index d86f3af..e676b8b 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -190,7 +190,8 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- const int x8 = i8%2;
- const int y8 = i8/2;
- const int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
- - const int i_ref = map_col_to_list0(h->fref1[0]->ref[0][i_part_8x8]);
- + const int i_ref1_ref = h->fref1[0]->ref[0][i_part_8x8];
- + const int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
- if( i_ref >= 0 )
- {
- @@ -1238,6 +1239,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(i_mb_y&1)];
- + h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(i_mb_y&1)];
- if( h->param.b_cabac )
- {
- uint8_t skipbp;
- @@ -1478,9 +1480,7 @@ void x264_macroblock_bipred_init( x264_t *h )
- dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
- }
- - // FIXME: will need this if we ever do temporal MV pred with interlaced
- - if( !h->sh.b_mbaff )
- - h->mb.dist_scale_factor[i_ref0][i_ref1] = dist_scale_factor;
- + h->mb.dist_scale_factor_buf[field][i_ref0][i_ref1] = dist_scale_factor;
- dist_scale_factor >>= 2;
- if( h->param.analyse.b_weighted_bipred
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 6ad67d5..25c4ae4 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -430,11 +430,6 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "interlace + me=esa is not implemented\n" );
- h->param.analyse.i_me_method = X264_ME_UMH;
- }
- - if( h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
- - {
- - x264_log( h, X264_LOG_WARNING, "interlace + direct=temporal is not implemented\n" );
- - h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
- - }
- if( h->param.analyse.i_weighted_pred > 0 )
- {
- x264_log( h, X264_LOG_WARNING, "interlace + weightp is not implemented\n" );
- --
- 1.6.1.2
- From f431bbc62a793e70865bbe83d94860739579f362 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 12 Feb 2010 21:15:12 -0800
- Subject: [PATCH 16/24] Backport various speed tweak ideas from ffmpeg
- Add mv0 early termination to spatial direct calculation
- Up to twice as fast direct mv calculation on near-motionless video.
- Branchless CAVLC level code adjustment based on trailing ones.
- A few clocks faster.
- Check tc value before clipping in C version of deblock functions.
- Much faster, but nobody uses those anyways.
- Thanks to Michael Niedermayer for the ideas.
- ---
- common/frame.c | 6 ++++--
- common/macroblock.c | 3 +++
- encoder/cavlc.c | 7 +++----
- 3 files changed, 10 insertions(+), 6 deletions(-)
- diff --git a/common/frame.c b/common/frame.c
- index 40cc78f..d89f5ab 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -472,12 +472,14 @@ static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int a
- int delta;
- if( abs( p2 - p0 ) < beta )
- {
- - pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
- + if( tc0[i] )
- + pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
- tc++;
- }
- if( abs( q2 - q0 ) < beta )
- {
- - pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
- + if( tc0[i] )
- + pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
- tc++;
- }
- diff --git a/common/macroblock.c b/common/macroblock.c
- index e676b8b..c9ce597 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -272,6 +272,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
- x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );
- + if( !M64( mv ) )
- + return 1;
- +
- if( h->param.i_threads > 1
- && ( mv[0][1] > h->mb.mv_max_spel[1]
- || mv[1][1] > h->mb.mv_max_spel[1] ) )
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index 45b55fe..12806ae 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -147,10 +147,9 @@ static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l,
- if( i_trailing < i_total )
- {
- - int16_t val = runlevel.level[i_trailing];
- - int16_t val_original = runlevel.level[i_trailing]+LEVEL_TABLE_SIZE/2;
- - if( i_trailing < 3 )
- - val -= (val>>15)|1; /* as runlevel.level[i] can't be 1 for the first one if i_trailing < 3 */
- + int val = runlevel.level[i_trailing];
- + int val_original = runlevel.level[i_trailing]+LEVEL_TABLE_SIZE/2;
- + val -= ((val>>31)|1) & -(i_trailing < 3); /* as runlevel.level[i] can't be 1 for the first one if i_trailing < 3 */
- val += LEVEL_TABLE_SIZE/2;
- if( (unsigned)val_original < LEVEL_TABLE_SIZE )
- --
- 1.6.1.2
- From f1194492a77e4bcc115be7a6dfc129b0ae9b835b Mon Sep 17 00:00:00 2001
- From: Alexander Strange <astrange@ithinksw.com>
- Date: Mon, 10 Nov 2008 00:55:20 -0500
- Subject: [PATCH 17/24] Allow | as a separator between psy-rd and psy-trellis values.
- [,:/] are all taken when setting psy-trellis in a zone in an mencoder option.
- Also fix a comment typo and remove a useless line of code.
- ---
- common/common.c | 3 ++-
- encoder/encoder.c | 4 +---
- 2 files changed, 3 insertions(+), 4 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index aaccdf2..0dd7af5 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -515,7 +515,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- OPT("psy-rd")
- {
- if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
- - 2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) )
- + 2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
- + 2 == sscanf( value, "%f|%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ))
- { }
- else if( sscanf( value, "%f", &p->analyse.f_psy_rd ) )
- {
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 25c4ae4..fb916b5 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -84,7 +84,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
- x264_param_t *param = &h->param;
- int i;
- - /* First we fill all field */
- + /* First we fill all fields */
- sh->sps = sps;
- sh->pps = pps;
- @@ -685,8 +685,6 @@ static int x264_validate_parameters( x264_t *h )
- /* Psy trellis has a similar effect. */
- if( h->mb.i_psy_trellis )
- h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
- - else
- - h->mb.i_psy_trellis = 0;
- h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
- h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
- h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
- --
- 1.6.1.2
- From 7b1d4aaf2dc8da4c53ec03adcff1c54ff94051fa Mon Sep 17 00:00:00 2001
- From: Alexander Strange <astrange@ithinksw.com>
- Date: Sat, 13 Feb 2010 01:41:41 -0500
- Subject: [PATCH 18/24] mkv: Write SimpleBlock instead of Block for frame headers
- mkvtoolnix writes these by default since 2009/04/13.
- Slightly simplifies muxer and allows 'mkvinfo -s' to show B-frames
- as 'B' (but not B-ref frames).
- ---
- output/matroska.c | 2 +-
- output/matroska_ebml.c | 80 ++++++++----------------------------------------
- output/matroska_ebml.h | 2 +-
- 3 files changed, 15 insertions(+), 69 deletions(-)
- diff --git a/output/matroska.c b/output/matroska.c
- index 8e84f52..db7639c 100644
- --- a/output/matroska.c
- +++ b/output/matroska.c
- @@ -185,7 +185,7 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
- p_mkv->b_writing_frame = 0;
- - if( mk_set_frame_flags( p_mkv->w, i_stamp, p_picture->b_keyframe ) < 0 )
- + if( mk_set_frame_flags( p_mkv->w, i_stamp, p_picture->b_keyframe, p_picture->i_type == X264_TYPE_B ) < 0 )
- return -1;
- return i_size;
- diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
- index d1c6e13..7265909 100644
- --- a/output/matroska_ebml.c
- +++ b/output/matroska_ebml.c
- @@ -53,9 +53,9 @@ struct mk_writer
- int64_t def_duration;
- int64_t timescale;
- int64_t cluster_tc_scaled;
- - int64_t frame_tc, prev_frame_tc_scaled, max_frame_tc;
- + int64_t frame_tc, max_frame_tc;
- - char wrote_header, in_frame, keyframe;
- + char wrote_header, in_frame, keyframe, skippable;
- };
- static mk_context *mk_create_context( mk_writer *w, mk_context *parent, unsigned id )
- @@ -258,23 +258,6 @@ static int mk_write_uint( mk_context *c, unsigned id, int64_t ui )
- return 0;
- }
- -static int mk_write_sint( mk_context *c, unsigned id, int64_t si )
- -{
- - unsigned char c_si[8] = { si >> 56, si >> 48, si >> 40, si >> 32, si >> 24, si >> 16, si >> 8, si };
- - unsigned i = 0;
- -
- - CHECK( mk_write_id( c, id ) );
- - if( si < 0 )
- - while( i < 7 && c_si[i] == 0xff && c_si[i+1] & 0x80 )
- - ++i;
- - else
- - while( i < 7 && c_si[i] == 0 && !(c_si[i+1] & 0x80 ) )
- - ++i;
- - CHECK( mk_write_size( c, 8 - i ) );
- - CHECK( mk_append_context_data( c, c_si+i, 8 - i ) );
- - return 0;
- -}
- -
- static int mk_write_float_raw( mk_context *c, float f )
- {
- union
- @@ -301,34 +284,6 @@ static int mk_write_float( mk_context *c, unsigned id, float f )
- return 0;
- }
- -static unsigned mk_ebml_size_size( unsigned s )
- -{
- - if( s < 0x7f )
- - return 1;
- - if( s < 0x3fff )
- - return 2;
- - if( s < 0x1fffff )
- - return 3;
- - if( s < 0x0fffffff )
- - return 4;
- - return 5;
- -}
- -
- -static unsigned mk_ebml_sint_size( int64_t si )
- -{
- - unsigned char c_si[8] = { si >> 56, si >> 48, si >> 40, si >> 32, si >> 24, si >> 16, si >> 8, si };
- - unsigned i = 0;
- -
- - if( si < 0 )
- - while( i < 7 && c_si[i] == 0xff && c_si[i+1] & 0x80 )
- - ++i;
- - else
- - while( i < 7 && c_si[i] == 0 && !(c_si[i+1] & 0x80) )
- - ++i;
- -
- - return 8 - i;
- -}
- -
- mk_writer *mk_create_writer( const char *filename )
- {
- mk_writer *w = malloc( sizeof(*w) );
- @@ -446,8 +401,8 @@ static int mk_close_cluster( mk_writer *w )
- static int mk_flush_frame( mk_writer *w )
- {
- - int64_t delta, ref = 0;
- - unsigned fsize, bgsize;
- + int64_t delta;
- + unsigned fsize;
- unsigned char c_delta_flags[3];
- if( !w->in_frame )
- @@ -470,33 +425,22 @@ static int mk_flush_frame( mk_writer *w )
- }
- fsize = w->frame ? w->frame->d_cur : 0;
- - bgsize = fsize + 4 + mk_ebml_size_size( fsize + 4 ) + 1;
- - if( !w->keyframe )
- - {
- - ref = w->prev_frame_tc_scaled - w->cluster_tc_scaled - delta;
- - bgsize += 1 + 1 + mk_ebml_sint_size( ref );
- - }
- - CHECK( mk_write_id( w->cluster, 0xa0 ) ); // BlockGroup
- - CHECK( mk_write_size( w->cluster, bgsize ) );
- - CHECK( mk_write_id( w->cluster, 0xa1 ) ); // Block
- + CHECK( mk_write_id( w->cluster, 0xa3 ) ); // SimpleBlock
- CHECK( mk_write_size( w->cluster, fsize + 4 ) );
- CHECK( mk_write_size( w->cluster, 1 ) ); // track number
- c_delta_flags[0] = delta >> 8;
- c_delta_flags[1] = delta;
- - c_delta_flags[2] = 0;
- + c_delta_flags[2] = (w->keyframe << 7) | w->skippable;
- CHECK( mk_append_context_data( w->cluster, c_delta_flags, 3 ) );
- if( w->frame )
- {
- CHECK( mk_append_context_data( w->cluster, w->frame->data, w->frame->d_cur ) );
- w->frame->d_cur = 0;
- }
- - if( !w->keyframe )
- - CHECK( mk_write_sint( w->cluster, 0xfb, ref ) ); // ReferenceBlock
- w->in_frame = 0;
- - w->prev_frame_tc_scaled = w->cluster_tc_scaled + delta;
- if( w->cluster->d_cur > CLSIZE )
- CHECK( mk_close_cluster( w ) );
- @@ -509,19 +453,21 @@ int mk_start_frame( mk_writer *w )
- if( mk_flush_frame( w ) < 0 )
- return -1;
- - w->in_frame = 1;
- - w->keyframe = 0;
- + w->in_frame = 1;
- + w->keyframe = 0;
- + w->skippable = 0;
- return 0;
- }
- -int mk_set_frame_flags( mk_writer *w, int64_t timestamp, int keyframe )
- +int mk_set_frame_flags( mk_writer *w, int64_t timestamp, int keyframe, int skippable )
- {
- if( !w->in_frame )
- return -1;
- - w->frame_tc = timestamp;
- - w->keyframe = keyframe != 0;
- + w->frame_tc = timestamp;
- + w->keyframe = keyframe != 0;
- + w->skippable = skippable != 0;
- if( w->max_frame_tc < timestamp )
- w->max_frame_tc = timestamp;
- diff --git a/output/matroska_ebml.h b/output/matroska_ebml.h
- index 252e781..56eb8cc 100644
- --- a/output/matroska_ebml.h
- +++ b/output/matroska_ebml.h
- @@ -35,7 +35,7 @@ int mk_writeHeader( mk_writer *w, const char *writing_app,
- int mk_start_frame( mk_writer *w );
- int mk_add_frame_data( mk_writer *w, const void *data, unsigned size );
- -int mk_set_frame_flags( mk_writer *w, int64_t timestamp, int keyframe );
- +int mk_set_frame_flags( mk_writer *w, int64_t timestamp, int keyframe, int skippable );
- int mk_close( mk_writer *w, int64_t last_delta );
- #endif
- --
- 1.6.1.2
- From b3076b3d1dbdbe2efb29d23c52c98a6596a02687 Mon Sep 17 00:00:00 2001
- From: Alexander Strange <astrange@ithinksw.com>
- Date: Sat, 13 Feb 2010 02:00:57 -0500
- Subject: [PATCH 19/24] mkv: Write the x264 version into the file header
- This only updates the "writing application"; matroska_ebml.c is the
- "muxing application", but the version string for that is still hardcoded.
- ---
- output/matroska.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/output/matroska.c b/output/matroska.c
- index db7639c..b1805e4 100644
- --- a/output/matroska.c
- +++ b/output/matroska.c
- @@ -146,7 +146,7 @@ static int write_headers( hnd_t handle, x264_nal_t *p_nal )
- memcpy( avcC+11+sps_size, pps, pps_size );
- - ret = mk_writeHeader( p_mkv->w, "x264", "V_MPEG4/ISO/AVC",
- + ret = mk_writeHeader( p_mkv->w, "x264" X264_VERSION, "V_MPEG4/ISO/AVC",
- avcC, avcC_len, p_mkv->frame_duration, 50000,
- p_mkv->width, p_mkv->height,
- p_mkv->d_width, p_mkv->d_height );
- --
- 1.6.1.2
- From 96f261c48ebe4108cd2e0f8a94d012a01f3f7235 Mon Sep 17 00:00:00 2001
- From: Alexander Strange <astrange@ithinksw.com>
- Date: Sat, 13 Feb 2010 02:22:04 -0500
- Subject: [PATCH 20/24] Mark cli_input/output_t variables as const when possible
- ---
- input/avs.c | 2 +-
- input/ffms.c | 2 +-
- input/input.h | 10 +++++-----
- input/lavf.c | 2 +-
- input/y4m.c | 2 +-
- input/yuv.c | 2 +-
- output/flv.c | 2 +-
- output/matroska.c | 2 +-
- output/mp4.c | 2 +-
- output/output.h | 8 ++++----
- output/raw.c | 2 +-
- 11 files changed, 18 insertions(+), 18 deletions(-)
- diff --git a/input/avs.c b/input/avs.c
- index 522f8fe..79b5c80 100644
- --- a/input/avs.c
- +++ b/input/avs.c
- @@ -313,4 +313,4 @@ static int close_file( hnd_t handle )
- return 0;
- }
- -cli_input_t avs_input = { open_file, get_frame_total, picture_alloc, read_frame, release_frame, picture_clean, close_file };
- +const cli_input_t avs_input = { open_file, get_frame_total, picture_alloc, read_frame, release_frame, picture_clean, close_file };
- diff --git a/input/ffms.c b/input/ffms.c
- index b680967..14962c7 100644
- --- a/input/ffms.c
- +++ b/input/ffms.c
- @@ -244,4 +244,4 @@ static int close_file( hnd_t handle )
- return 0;
- }
- -cli_input_t ffms_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- +const cli_input_t ffms_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- diff --git a/input/input.h b/input/input.h
- index 9fb425c..6e386f4 100644
- --- a/input/input.h
- +++ b/input/input.h
- @@ -60,11 +60,11 @@ typedef struct
- int (*close_file)( hnd_t handle );
- } cli_input_t;
- -extern cli_input_t yuv_input;
- -extern cli_input_t y4m_input;
- -extern cli_input_t avs_input;
- +extern const cli_input_t yuv_input;
- +extern const cli_input_t y4m_input;
- +extern const cli_input_t avs_input;
- extern cli_input_t thread_input;
- -extern cli_input_t lavf_input;
- -extern cli_input_t ffms_input;
- +extern const cli_input_t lavf_input;
- +extern const cli_input_t ffms_input;
- #endif
- diff --git a/input/lavf.c b/input/lavf.c
- index 180e509..6ecc6b0 100644
- --- a/input/lavf.c
- +++ b/input/lavf.c
- @@ -269,4 +269,4 @@ static int close_file( hnd_t handle )
- return 0;
- }
- -cli_input_t lavf_input = { open_file, get_frame_total, picture_alloc, read_frame, NULL, picture_clean, close_file };
- +const cli_input_t lavf_input = { open_file, get_frame_total, picture_alloc, read_frame, NULL, picture_clean, close_file };
- diff --git a/input/y4m.c b/input/y4m.c
- index 1619f74..8645ff7 100644
- --- a/input/y4m.c
- +++ b/input/y4m.c
- @@ -242,4 +242,4 @@ static int close_file( hnd_t handle )
- return 0;
- }
- -cli_input_t y4m_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- +const cli_input_t y4m_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- diff --git a/input/yuv.c b/input/yuv.c
- index dbd0317..3e39e07 100644
- --- a/input/yuv.c
- +++ b/input/yuv.c
- @@ -125,4 +125,4 @@ static int close_file( hnd_t handle )
- return 0;
- }
- -cli_input_t yuv_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- +const cli_input_t yuv_input = { open_file, get_frame_total, x264_picture_alloc, read_frame, NULL, x264_picture_clean, close_file };
- diff --git a/output/flv.c b/output/flv.c
- index b3e5d16..2e0a0e4 100644
- --- a/output/flv.c
- +++ b/output/flv.c
- @@ -305,4 +305,4 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
- return 0;
- }
- -cli_output_t flv_output = { open_file, set_param, write_headers, write_frame, close_file };
- +const cli_output_t flv_output = { open_file, set_param, write_headers, write_frame, close_file };
- diff --git a/output/matroska.c b/output/matroska.c
- index b1805e4..fb39ced 100644
- --- a/output/matroska.c
- +++ b/output/matroska.c
- @@ -206,4 +206,4 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
- return ret;
- }
- -cli_output_t mkv_output = { open_file, set_param, write_headers, write_frame, close_file };
- +const cli_output_t mkv_output = { open_file, set_param, write_headers, write_frame, close_file };
- diff --git a/output/mp4.c b/output/mp4.c
- index b817c82..b99eaed 100644
- --- a/output/mp4.c
- +++ b/output/mp4.c
- @@ -298,4 +298,4 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
- return i_size;
- }
- -cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file };
- +const cli_output_t mp4_output = { open_file, set_param, write_headers, write_frame, close_file };
- diff --git a/output/output.h b/output/output.h
- index 851b819..c79b48e 100644
- --- a/output/output.h
- +++ b/output/output.h
- @@ -33,9 +33,9 @@ typedef struct
- int (*close_file)( hnd_t handle, int64_t largest_pts, int64_t second_largest_pts );
- } cli_output_t;
- -extern cli_output_t raw_output;
- -extern cli_output_t mkv_output;
- -extern cli_output_t mp4_output;
- -extern cli_output_t flv_output;
- +extern const cli_output_t raw_output;
- +extern const cli_output_t mkv_output;
- +extern const cli_output_t mp4_output;
- +extern const cli_output_t flv_output;
- #endif
- diff --git a/output/raw.c b/output/raw.c
- index a4d1175..02e4c56 100644
- --- a/output/raw.c
- +++ b/output/raw.c
- @@ -62,5 +62,5 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
- return fclose( (FILE*)handle );
- }
- -cli_output_t raw_output = { open_file, set_param, write_headers, write_frame, close_file };
- +const cli_output_t raw_output = { open_file, set_param, write_headers, write_frame, close_file };
- --
- 1.6.1.2
- From b1939fa0adbff86f35960c14619211d1ab51e174 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 13 Feb 2010 00:52:31 -0800
- Subject: [PATCH 21/24] Make the ABR buffer consider the distance to the end of the video
- Should improve bitrate accuracy in 2-pass mode.
- May also slightly improve quality by allowing more variation earlier-on in a file.
- Also fix abr_buffer with 1-pass: in 1-pass mode the buffer does something very different from what it does in 2-pass mode.
- Thus, the earlier change that increased it based on threads caused 1-pass ABR to be somewhat less accurate.
- ---
- encoder/ratecontrol.c | 6 ++++--
- 1 files changed, 4 insertions(+), 2 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index b2cbb26..0b809c5 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -1796,13 +1796,15 @@ static float rate_estimate_qscale( x264_t *h )
- }
- else
- {
- - double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * h->i_thread_frames;
- + double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate;
- if( rcc->b_2pass )
- {
- - //FIXME adjust abr_buffer based on distance to the end of the video
- int64_t diff;
- int64_t predicted_bits = total_bits;
- + /* Adjust ABR buffer based on distance to the end of the video. */
- + if( rcc->num_entries > h->fenc->i_frame )
- + abr_buffer *= X264_MAX( log( rcc->num_entries - h->fenc->i_frame ), 1 );
- if( rcc->b_vbv )
- {
- --
- 1.6.1.2
- From 3779fc91240b422201395174e1610b2dd93334a9 Mon Sep 17 00:00:00 2001
- From: David Conrad <lessen42@gmail.com>
- Date: Sat, 13 Feb 2010 01:25:56 -0800
- Subject: [PATCH 22/24] Use #ifdef instead of #if in checkasm
- ---
- tools/checkasm.c | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
- diff --git a/tools/checkasm.c b/tools/checkasm.c
- index 0bedc5b..595bd9e 100644
- --- a/tools/checkasm.c
- +++ b/tools/checkasm.c
- @@ -1662,13 +1662,13 @@ static int check_all_flags( void )
- cpu1 &= ~X264_CPU_CACHELINE_64;
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4, "SSE4" );
- }
- -#elif ARCH_PPC
- +#elif defined(ARCH_PPC)
- if( x264_cpu_detect() & X264_CPU_ALTIVEC )
- {
- fprintf( stderr, "x264: ALTIVEC against C\n" );
- ret = check_all_funcs( 0, X264_CPU_ALTIVEC );
- }
- -#elif ARCH_ARM
- +#elif defined(ARCH_ARM)
- if( x264_cpu_detect() & X264_CPU_ARMV6 )
- ret |= add_flags( &cpu0, &cpu1, X264_CPU_ARMV6, "ARMv6" );
- if( x264_cpu_detect() & X264_CPU_NEON )
- --
- 1.6.1.2
- From 2ac5d04b4d20142fc2c277fe5ef8dbe41c73fcdb Mon Sep 17 00:00:00 2001
- From: David Conrad <lessen42@gmail.com>
- Date: Fri, 8 Jan 2010 22:40:09 -0500
- Subject: [PATCH 23/24] ARM NEON versions of weightp functions
- ---
- common/arm/mc-a.S | 305 +++++++++++++++++++++++++++++++++++++++++++++++++++++
- common/arm/mc-c.c | 47 ++++++++
- 2 files changed, 352 insertions(+), 0 deletions(-)
- diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
- index a62af39..e1db404 100644
- --- a/common/arm/mc-a.S
- +++ b/common/arm/mc-a.S
- @@ -432,6 +432,311 @@ avg2_w20_loop:
- .endfunc
- +.macro weight_prologue type
- + push {r4-r5,lr}
- + ldr r4, [sp, #4*3] // weight_t
- + ldr ip, [sp, #4*3+4] // h
- +.ifc \type, full
- + ldr lr, [r4, #32] // denom
- +.endif
- + ldrd r4, [r4, #32+4] // scale, offset
- + vdup.16 q0, r4
- + vdup.16 q1, r5
- +.ifc \type, full
- + rsb lr, lr, #0
- + vdup.16 q2, lr
- +.endif
- +.endm
- +
- +// void mc_weight( uint8_t *dst, int dst_stride, uint8_t *src, int src_stride,
- +// const x264_weight_t *weight, int height )
- +function x264_mc_weight_w20_neon
- + weight_prologue full
- + sub r1, #16
- +weight20_loop:
- + subs ip, #2
- + vld1.8 {d17-d19}, [r2], r3
- + vmovl.u8 q10, d17
- + vmovl.u8 q11, d18
- + vmovl.u8 q14, d19
- + vld1.8 {d16-d18}, [r2], r3
- + vmovl.u8 q12, d16
- + vmovl.u8 q13, d17
- + vmovl.u8 q15, d18
- + vmul.s16 q10, q10, q0
- + vmul.s16 q11, q11, q0
- + vmul.s16 q12, q12, q0
- + vmul.s16 q13, q13, q0
- + vmul.s16 d28, d28, d0
- + vmul.s16 d29, d30, d0
- + vrshl.s16 q10, q10, q2
- + vrshl.s16 q11, q11, q2
- + vrshl.s16 q12, q12, q2
- + vrshl.s16 q13, q13, q2
- + vrshl.s16 q14, q14, q2
- + vadd.s16 q10, q10, q1
- + vadd.s16 q11, q11, q1
- + vadd.s16 q12, q12, q1
- + vadd.s16 q13, q13, q1
- + vadd.s16 q14, q14, q1
- + vqmovun.s16 d16, q10
- + vqmovun.s16 d17, q11
- + vqmovun.s16 d18, q12
- + vqmovun.s16 d19, q13
- + vqmovun.s16 d20, q14
- + vst1.8 {d16-d17}, [r0,:128]!
- + vst1.32 {d20[0]}, [r0,:32], r1
- + vst1.8 {d18-d19}, [r0,:128]!
- + vst1.32 {d20[1]}, [r0,:32], r1
- + bgt weight20_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w16_neon
- + weight_prologue full
- +weight16_loop:
- + subs ip, #2
- + vld1.8 {d16-d17}, [r2], r3
- + vld1.8 {d18-d19}, [r2], r3
- + vmovl.u8 q10, d16
- + vmovl.u8 q11, d17
- + vmovl.u8 q12, d18
- + vmovl.u8 q13, d19
- + vmul.s16 q10, q10, q0
- + vmul.s16 q11, q11, q0
- + vmul.s16 q12, q12, q0
- + vmul.s16 q13, q13, q0
- + vrshl.s16 q10, q10, q2
- + vrshl.s16 q11, q11, q2
- + vrshl.s16 q12, q12, q2
- + vrshl.s16 q13, q13, q2
- + vadd.s16 q10, q10, q1
- + vadd.s16 q11, q11, q1
- + vadd.s16 q12, q12, q1
- + vadd.s16 q13, q13, q1
- + vqmovun.s16 d16, q10
- + vqmovun.s16 d17, q11
- + vqmovun.s16 d18, q12
- + vqmovun.s16 d19, q13
- + vst1.8 {d16-d17}, [r0,:128], r1
- + vst1.8 {d18-d19}, [r0,:128], r1
- + bgt weight16_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w8_neon
- + weight_prologue full
- +weight8_loop:
- + subs ip, #2
- + vld1.8 {d16}, [r2], r3
- + vld1.8 {d18}, [r2], r3
- + vmovl.u8 q8, d16
- + vmovl.u8 q9, d18
- + vmul.s16 q8, q8, q0
- + vmul.s16 q9, q9, q0
- + vrshl.s16 q8, q8, q2
- + vrshl.s16 q9, q9, q2
- + vadd.s16 q8, q8, q1
- + vadd.s16 q9, q9, q1
- + vqmovun.s16 d16, q8
- + vqmovun.s16 d18, q9
- + vst1.8 {d16}, [r0,:64], r1
- + vst1.8 {d18}, [r0,:64], r1
- + bgt weight8_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w4_neon
- + weight_prologue full
- +weight4_loop:
- + subs ip, #2
- + vld1.32 {d16[]}, [r2], r3
- + vld1.32 {d18[]}, [r2], r3
- + vmovl.u8 q8, d16
- + vmovl.u8 q9, d18
- + vmul.s16 d16, d16, d0
- + vmul.s16 d17, d18, d0
- + vrshl.s16 q8, q8, q2
- + vadd.s16 q8, q8, q1
- + vqmovun.s16 d16, q8
- + vst1.32 {d16[0]}, [r0,:32], r1
- + vst1.32 {d16[1]}, [r0,:32], r1
- + bgt weight4_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w20_nodenom_neon
- + weight_prologue nodenom
- + sub r1, #16
- +weight20_nodenom_loop:
- + subs ip, #2
- + vld1.8 {d17-d19}, [r2], r3
- + vmovl.u8 q10, d17
- + vmovl.u8 q11, d18
- + vmovl.u8 q14, d19
- + vld1.8 {d16-d18}, [r2], r3
- + vmovl.u8 q12, d16
- + vmovl.u8 q13, d17
- + vmovl.u8 q15, d18
- + vmov q8, q1
- + vmov q9, q1
- + vmla.s16 q8, q10, q0
- + vmla.s16 q9, q11, q0
- + vmov q10, q1
- + vmov q11, q1
- + vmla.s16 q10, q12, q0
- + vmla.s16 q11, q13, q0
- + vmov q12, q1
- + vmla.s16 d24, d28, d0
- + vmla.s16 d25, d30, d0
- + vqmovun.s16 d16, q8
- + vqmovun.s16 d17, q9
- + vqmovun.s16 d18, q10
- + vqmovun.s16 d19, q11
- + vqmovun.s16 d20, q12
- + vst1.8 {d16-d17}, [r0,:128]!
- + vst1.32 {d20[0]}, [r0,:32], r1
- + vst1.8 {d18-d19}, [r0,:128]!
- + vst1.32 {d20[1]}, [r0,:32], r1
- + bgt weight20_nodenom_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w16_nodenom_neon
- + weight_prologue nodenom
- +weight16_nodenom_loop:
- + subs ip, #2
- + vld1.8 {d16-d17}, [r2], r3
- + vld1.8 {d18-d19}, [r2], r3
- + vmovl.u8 q12, d16
- + vmovl.u8 q13, d17
- + vmovl.u8 q14, d18
- + vmovl.u8 q15, d19
- + vmov q8, q1
- + vmov q9, q1
- + vmov q10, q1
- + vmov q11, q1
- + vmla.s16 q8, q12, q0
- + vmla.s16 q9, q13, q0
- + vmla.s16 q10, q14, q0
- + vmla.s16 q11, q15, q0
- + vqmovun.s16 d16, q8
- + vqmovun.s16 d17, q9
- + vqmovun.s16 d18, q10
- + vqmovun.s16 d19, q11
- + vst1.8 {d16-d17}, [r0,:128], r1
- + vst1.8 {d18-d19}, [r0,:128], r1
- + bgt weight16_nodenom_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w8_nodenom_neon
- + weight_prologue nodenom
- +weight8_nodenom_loop:
- + subs ip, #2
- + vld1.8 {d16}, [r2], r3
- + vld1.8 {d18}, [r2], r3
- + vmovl.u8 q8, d16
- + vmovl.u8 q9, d18
- + vmov q10, q1
- + vmov q11, q1
- + vmla.s16 q10, q8, q0
- + vmla.s16 q11, q9, q0
- + vqmovun.s16 d16, q10
- + vqmovun.s16 d17, q11
- + vst1.8 {d16}, [r0,:64], r1
- + vst1.8 {d17}, [r0,:64], r1
- + bgt weight8_nodenom_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +function x264_mc_weight_w4_nodenom_neon
- + weight_prologue nodenom
- +weight4_nodenom_loop:
- + subs ip, #2
- + vld1.32 {d16[]}, [r2], r3
- + vld1.32 {d18[]}, [r2], r3
- + vmovl.u8 q8, d16
- + vmovl.u8 q9, d18
- + vmov q10, q1
- + vmla.s16 d20, d16, d0
- + vmla.s16 d21, d18, d0
- + vqmovun.s16 d16, q10
- + vst1.32 {d16[0]}, [r0,:32], r1
- + vst1.32 {d16[1]}, [r0,:32], r1
- + bgt weight4_nodenom_loop
- + pop {r4-r5,pc}
- +.endfunc
- +
- +.macro weight_simple_prologue
- + push {lr}
- + ldr lr, [sp, #4] // weight_t
- + ldr ip, [sp, #8] // h
- + ldr lr, [lr] // offset
- + vdup.8 q1, lr
- +.endm
- +
- +.macro weight_simple name op
- +function x264_mc_weight_w20_\name\()_neon
- + weight_simple_prologue
- +weight20_\name\()_loop:
- + subs ip, #2
- + vld1.8 {d16-d18}, [r2], r3
- + vld1.8 {d19-d21}, [r2], r3
- + \op q8, q8, q1
- + \op q9, q9, q1
- + \op q10, q10, q1
- + vst1.8 {d16-d18}, [r0,:64], r1
- + vst1.8 {d19-d21}, [r0,:64], r1
- + bgt weight20_\name\()_loop
- + pop {pc}
- +.endfunc
- +
- +function x264_mc_weight_w16_\name\()_neon
- + weight_simple_prologue
- +weight16_\name\()_loop:
- + subs ip, #2
- + vld1.8 {d16-d17}, [r2], r3
- + vld1.8 {d18-d19}, [r2], r3
- + \op q8, q8, q1
- + \op q9, q9, q1
- + vst1.8 {d16-d17}, [r0,:128], r1
- + vst1.8 {d18-d19}, [r0,:128], r1
- + bgt weight16_\name\()_loop
- + pop {pc}
- +.endfunc
- +
- +function x264_mc_weight_w8_\name\()_neon
- + weight_simple_prologue
- +weight8_\name\()_loop:
- + subs ip, #2
- + vld1.8 {d16}, [r2], r3
- + vld1.8 {d17}, [r2], r3
- + \op q8, q8, q1
- + vst1.8 {d16}, [r0,:64], r1
- + vst1.8 {d17}, [r0,:64], r1
- + bgt weight8_\name\()_loop
- + pop {pc}
- +.endfunc
- +
- +function x264_mc_weight_w4_\name\()_neon
- + weight_simple_prologue
- +weight4_\name\()_loop:
- + subs ip, #2
- + vld1.32 {d16[]}, [r2], r3
- + vld1.32 {d17[]}, [r2], r3
- + \op q8, q8, q1
- + vst1.32 {d16[0]}, [r0,:32], r1
- + vst1.32 {d17[0]}, [r0,:32], r1
- + bgt weight4_\name\()_loop
- + pop {pc}
- +.endfunc
- +.endm
- +
- +weight_simple offsetadd, vqadd.u8
- +weight_simple offsetsub, vqsub.u8
- +
- +
- // void mc_copy( uint8_t *dst, int dst_stride, uint8_t *src, int src_stride, int height )
- function x264_mc_copy_w4_neon
- ldr ip, [sp]
- diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
- index 20cf151..0a7b734 100644
- --- a/common/arm/mc-c.c
- +++ b/common/arm/mc-c.c
- @@ -43,6 +43,48 @@ void x264_pixel_avg2_w8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
- void x264_pixel_avg2_w16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
- void x264_pixel_avg2_w20_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
- +#define MC_WEIGHT(func)\
- +void x264_mc_weight_w20##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
- +void x264_mc_weight_w16##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
- +void x264_mc_weight_w8##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
- +void x264_mc_weight_w4##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
- +\
- +static void (* const x264_mc##func##_wtab_neon[6])( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int ) =\
- +{\
- + x264_mc_weight_w4##func##_neon,\
- + x264_mc_weight_w4##func##_neon,\
- + x264_mc_weight_w8##func##_neon,\
- + x264_mc_weight_w16##func##_neon,\
- + x264_mc_weight_w16##func##_neon,\
- + x264_mc_weight_w20##func##_neon,\
- +};
- +
- +MC_WEIGHT()
- +MC_WEIGHT(_nodenom)
- +MC_WEIGHT(_offsetadd)
- +MC_WEIGHT(_offsetsub)
- +
- +static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
- +{
- + if( w->i_scale == 1<<w->i_denom )
- + {
- + if( w->i_offset < 0 )
- + {
- + w->weightfn = x264_mc_offsetsub_wtab_neon;
- + w->cachea[0] = -w->i_offset;
- + }
- + else
- + {
- + w->weightfn = x264_mc_offsetadd_wtab_neon;
- + w->cachea[0] = w->i_offset;
- + }
- + }
- + else if( !w->i_denom )
- + w->weightfn = x264_mc_nodenom_wtab_neon;
- + else
- + w->weightfn = x264_mc_wtab_neon;
- +}
- +
- void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
- void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
- void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
- @@ -182,6 +224,11 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
- pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_neon;
- pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_neon;
- + pf->weight = x264_mc_wtab_neon;
- + pf->offsetadd = x264_mc_offsetadd_wtab_neon;
- + pf->offsetsub = x264_mc_offsetsub_wtab_neon;
- + pf->weight_cache = x264_weight_cache_neon;
- +
- // Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
- #ifndef SYS_MACOSX
- pf->memcpy_aligned = x264_memcpy_aligned_neon;
- --
- 1.6.1.2
- From af5f0a12d0d3b74d0d0e50b1f4eee409c376a989 Mon Sep 17 00:00:00 2001
- From: David Conrad <lessen42@gmail.com>
- Date: Sun, 4 Oct 2009 07:24:42 -0400
- Subject: [PATCH 24/24] iPhone compilation support
- Also add --sysroot to configure options
- To build for iPhone 3gs / iPod touch 3g:
- CC=/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/gcc ./configure --host=arm-apple-darwin --sysroot=/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.0.sdk
- For older devices, add
- --extra-cflags='-arch armv6 -mcpu=arm1176jzf-s' --extra-ldflags='-arch armv6' --disable-asm
- ---
- common/arm/asm.S | 9 ++-
- common/arm/pixel-a.S | 13 ++-
- configure | 17 +++-
- extras/gas-preprocessor.pl | 256 ++++++++++++++++++++++++++++++++++++++++++++
- 4 files changed, 287 insertions(+), 8 deletions(-)
- create mode 100755 extras/gas-preprocessor.pl
- diff --git a/common/arm/asm.S b/common/arm/asm.S
- index d163165..395267f 100644
- --- a/common/arm/asm.S
- +++ b/common/arm/asm.S
- @@ -20,6 +20,12 @@
- #include "config.h"
- +#ifdef PREFIX
- +# define EXTERN_ASM _
- +#else
- +# define EXTERN_ASM
- +#endif
- +
- #ifdef __ELF__
- # define ELF
- #else
- @@ -35,7 +41,8 @@ ELF .eabi_attribute 25, \val
- .endm
- .macro function name
- - .global \name
- + .global EXTERN_ASM\name
- +EXTERN_ASM\name:
- ELF .hidden \name
- ELF .type \name, %function
- .func \name
- diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
- index 4dd65ed..d8533e5 100644
- --- a/common/arm/pixel-a.S
- +++ b/common/arm/pixel-a.S
- @@ -110,16 +110,17 @@ SAD4_ARMV6 8
- .macro SAD_FUNC w, h, name, align:vararg
- function x264_pixel_sad\name\()_\w\()x\h\()_neon
- + SAD_START_\w \align
- +
- .if \w == 16
- - .set r, \h / 2 - 1
- +.rept \h / 2 - 1
- + SAD_\w \align
- +.endr
- .else
- - .set r, \h - 1
- -.endif
- -
- - SAD_START_\w \align
- -.rept r
- +.rept \h - 1
- SAD_\w \align
- .endr
- +.endif
- .if \w > 8
- vabal.u8 q8, d4, d6
- diff --git a/configure b/configure
- index b254383..b25cd36 100755
- --- a/configure
- +++ b/configure
- @@ -23,6 +23,7 @@ echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
- echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
- echo " --host=HOST build programs to run on HOST"
- echo " --cross-prefix=PREFIX use PREFIX for compilation tools"
- +echo " --sysroot=SYSROOT root of cross-build tree"
- echo ""
- exit 1
- fi
- @@ -223,6 +224,10 @@ for opt do
- --cross-prefix=*)
- cross_prefix="${opt#--cross-prefix=}"
- ;;
- + --sysroot=*)
- + CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
- + LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
- + ;;
- *)
- echo "Unknown option $opt, ignored"
- ;;
- @@ -367,7 +372,17 @@ case $host_cpu in
- ;;
- arm*)
- ARCH="ARM"
- - AS="${AS-${cross_prefix}gcc}"
- + if [ "$SYS" = MACOSX ] ; then
- + AS="${AS-./extras/gas-preprocessor.pl $CC}"
- + ASFLAGS="$ASFLAGS -DPREFIX -DPIC" # apple's ld doesn't support movw/movt relocations at all
- + # build for armv7 by default
- + if ! echo $CFLAGS | grep -Eq '\-arch' ; then
- + CFLAGS="$CFLAGS -arch armv7"
- + LDFLAGS="$LDFLAGS -arch armv7"
- + fi
- + else
- + AS="${AS-${cross_prefix}gcc}"
- + fi
- ;;
- s390|s390x)
- ARCH="S390"
- diff --git a/extras/gas-preprocessor.pl b/extras/gas-preprocessor.pl
- new file mode 100755
- index 0000000..d60893c
- --- /dev/null
- +++ b/extras/gas-preprocessor.pl
- @@ -0,0 +1,256 @@
- +#!/usr/bin/env perl
- +# by David Conrad
- +# This code is licensed under GPLv2 or later; go to gnu.org to read it
- +# (not that it much matters for an asm preprocessor)
- +# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
- +use strict;
- +
- +# Apple's gas is ancient and doesn't support modern preprocessing features like
- +# .rept and has ugly macro syntax, among other things. Thus, this script
- +# implements the subset of the gas preprocessor used by x264 and ffmpeg
- +# that isn't supported by Apple's gas.
- +
- +# FIXME: doesn't work if the path has spaces, but oh well...
- +my $gcc_cmd = join(' ', @ARGV);
- +my $preprocess_c_cmd;
- +
- +if ($gcc_cmd =~ /\S+\.c/) {
- + # C file (inline asm?) - compile
- + $preprocess_c_cmd = "$gcc_cmd -S";
- + $gcc_cmd =~ s/\S+\.c/-x assembler -/g;
- +} elsif ($gcc_cmd =~ /\S+\.S/) {
- + # asm file, just do C preprocessor
- + $preprocess_c_cmd = "$gcc_cmd -E";
- + $gcc_cmd =~ s/\S+\.S/-x assembler -/g;
- +} else {
- + die "Unrecognized input filetype";
- +}
- +
- +$preprocess_c_cmd =~ s/\S+\.o/-/g;
- +
- +open(ASMFILE, "-|", $preprocess_c_cmd) || die "Error running preprocessor";
- +
- +my $current_macro = '';
- +my %macro_lines;
- +my %macro_args;
- +my %macro_args_default;
- +
- +my @pass1_lines;
- +
- +# pass 1: parse .macro
- +# note that the handling of arguments is probably overly permissive vs. gas
- +# but it should be the same for valid cases
- +while (<ASMFILE>) {
- + # comment out unsupported directives
- + s/\.type/@.type/x;
- + s/\.func/@.func/x;
- + s/\.endfunc/@.endfunc/x;
- + s/\.ltorg/@.ltorg/x;
- + s/\.size/@.size/x;
- + s/\.fpu/@.fpu/x;
- +
- + # the syntax for these is a little different
- + s/\.global/.globl/x;
- + # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
- + s/(.*)\.rodata/.const_data/x;
- + s/\.int/.long/x;
- + s/\.float/.single/x;
- +
- + # catch unknown section names that aren't mach-o style (with a comma)
- + if (/.section ([^,]*)$/) {
- + die ".section $1 unsupported; figure out the mach-o section name and add it";
- + }
- +
- + # macros creating macros is not handled (is that valid?)
- + if (/\.macro\s+([\d\w\.]+)\s*(.*)/) {
- + $current_macro = $1;
- +
- + # commas in the argument list are optional, so only use whitespace as the separator
- + my $arglist = $2;
- + $arglist =~ s/,/ /g;
- +
- + my @args = split(/\s+/, $arglist);
- + foreach my $i (0 .. $#args) {
- + my @argpair = split(/=/, $args[$i]);
- + $macro_args{$current_macro}[$i] = $argpair[0];
- + $argpair[0] =~ s/:vararg$//;
- + $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
- + }
- + # ensure %macro_lines has the macro name added as a key
- + $macro_lines{$current_macro} = [];
- + } elsif (/\.endm/) {
- + if (!$current_macro) {
- + die "ERROR: .endm without .macro";
- + }
- + $current_macro = '';
- + } elsif ($current_macro) {
- + push(@{$macro_lines{$current_macro}}, $_);
- + } else {
- + expand_macros($_);
- + }
- +}
- +
- +sub expand_macros {
- + my $line = @_[0];
- + if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
- + push(@pass1_lines, $1);
- + my $macro = $2;
- +
- + # commas are optional here too, but are syntactically important because
- + # parameters can be blank
- + my @arglist = split(/,/, $3);
- + my @args;
- + foreach (@arglist) {
- + my @whitespace_split = split(/\s+/, $_);
- + if (!@whitespace_split) {
- + push(@args, '');
- + } else {
- + foreach (@whitespace_split) {
- + if (length($_)) {
- + push(@args, $_);
- + }
- + }
- + }
- + }
- +
- + my %replacements;
- + if ($macro_args_default{$macro}){
- + %replacements = %{$macro_args_default{$macro}};
- + }
- +
- + # construct hashtable of text to replace
- + foreach my $i (0 .. $#args) {
- + my $argname = $macro_args{$macro}[$i];
- +
- + if ($args[$i] =~ m/=/) {
- + # arg=val references the argument name
- + # XXX: I'm not sure what the expected behaviour is if a lot of
- + # these are mixed with unnamed args
- + my @named_arg = split(/=/, $args[$i]);
- + $replacements{$named_arg[0]} = $named_arg[1];
- + } elsif ($i > $#{$macro_args{$macro}}) {
- + # more args given than the macro has named args
- + # XXX: is vararg allowed on arguments before the last?
- + $argname = $macro_args{$macro}[-1];
- + if ($argname =~ s/:vararg$//) {
- + $replacements{$argname} .= ", $args[$i]";
- + } else {
- + die "Too many arguments to macro $macro";
- + }
- + } else {
- + $argname =~ s/:vararg$//;
- + $replacements{$argname} = $args[$i];
- + }
- + }
- +
- + # apply replacements as regex
- + foreach (@{$macro_lines{$macro}}) {
- + my $macro_line = $_;
- + # do replacements by longest first, this avoids wrong replacement
- + # when argument names are subsets of each other
- + foreach (reverse sort {length $a <=> length $b} keys %replacements) {
- + $macro_line =~ s/\\$_/$replacements{$_}/g;
- + }
- + $macro_line =~ s/\\\(\)//g; # remove \()
- + expand_macros($macro_line);
- + }
- + } else {
- + push(@pass1_lines, $line);
- + }
- +}
- +
- +close(ASMFILE) or exit 1;
- +open(ASMFILE, "|-", $gcc_cmd) or die "Error running assembler";
- +
- +my @sections;
- +my $num_repts;
- +my $rept_lines;
- +
- +my %literal_labels; # for ldr <reg>, =<expr>
- +my $literal_num = 0;
- +
- +# pass 2: parse .rept and .if variants
- +# NOTE: since we don't implement a proper parser, using .rept with a
- +# variable assigned from .set is not supported
- +foreach my $line (@pass1_lines) {
- + # textual comparison .if
- + # this assumes nothing else on the same line
- + if ($line =~ /\.ifnb\s+(.*)/) {
- + if ($1) {
- + $line = ".if 1\n";
- + } else {
- + $line = ".if 0\n";
- + }
- + } elsif ($line =~ /\.ifb\s+(.*)/) {
- + if ($1) {
- + $line = ".if 0\n";
- + } else {
- + $line = ".if 1\n";
- + }
- + } elsif ($line =~ /\.ifc\s+(.*)\s*,\s*(.*)/) {
- + if ($1 eq $2) {
- + $line = ".if 1\n";
- + } else {
- + $line = ".if 0\n";
- + }
- + }
- +
- + # handle .previous (only with regard to .section not .subsection)
- + if ($line =~ /\.(section|text|const_data)/) {
- + push(@sections, $line);
- + } elsif ($line =~ /\.previous/) {
- + if (!$sections[-2]) {
- + die ".previous without a previous section";
- + }
- + $line = $sections[-2];
- + push(@sections, $line);
- + }
- +
- + # handle ldr <reg>, =<expr>
- + if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
- + my $label = $literal_labels{$3};
- + if (!$label) {
- + $label = ".Literal_$literal_num";
- + $literal_num++;
- + $literal_labels{$3} = $label;
- + }
- + $line = "$1 ldr$2, $label\n";
- + } elsif ($line =~ /\.ltorg/) {
- + foreach my $literal (keys %literal_labels) {
- + $line .= "$literal_labels{$literal}:\n .word $literal\n";
- + }
- + %literal_labels = ();
- + }
- +
- + # @l -> lo16() @ha -> ha16()
- + $line =~ s/,\s+([^,]+)\@l(\s)/, lo16($1)$2/g;
- + $line =~ s/,\s+([^,]+)\@ha(\s)/, ha16($1)$2/g;
- +
- + if ($line =~ /\.rept\s+(.*)/) {
- + $num_repts = $1;
- + $rept_lines = "\n";
- +
- + # handle the possibility of repeating another directive on the same line
- + # .endr on the same line is not valid, I don't know if a non-directive is
- + if ($num_repts =~ s/(\.\w+.*)//) {
- + $rept_lines .= "$1\n";
- + }
- + $num_repts = eval($num_repts);
- + } elsif ($line =~ /\.endr/) {
- + for (1 .. $num_repts) {
- + print ASMFILE $rept_lines;
- + }
- + $rept_lines = '';
- + } elsif ($rept_lines) {
- + $rept_lines .= $line;
- + } else {
- + print ASMFILE $line;
- + }
- +}
- +
- +print ASMFILE ".text\n";
- +foreach my $literal (keys %literal_labels) {
- + print ASMFILE "$literal_labels{$literal}:\n .word $literal\n";
- +}
- +
- +close(ASMFILE) or exit 1;
- --
- 1.6.1.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement