Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- From 396a8f247bcf548189de29a7bf9cbad968a7ce50 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:05:54 +0000
- Subject: [PATCH 1/5] Save interlace decision for all mbs
- ---
- common/common.h | 1 +
- common/frame.c | 3 +++
- common/frame.h | 1 +
- common/macroblock.c | 1 +
- encoder/encoder.c | 8 ++++++++
- 5 files changed, 14 insertions(+), 0 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 4654c17..b450ac0 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -616,6 +616,7 @@ struct x264_t
- int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
- uint16_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of
- * NOTE: this will fail on resolutions above 2^16 MBs... */
- + uint8_t *field;
- /* buffer for weighted versions of the reference frames */
- pixel *p_weight_buf[X264_REF_MAX];
- diff --git a/common/frame.c b/common/frame.c
- index ca90539..eff8ca5 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -145,6 +145,8 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
- frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
- }
- + if( h->param.b_interlaced )
- + CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
- }
- else /* fenc frame */
- {
- @@ -219,6 +221,7 @@ void x264_frame_delete( x264_frame_t *frame )
- x264_free( frame->i_inv_qscale_factor );
- x264_free( frame->i_row_bits );
- x264_free( frame->f_row_qp );
- + x264_free( frame->field );
- x264_free( frame->mb_type );
- x264_free( frame->mb_partition );
- x264_free( frame->mv[0] );
- diff --git a/common/frame.h b/common/frame.h
- index 38d0bf2..7a1bd77 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -62,6 +62,7 @@ typedef struct x264_frame
- float f_qp_avg_rc; /* QPs as decided by ratecontrol */
- float f_qp_avg_aq; /* QPs as decided by AQ in addition to ratecontrol */
- int i_poc_l0ref0; /* poc of first refframe in L0, used to check if direct temporal is possible */
- + uint8_t *field;
- /* YUV buffer */
- int i_plane;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index e559ab1..de5b01f 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -368,6 +368,7 @@ void x264_macroblock_slice_init( x264_t *h )
- h->mb.ref[1] = h->fdec->ref[1];
- h->mb.type = h->fdec->mb_type;
- h->mb.partition = h->fdec->mb_partition;
- + h->mb.field = h->fdec->field;
- h->fdec->i_ref[0] = h->i_ref[0];
- h->fdec->i_ref[1] = h->i_ref[1];
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 0182c8c..3b9a175 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1950,6 +1950,14 @@ static int x264_slice_write( x264_t *h )
- }
- }
- + if( h->param.b_interlaced )
- + {
- + if( !(i_mb_y&1) )
- + h->mb.b_interlaced = 1;
- + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + h->mb.field[mb_xy] = h->mb.b_interlaced;
- + }
- +
- if( i_mb_x == 0 && !h->mb.b_reencode_mb )
- x264_fdec_filter_row( h, i_mb_y, 1 );
- --
- 1.7.1
- From 05caa9ab13813eb34aca6e01841ef975576db4bc Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:09:00 +0000
- Subject: [PATCH 2/5] Disable adaptive MBAFF when subme 0 is used.
- ---
- encoder/encoder.c | 9 ++++++---
- x264.c | 12 ++++++++++++
- x264.h | 1 +
- 3 files changed, 19 insertions(+), 3 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 3b9a175..4fb0a57 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1952,9 +1952,12 @@ static int x264_slice_write( x264_t *h )
- if( h->param.b_interlaced )
- {
- - if( !(i_mb_y&1) )
- - h->mb.b_interlaced = 1;
- - x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + if( h->param.b_adaptive_mbaff )
- + {
- + if( !(i_mb_y&1) )
- + h->mb.b_interlaced = 1;
- + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + }
- h->mb.field[mb_xy] = h->mb.b_interlaced;
- }
- diff --git a/x264.c b/x264.c
- index c6261d8..d9c60cd 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -1494,6 +1494,18 @@ generic_option:
- param->b_tff = !!info.tff;
- }
- + /* Adaptive MBAFF with subme 0 is not supported, because motion vectors
- + * between field macroblocks and frame macroblocks would require halving
- + * and half-pel interpolation. The chosen solution is to make MBAFF
- + * non-adaptive in this case. */
- + if( param->b_interlaced && !param->analyse.i_subpel_refine )
- + {
- + x264_cli_log( "x264", X264_LOG_WARNING, "Adaptive MBAFF and subme 0 are not supported. "
- + "Using non-adaptive MBAFF instead.\n");
- + param->b_adaptive_mbaff = 0;
- + }
- + else
- + param->b_adaptive_mbaff = 1;
- +
- /* Automatically reduce reference frame count to match the user's target level
- * if the user didn't explicitly set a reference frame count. */
- if( !b_user_ref )
- diff --git a/x264.h b/x264.h
- index 8f39497..739d456 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -291,6 +291,7 @@ typedef struct x264_param_t
- int i_cabac_init_idc;
- int b_interlaced;
- + int b_adaptive_mbaff; /* MBAFF+subme 0 require non-adaptive MBAFF i.e. all field mbs */
- int b_constrained_intra;
- int i_cqm_preset;
- --
- 1.7.1
- From ac56eec487176b63ca67c20da5e04d96faba9652 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:16:18 +0000
- Subject: [PATCH 3/5] Left index table
- ---
- common/common.h | 1 +
- common/macroblock.c | 56 ++++++++++++++++++++++++++++++++------------------
- 2 files changed, 37 insertions(+), 20 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index b450ac0..f244e3d 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -594,6 +594,7 @@ struct x264_t
- int i_mb_top_xy;
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- + int *left_index_table;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- diff --git a/common/macroblock.c b/common/macroblock.c
- index de5b01f..40f5c67 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -547,6 +547,18 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- }
- }
- +static const int left_indices[5][22] = {
- +/* intra modes nnz mv ref real indices */
- + /* Current is progressive. */
- + { 4, 4, 5, 5, 3, 3, 7, 7, 16+1, 16+1, 16+4+1, 16+4+1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
- + { 6, 6, 3, 3, 11, 11, 15, 15, 16+3, 16+3, 16+4+3, 16+4+3, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 },
- + /* Current is interlaced. */
- + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
- + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
- + /* Both the same. */
- + { 4, 5, 6, 3, 3, 7, 11, 15, 16+1, 16+3, 16+4+1, 16+4+3, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3 },
- +};
- +
- static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
- {
- int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
- @@ -567,6 +579,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_type_left = -1;
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- + h->mb.left_index_table = left_indices[4];
- if( mb_x > 0 )
- {
- @@ -658,6 +671,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int16_t *cbp = h->mb.cbp;
- + int *left_index_table = h->mb.left_index_table;
- +
- /* load cache */
- if( h->mb.i_neighbour & MB_TOP )
- {
- @@ -700,22 +715,22 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- h->mb.cache.i_cbp_left = cbp[left];
- /* load intra4x4 */
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][4];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][5];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][6];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][3];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
- /* load non_zero_count */
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][16+1];
- - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][16+3];
- + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
- + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][16+4+1];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][16+4+3];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
- }
- else
- {
- @@ -854,10 +869,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][4] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][5] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][6] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][3] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
- }
- else
- for( int i = 0; i < 4; i++ )
- @@ -946,6 +961,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- int s4x4 = h->mb.i_b4_stride;
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- + int *left_index_table = h->mb.left_index_table;
- if( h->mb.i_neighbour & MB_TOP )
- CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
- @@ -953,10 +969,10 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- int left = h->mb.i_mb_left_xy;
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- }
- for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
- --
- 1.7.1
- From 8e33318d3a7c64c2f4a8e77bed4464e5748caf33 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:21:26 +0000
- Subject: [PATCH 4/5] Store references to the two left macroblocks in MBAFF
- ---
- common/common.h | 4 ++--
- common/deblock.c | 4 ++--
- common/macroblock.c | 20 ++++++++++----------
- common/mvpred.c | 2 +-
- encoder/analyse.c | 8 ++++----
- encoder/cabac.c | 8 ++++----
- 6 files changed, 23 insertions(+), 23 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index f244e3d..d49c41f 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -586,11 +586,11 @@ struct x264_t
- unsigned int i_neighbour_intra; /* for constrained intra pred */
- unsigned int i_neighbour_frame; /* ignoring slice boundaries */
- int i_mb_type_top;
- - int i_mb_type_left;
- + int i_mb_type_left[2];
- int i_mb_type_topleft;
- int i_mb_type_topright;
- int i_mb_prev_xy;
- - int i_mb_left_xy;
- + int i_mb_left_xy[2];
- int i_mb_top_xy;
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- diff --git a/common/deblock.c b/common/deblock.c
- index ff7c99f..5074b77 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -347,10 +347,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int qpl = h->mb.qp[h->mb.i_mb_left_xy];
- + int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
- int qp_left = (qp + qpl + 1) >> 1;
- int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
- - int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy] );
- + int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
- if( intra_cur || intra_left )
- FILTER( _intra, 0, 0, qp_left, qpc_left );
- else
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 40f5c67..1e40b28 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -572,11 +572,11 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_neighbour_intra = 0;
- h->mb.i_neighbour_frame = 0;
- h->mb.i_mb_top_xy = -1;
- - h->mb.i_mb_left_xy = -1;
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
- h->mb.i_mb_topleft_xy = -1;
- h->mb.i_mb_topright_xy = -1;
- h->mb.i_mb_type_top = -1;
- - h->mb.i_mb_type_left = -1;
- + h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- h->mb.left_index_table = left_indices[4];
- @@ -584,13 +584,13 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- if( mb_x > 0 )
- {
- h->mb.i_neighbour_frame |= MB_LEFT;
- - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- - h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
- if( h->mb.i_mb_xy > h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_LEFT;
- - if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
- + if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
- h->mb.i_neighbour_intra |= MB_LEFT;
- }
- }
- @@ -656,7 +656,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- int top = h->mb.i_mb_top_xy;
- int top_y = mb_y - (1 << h->mb.b_interlaced);
- int s8x8 = h->mb.i_b8_stride;
- @@ -924,8 +924,8 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
- if( mb_x > 0 )
- {
- - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- - if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_LEFT;
- }
- @@ -968,7 +968,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- @@ -1043,7 +1043,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- {
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int top = h->mb.i_mb_top_xy;
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
- {
- diff --git a/common/mvpred.c b/common/mvpred.c
- index a24dde8..c8efe1f 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -426,7 +426,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- }
- /* spatial predictors */
- - SET_MVP( mvr[h->mb.i_mb_left_xy] );
- + SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
- SET_MVP( mvr[h->mb.i_mb_top_xy] );
- SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
- SET_MVP( mvr[h->mb.i_mb_topright_xy] );
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index bf9f3c9..d87950e 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -516,7 +516,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- {
- /* Always run in fast-intra mode for subme < 3 */
- if( h->mb.i_subpel_refine > 2 &&
- - ( IS_INTRA( h->mb.i_mb_type_left ) ||
- + ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
- IS_INTRA( h->mb.i_mb_type_top ) ||
- IS_INTRA( h->mb.i_mb_type_topleft ) ||
- IS_INTRA( h->mb.i_mb_type_topright ) ||
- @@ -1296,7 +1296,7 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
- /* early termination: if 16x16 chose ref 0, then evalute no refs older
- * than those used by the neighbors */
- if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
- - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
- + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
- {
- i_maxref = 0;
- CHECK_NEIGHBOUR( -8 - 1 );
- @@ -2063,7 +2063,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
- {
- x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
- if( i_maxref[l] > 0 && lX->me16x16.i_ref == 0 &&
- - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
- + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
- {
- i_maxref[l] = 0;
- CHECK_NEIGHBOUR( -8 - 1 );
- @@ -2817,7 +2817,7 @@ intra_analysis:
- {}
- else if( h->param.analyse.i_subpel_refine >= 3 )
- analysis.b_try_skip = 1;
- - else if( h->mb.i_mb_type_left == P_SKIP ||
- + else if( h->mb.i_mb_type_left[0] == P_SKIP ||
- h->mb.i_mb_type_top == P_SKIP ||
- h->mb.i_mb_type_topleft == P_SKIP ||
- h->mb.i_mb_type_topright == P_SKIP )
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 6333737..334318d 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -79,7 +79,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- if( h->sh.i_type == SLICE_TYPE_I )
- {
- int ctx = 0;
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
- ctx++;
- @@ -113,7 +113,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- else //if( h->sh.i_type == SLICE_TYPE_B )
- {
- int ctx = 0;
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- ctx++;
- @@ -198,7 +198,7 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
- int ctx = 0;
- /* No need to test for I4x4 or I_16x16 as cache_save handle that */
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
- ctx++;
- @@ -280,7 +280,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- #if !RDO_SKIP_BS
- void x264_cabac_mb_skip( x264_t *h, int b_skip )
- {
- - int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left ))
- + int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
- + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
- + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
- x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
- --
- 1.7.1
- From 988b2632ea8b8a98f385840313284425d8f0cd89 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Fri, 14 Jan 2011 21:18:14 +0000
- Subject: [PATCH 5/5] Back up intra borders correctly for MBAFF and make neighbour calculation several times longer.
- ---
- common/common.h | 9 ++-
- common/macroblock.c | 292 +++++++++++++++++++++++++++++++++++++++++++--------
- 2 files changed, 255 insertions(+), 46 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index d49c41f..913c6f0 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -548,6 +548,8 @@ struct x264_t
- int i_mb_stride;
- int i_b8_stride;
- int i_b4_stride;
- + int left_b8[2];
- + int left_b4[2];
- /* Current index */
- int i_mb_x;
- @@ -595,6 +597,10 @@ struct x264_t
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- int *left_index_table;
- + int topleft_partition;
- + int intra_border_index;
- + int topleft_border_index;
- + int topright_border_index;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- @@ -825,7 +831,8 @@ struct x264_t
- /* Buffers that are allocated per-thread even in sliced threads. */
- void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
- - pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- + pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- + pixel *intra_diagonal_backup[5][3];
- uint8_t (*deblock_strength[2])[2][4][4];
- /* CPU functions dependents */
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 1e40b28..5c2ffc4 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -314,18 +314,35 @@ void x264_macroblock_cache_free( x264_t *h )
- int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- {
- if( !b_lookahead )
- - for( int i = 0; i <= h->param.b_interlaced; i++ )
- + {
- + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
- {
- for( int j = 0; j < 2; j++ )
- {
- /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
- CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
- h->intra_border_backup[i][j] += 16;
- - h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
- + if( !h->param.b_interlaced )
- + h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
- + }
- + }
- + for( int i = 0; i < 4*h->mb.b_interlaced; i++ )
- + {
- + for( int j = 0; j < 3; j++ )
- + {
- + const int width = 1 + 8; // top left pixel + eight top right pixels (for luma)
- + CHECKED_MALLOCZERO( h->intra_diagonal_backup[i][j], (h->sps->i_mb_width*width+32) * sizeof(pixel) );
- + h->intra_diagonal_backup[i][j] += 16;
- + if( !h->param.b_interlaced )
- + h->intra_diagonal_backup[1][j] = h->intra_diagonal_backup[i][j];
- }
- + }
- + for( int i = 0; i <= h->param.b_interlaced; i++ )
- + {
- CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
- h->deblock_strength[1] = h->deblock_strength[i];
- }
- + }
- /* Allocate scratch buffer */
- int scratch_size = 0;
- @@ -350,12 +367,20 @@ fail:
- void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
- {
- if( !b_lookahead )
- + {
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- - {
- x264_free( h->deblock_strength[i] );
- + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
- + {
- for( int j = 0; j < 2; j++ )
- x264_free( h->intra_border_backup[i][j] - 16 );
- }
- + for( int i = 0; i < 4*h->param.b_interlaced; i++ )
- + {
- + for( int j = 0; j < 3; j++ )
- + x264_free( h->intra_diagonal_backup[i][j] - 16 );
- + }
- + }
- x264_free( h->scratch_buffer );
- }
- @@ -491,14 +516,15 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- {
- int w = (i ? 8 : 16);
- int i_stride = h->fdec->i_stride[i];
- - int i_stride2 = i_stride << b_interlaced;
- - int i_pix_offset = b_interlaced
- + int i_stride2 = i_stride << h->mb.b_interlaced;
- + int i_pix_offset = h->mb.b_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- - pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
- + pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
- int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
- - if( b_interlaced )
- + /* ref_pix_offset[0] references the current field and [1] the opposite field. */
- + if( h->mb.b_interlaced )
- ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
- h->mb.pic.i_stride[i] = i_stride2;
- h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
- @@ -507,11 +533,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
- memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
- memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
- + if( h->sh.b_mbaff )
- + {
- + // Top left samples.
- + h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][1][mb_x*9];
- + h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][2][mb_x*9];
- + // Top right samples.
- + CP32( &h->mb.pic.p_fdec[1][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][1][mb_x*9+1] );
- + CP32( &h->mb.pic.p_fdec[2][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][2][mb_x*9+1] );
- + }
- +
- }
- else
- {
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
- - memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- + if( h->sh.b_mbaff )
- + {
- + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 16*sizeof(pixel) );
- + h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][0][mb_x*9];
- + CP64( &h->mb.pic.p_fdec[0][-FDEC_STRIDE+16], &h->intra_diagonal_backup[h->mb.topright_border_index][0][mb_x*9+1] );
- + }
- + else
- + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- }
- if( b_interlaced )
- {
- @@ -568,6 +611,10 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
- h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
- h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
- + h->mb.left_b8[0] =
- + h->mb.left_b8[1] = -1;
- + h->mb.left_b4[0] =
- + h->mb.left_b4[1] = -1;
- h->mb.i_neighbour = 0;
- h->mb.i_neighbour_intra = 0;
- h->mb.i_neighbour_frame = 0;
- @@ -580,16 +627,111 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- h->mb.left_index_table = left_indices[4];
- + h->mb.topleft_partition = 0;
- + h->mb.topright_border_index =
- + h->mb.topleft_border_index = !(mb_y&1);
- + h->mb.intra_border_index = mb_y&1;
- +
- + int topleft = top - 1;
- + int topright = top + 1;
- + int left[2];
- +
- + left[0] = left[1] = h->mb.i_mb_xy - 1;
- + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
- + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
- +
- + if( h->sh.b_mbaff )
- + {
- + if( mb_y&1 )
- + {
- + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
- + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
- + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
- +
- + if( h->mb.b_interlaced )
- + {
- + h->mb.left_index_table = left_indices[3];
- + left[1] += h->mb.i_mb_stride;
- + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
- + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
- + }
- + else
- + {
- + h->mb.left_index_table = left_indices[1];
- + topleft += h->mb.i_mb_stride;
- + h->mb.topleft_partition = 1;
- + h->mb.topleft_border_index = 3;
- + }
- + }
- + if( h->mb.b_interlaced )
- + h->mb.topleft_border_index = 1;
- + else
- + {
- + topright = -1;
- + h->mb.intra_border_index = 0;
- + }
- + h->mb.topright_border_index = 1;
- + }
- + else
- + {
- + if( h->mb.b_interlaced )
- + {
- + if( top >= 0 )
- + {
- + top += h->mb.i_mb_stride*(!h->mb.field[top]);
- + if( mb_x )
- + topleft += h->mb.i_mb_stride*(!h->mb.field[topleft]);
- + if( mb_x < h->mb.i_mb_width )
- + topright += h->mb.i_mb_stride*(!h->mb.field[topright]);
- + }
- +
- + if( top >= 0)
- + {
- + if( !h->mb.field[top] )
- + h->mb.intra_border_index = 2;
- +
- + if( topright >=0 && h->mb.field[topright] )
- + h->mb.topright_border_index = 0;
- + else
- + h->mb.topright_border_index = 2;
- +
- + if( topleft >=0 && h->mb.field[topleft] )
- + h->mb.topleft_border_index = 0;
- + else
- + h->mb.topleft_border_index = 2;
- + }
- + }
- + else
- + h->mb.intra_border_index = 1;
- + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + if( h->mb.b_interlaced )
- + {
- + h->mb.left_index_table = left_indices[2];
- + left[1] += h->mb.i_mb_stride;
- + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
- + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
- + }
- + else
- + h->mb.left_index_table = left_indices[0];
- + }
- + }
- + }
- if( mb_x > 0 )
- {
- h->mb.i_neighbour_frame |= MB_LEFT;
- - h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + h->mb.i_mb_left_xy[0] = left[0];
- + h->mb.i_mb_left_xy[1] = left[1];
- h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
- + h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
- if( h->mb.i_mb_xy > h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_LEFT;
- + // FIXME: We don't currently support constrained intra + mbaff.
- if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
- h->mb.i_neighbour_intra |= MB_LEFT;
- }
- @@ -622,12 +764,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- }
- }
- - if( mb_x > 0 && top - 1 >= 0 )
- + if( mb_x > 0 && topleft >= 0 )
- {
- h->mb.i_neighbour_frame |= MB_TOPLEFT;
- - h->mb.i_mb_topleft_xy = top - 1;
- + h->mb.i_mb_topleft_xy = topleft;
- h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
- - if( top - 1 >= h->sh.i_first_mb )
- + if( topleft >= h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_TOPLEFT;
- @@ -636,12 +778,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- }
- }
- - if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
- + if( mb_x < h->mb.i_mb_width - 1 && topright >= 0 )
- {
- h->mb.i_neighbour_frame |= MB_TOPRIGHT;
- - h->mb.i_mb_topright_xy = top + 1;
- + h->mb.i_mb_topright_xy = topright;
- h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
- - if( top + 1 >= h->sh.i_first_mb )
- + if( topright >= h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_TOPRIGHT;
- @@ -656,9 +798,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
- - int left = h->mb.i_mb_left_xy[0];
- + int *left = h->mb.i_mb_left_xy;
- int top = h->mb.i_mb_top_xy;
- - int top_y = mb_y - (1 << h->mb.b_interlaced);
- + int top_y = top / h->mb.i_mb_stride;
- int s8x8 = h->mb.i_b8_stride;
- int s4x4 = h->mb.i_b4_stride;
- int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
- @@ -712,25 +854,25 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - h->mb.cache.i_cbp_left = cbp[left];
- + h->mb.cache.i_cbp_left = cbp[left[0]];
- /* load intra4x4 */
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[0]][left_index_table[1]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[1]][left_index_table[2]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[1]][left_index_table[3]];
- /* load non_zero_count */
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
- - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
- - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
- + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[0]][left_index_table[4+4]];
- + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[1]][left_index_table[4+5]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[0]][left_index_table[4+6]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[1]][left_index_table[4+7]];
- }
- else
- {
- @@ -755,7 +897,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->pps->b_transform_8x8_mode )
- {
- h->mb.cache.i_neighbour_transform_size =
- - ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
- + ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
- + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
- }
- @@ -768,7 +910,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- + !!(h->mb.i_neighbour & MB_TOP);
- }
- - if( !h->mb.b_interlaced )
- + if( !h->sh.b_mbaff )
- {
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
- @@ -869,10 +1011,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
- }
- else
- for( int i = 0; i < 4; i++ )
- @@ -889,7 +1031,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- uint8_t skipbp;
- x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- - skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
- + skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
- h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
- h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
- skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
- @@ -968,11 +1110,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int left = h->mb.i_mb_left_xy[0];
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- + int *left = h->mb.i_mb_left_xy;
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
- }
- for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
- @@ -1103,20 +1245,80 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- int i_pix_offset = b_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- + const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
- pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
- if( i )
- {
- + if( h->sh.b_mbaff )
- + {
- + /* Frame macroblocks use the macroblock directly above for intra
- + * prediction. Field macroblock pairs predict from fields of the same
- + * parity. However field macroblock pairs predicting from frame pairs
- + * use the bottom two rows of the frame for prediction, the penultimate
- + * row is stored in intra_border_backup[2]. */
- + if( mb_y&1 )
- + {
- + if( mb_x )
- + {
- + // Store top left.
- + h->intra_diagonal_backup[1][1][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[1][2][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+8+7];
- + h->intra_diagonal_backup[2][1][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[2][2][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+8+7];
- + // Store top right.
- + CP32( &h->intra_diagonal_backup[1][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[1][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[2][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
- + CP32( &h->intra_diagonal_backup[2][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
- + }
- + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[1]+FDEC_STRIDE*6, 8*sizeof(pixel) );
- + memcpy( &h->intra_border_backup[2][i][mb_x*16]+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*6, 8*sizeof(pixel) );
- + }
- + else
- + {
- + if( mb_x )
- + {
- + h->intra_diagonal_backup[0][1][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[0][2][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+8+7];
- + CP32( &h->intra_diagonal_backup[0][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[0][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7 );
- + }
- + // Sometimes needed for bottom macroblock of this pair.
- + h->intra_diagonal_backup[3][1][mb_x*intra_diag_width] = h->mb.pic.p_fdec[1][-1+7*FDEC_STRIDE];
- + h->intra_diagonal_backup[3][2][mb_x*intra_diag_width] = h->mb.pic.p_fdec[2][-1+7*FDEC_STRIDE];
- + }
- + }
- h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
- memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, b_interlaced );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, b_interlaced );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, h->sh.b_mbaff );
- }
- else
- {
- + if( h->sh.b_mbaff )
- + {
- + if( mb_y&1 )
- + {
- + if( mb_x )
- + {
- + h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
- + h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
- + CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
- + CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
- + }
- + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
- + }
- + else
- + {
- + h->intra_diagonal_backup[0][0][mb_x*intra_diag_width] = h->intra_border_backup[0][0][(mb_x-1)*16+15];
- + CP64( &h->intra_diagonal_backup[0][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
- + h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
- + }
- + }
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, b_interlaced );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
- }
- }
- --
- 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement