Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 167c2760cbc36911302ace046db5eef6fe1ea54a Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:05:54 +0000
- Subject: [PATCH 01/25] Save interlace decision for all macroblocks
- ---
- common/common.h | 1 +
- common/frame.c | 3 +++
- common/frame.h | 1 +
- common/macroblock.c | 1 +
- encoder/encoder.c | 8 ++++++++
- 5 files changed, 14 insertions(+), 0 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 868f526..231254f 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -617,6 +617,7 @@ struct x264_t
- int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
- uint16_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of
- * NOTE: this will fail on resolutions above 2^16 MBs... */
- + uint8_t *field;
- /* buffer for weighted versions of the reference frames */
- pixel *p_weight_buf[X264_REF_MAX];
- diff --git a/common/frame.c b/common/frame.c
- index ca90539..eff8ca5 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -145,6 +145,8 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
- frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
- frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
- }
- + if( h->param.b_interlaced )
- + CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
- }
- else /* fenc frame */
- {
- @@ -219,6 +221,7 @@ void x264_frame_delete( x264_frame_t *frame )
- x264_free( frame->i_inv_qscale_factor );
- x264_free( frame->i_row_bits );
- x264_free( frame->f_row_qp );
- + x264_free( frame->field );
- x264_free( frame->mb_type );
- x264_free( frame->mb_partition );
- x264_free( frame->mv[0] );
- diff --git a/common/frame.h b/common/frame.h
- index 38d0bf2..0e0ab3d 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -92,6 +92,7 @@ typedef struct x264_frame
- int16_t (*mv[2])[2];
- int16_t (*mv16x16)[2];
- int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
- + uint8_t *field;
- /* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
- * Doesn't need special addressing for intra cost because
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 24c2af9..569d544 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -371,6 +371,7 @@ void x264_macroblock_slice_init( x264_t *h )
- h->mb.ref[1] = h->fdec->ref[1];
- h->mb.type = h->fdec->mb_type;
- h->mb.partition = h->fdec->mb_partition;
- + h->mb.field = h->fdec->field;
- h->fdec->i_ref[0] = h->i_ref[0];
- h->fdec->i_ref[1] = h->i_ref[1];
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 67646df..af1342b 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1956,6 +1956,14 @@ static int x264_slice_write( x264_t *h )
- }
- }
- + if( h->param.b_interlaced )
- + {
- + if( !(i_mb_y&1) )
- + h->mb.b_interlaced = 1;
- + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + h->mb.field[mb_xy] = h->mb.b_interlaced;
- + }
- +
- if( i_mb_x == 0 && !h->mb.b_reencode_mb )
- x264_fdec_filter_row( h, i_mb_y, 1 );
- --
- 1.7.4
- From 2847b697f1fdbee1b8c3128895f2a50c1cba606e Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:09:00 +0000
- Subject: [PATCH 02/25] Disable adaptive mbaff when subme 0 is used
- ---
- common/common.h | 1 +
- encoder/encoder.c | 13 ++++++++++---
- x264.h | 1 +
- 3 files changed, 12 insertions(+), 3 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 231254f..75c4d59 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -568,6 +568,7 @@ struct x264_t
- int i_psy_trellis; /* Psy trellis strength--fixed point value*/
- int b_interlaced;
- + int b_adaptive_mbaff;
- /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
- int mv_min[2];
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index af1342b..9f294d1 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -606,6 +606,10 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
- h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
- }
- + /* Adaptive MBAFF combined with subme 0 is not supported, because motion
- + * vectors between field and frame macroblocks require halving and hpel pixels.
- + * The chosen solution is to make MBAFF non-adaptive in this case. */
- + h->mb.b_adaptive_mbaff = !(h->param.b_interlaced && !h->param.analyse.i_subpel_refine);
- h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
- h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
- h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
- @@ -1958,9 +1962,12 @@ static int x264_slice_write( x264_t *h )
- if( h->param.b_interlaced )
- {
- - if( !(i_mb_y&1) )
- - h->mb.b_interlaced = 1;
- - x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + if( h->mb.b_adaptive_mbaff )
- + {
- + if( !(i_mb_y&1) )
- + h->mb.b_interlaced = 1;
- + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- + }
- h->mb.field[mb_xy] = h->mb.b_interlaced;
- }
- diff --git a/x264.h b/x264.h
- index 24c3792..da8746b 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -291,6 +291,7 @@ typedef struct x264_param_t
- int i_cabac_init_idc;
- int b_interlaced;
- + int b_adaptive_mbaff; /* MBAFF+subme 0 require non-adaptive MBAFF i.e. all field mbs */
- int b_constrained_intra;
- int i_cqm_preset;
- --
- 1.7.4
- From eb50f5f3757d825b2664e7991b89c7647605dd28 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:16:18 +0000
- Subject: [PATCH 03/25] Store left references in a table
- ---
- common/common.h | 1 +
- common/macroblock.c | 56 ++++++++++++++++++++++++++++++++------------------
- 2 files changed, 37 insertions(+), 20 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 75c4d59..992ba9c 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -596,6 +596,7 @@ struct x264_t
- int i_mb_top_xy;
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- + int *left_index_table;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 569d544..d10b19f 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -550,6 +550,18 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- }
- }
- +static const int left_indices[5][22] = {
- +/* Per-row layout: [0..3] intra modes (also used for mvd), [4..11] nnz, then mv, ref, real indices. */
- + /* Current is progressive, left differs: row 0 for even mb_y, row 1 for odd mb_y. */
- + { 4, 4, 5, 5, 3, 3, 7, 7, 16+1, 16+1, 16+4+1, 16+4+1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
- + { 6, 6, 3, 3, 11, 11, 15, 15, 16+3, 16+3, 16+4+3, 16+4+3, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 },
- + /* Current is interlaced, left differs: row 2 for even mb_y, row 3 for odd mb_y. */
- + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
- + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
- + /* Both same: the default row (left_indices[4]). */
- + { 4, 5, 6, 3, 3, 7, 11, 15, 16+1, 16+3, 16+4+1, 16+4+3, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3 },
- +};
- +
- static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
- {
- int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
- @@ -570,6 +582,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_type_left = -1;
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- + h->mb.left_index_table = left_indices[4];
- if( mb_x > 0 )
- {
- @@ -661,6 +674,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int16_t *cbp = h->mb.cbp;
- + int *left_index_table = h->mb.left_index_table;
- +
- /* load cache */
- if( h->mb.i_neighbour & MB_TOP )
- {
- @@ -703,22 +718,22 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- h->mb.cache.i_cbp_left = cbp[left];
- /* load intra4x4 */
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][4];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][5];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][6];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][3];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
- /* load non_zero_count */
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][16+1];
- - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][16+3];
- + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
- + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][16+4+1];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][16+4+3];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
- }
- else
- {
- @@ -857,10 +872,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][4] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][5] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][6] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][3] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
- }
- else
- for( int i = 0; i < 4; i++ )
- @@ -949,6 +964,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- int s4x4 = h->mb.i_b4_stride;
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- + int *left_index_table = h->mb.left_index_table;
- if( h->mb.i_neighbour & MB_TOP )
- CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
- @@ -956,10 +972,10 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- int left = h->mb.i_mb_left_xy;
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- }
- for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
- --
- 1.7.4
- From c83e9ad2f2cb3a7f5d2ae6c4fd4b5a8cc04f894a Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 11 Jan 2011 20:21:26 +0000
- Subject: [PATCH 04/25] Store references to the two left macroblocks
- Fix compiler warnings about discarding const qualifiers
- ---
- common/common.h | 6 +++---
- common/deblock.c | 4 ++--
- common/macroblock.c | 24 ++++++++++++------------
- common/mvpred.c | 2 +-
- encoder/analyse.c | 8 ++++----
- encoder/cabac.c | 8 ++++----
- 6 files changed, 26 insertions(+), 26 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 992ba9c..f839e7e 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -588,15 +588,15 @@ struct x264_t
- unsigned int i_neighbour_intra; /* for constrained intra pred */
- unsigned int i_neighbour_frame; /* ignoring slice boundaries */
- int i_mb_type_top;
- - int i_mb_type_left;
- + int i_mb_type_left[2];
- int i_mb_type_topleft;
- int i_mb_type_topright;
- int i_mb_prev_xy;
- - int i_mb_left_xy;
- + int i_mb_left_xy[2];
- int i_mb_top_xy;
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- - int *left_index_table;
- + const int *left_index_table;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- diff --git a/common/deblock.c b/common/deblock.c
- index 1b6448f..0800461 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -347,10 +347,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int qpl = h->mb.qp[h->mb.i_mb_left_xy];
- + int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
- int qp_left = (qp + qpl + 1) >> 1;
- int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
- - int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy] );
- + int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
- if( intra_cur || intra_left )
- FILTER( _intra, 0, 0, qp_left, qpc_left );
- else
- diff --git a/common/macroblock.c b/common/macroblock.c
- index d10b19f..aa194a7 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -575,11 +575,11 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_neighbour_intra = 0;
- h->mb.i_neighbour_frame = 0;
- h->mb.i_mb_top_xy = -1;
- - h->mb.i_mb_left_xy = -1;
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
- h->mb.i_mb_topleft_xy = -1;
- h->mb.i_mb_topright_xy = -1;
- h->mb.i_mb_type_top = -1;
- - h->mb.i_mb_type_left = -1;
- + h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- h->mb.left_index_table = left_indices[4];
- @@ -587,13 +587,13 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- if( mb_x > 0 )
- {
- h->mb.i_neighbour_frame |= MB_LEFT;
- - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- - h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
- if( h->mb.i_mb_xy > h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_LEFT;
- - if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
- + if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
- h->mb.i_neighbour_intra |= MB_LEFT;
- }
- }
- @@ -659,7 +659,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- int top = h->mb.i_mb_top_xy;
- int top_y = mb_y - (1 << h->mb.b_interlaced);
- int s8x8 = h->mb.i_b8_stride;
- @@ -674,7 +674,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int16_t *cbp = h->mb.cbp;
- - int *left_index_table = h->mb.left_index_table;
- + const int *left_index_table = h->mb.left_index_table;
- /* load cache */
- if( h->mb.i_neighbour & MB_TOP )
- @@ -927,8 +927,8 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
- if( mb_x > 0 )
- {
- - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
- - if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_LEFT;
- }
- @@ -964,14 +964,14 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- int s4x4 = h->mb.i_b4_stride;
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- - int *left_index_table = h->mb.left_index_table;
- + const int *left_index_table = h->mb.left_index_table;
- if( h->mb.i_neighbour & MB_TOP )
- CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- @@ -1046,7 +1046,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- {
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int top = h->mb.i_mb_top_xy;
- - int left = h->mb.i_mb_left_xy;
- + int left = h->mb.i_mb_left_xy[0];
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
- {
- diff --git a/common/mvpred.c b/common/mvpred.c
- index a24dde8..c8efe1f 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -426,7 +426,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- }
- /* spatial predictors */
- - SET_MVP( mvr[h->mb.i_mb_left_xy] );
- + SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
- SET_MVP( mvr[h->mb.i_mb_top_xy] );
- SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
- SET_MVP( mvr[h->mb.i_mb_topright_xy] );
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 5419bd1..87125c1 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -516,7 +516,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- {
- /* Always run in fast-intra mode for subme < 3 */
- if( h->mb.i_subpel_refine > 2 &&
- - ( IS_INTRA( h->mb.i_mb_type_left ) ||
- + ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
- IS_INTRA( h->mb.i_mb_type_top ) ||
- IS_INTRA( h->mb.i_mb_type_topleft ) ||
- IS_INTRA( h->mb.i_mb_type_topright ) ||
- @@ -1296,7 +1296,7 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
- /* early termination: if 16x16 chose ref 0, then evalute no refs older
- * than those used by the neighbors */
- if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
- - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
- + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
- {
- i_maxref = 0;
- CHECK_NEIGHBOUR( -8 - 1 );
- @@ -2063,7 +2063,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
- {
- x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
- if( i_maxref[l] > 0 && lX->me16x16.i_ref == 0 &&
- - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
- + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
- {
- i_maxref[l] = 0;
- CHECK_NEIGHBOUR( -8 - 1 );
- @@ -2817,7 +2817,7 @@ intra_analysis:
- {}
- else if( h->param.analyse.i_subpel_refine >= 3 )
- analysis.b_try_skip = 1;
- - else if( h->mb.i_mb_type_left == P_SKIP ||
- + else if( h->mb.i_mb_type_left[0] == P_SKIP ||
- h->mb.i_mb_type_top == P_SKIP ||
- h->mb.i_mb_type_topleft == P_SKIP ||
- h->mb.i_mb_type_topright == P_SKIP )
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 6333737..334318d 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -79,7 +79,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- if( h->sh.i_type == SLICE_TYPE_I )
- {
- int ctx = 0;
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
- ctx++;
- @@ -113,7 +113,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- else //if( h->sh.i_type == SLICE_TYPE_B )
- {
- int ctx = 0;
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
- ctx++;
- @@ -198,7 +198,7 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
- int ctx = 0;
- /* No need to test for I4x4 or I_16x16 as cache_save handle that */
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
- + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
- ctx++;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
- ctx++;
- @@ -280,7 +280,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- #if !RDO_SKIP_BS
- void x264_cabac_mb_skip( x264_t *h, int b_skip )
- {
- - int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left ))
- + int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
- + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
- + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
- x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
- --
- 1.7.4
- From 135b93d39e3d8d8540a41bd66d90aa42f7a73ba4 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Fri, 14 Jan 2011 21:18:14 +0000
- Subject: [PATCH 05/25] Neighbour calculation for mbaff
- Back up intra borders correctly and make neighbour calculation several times longer.
- ---
- common/common.h | 9 ++-
- common/macroblock.c | 286 +++++++++++++++++++++++++++++++++++++++++++--------
- 2 files changed, 249 insertions(+), 46 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index f839e7e..c993857 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -549,6 +549,8 @@ struct x264_t
- int i_mb_stride;
- int i_b8_stride;
- int i_b4_stride;
- + int left_b8[2];
- + int left_b4[2];
- /* Current index */
- int i_mb_x;
- @@ -597,6 +599,10 @@ struct x264_t
- int i_mb_topleft_xy;
- int i_mb_topright_xy;
- const int *left_index_table;
- + int topleft_partition;
- + int intra_border_index;
- + int topleft_border_index;
- + int topright_border_index;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- @@ -827,7 +833,8 @@ struct x264_t
- /* Buffers that are allocated per-thread even in sliced threads. */
- void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
- - pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- + pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- + pixel *intra_diagonal_backup[5][3];
- uint8_t (*deblock_strength[2])[2][4][4];
- /* CPU functions dependents */
- diff --git a/common/macroblock.c b/common/macroblock.c
- index aa194a7..60275ae 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -314,18 +314,35 @@ void x264_macroblock_cache_free( x264_t *h )
- int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- {
- if( !b_lookahead )
- - for( int i = 0; i <= h->param.b_interlaced; i++ )
- + {
- + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
- {
- for( int j = 0; j < 2; j++ )
- {
- /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
- CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
- h->intra_border_backup[i][j] += 16;
- - h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
- + if( !h->param.b_interlaced )
- + h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
- + }
- + }
- + for( int i = 0; i < 4*h->param.b_interlaced; i++ ) /* same bound as the free loop in x264_macroblock_thread_free */
- + {
- + for( int j = 0; j < 3; j++ )
- + {
- + const int width = 1 + 8; // top left pixel + eight top right pixels (for luma)
- + CHECKED_MALLOCZERO( h->intra_diagonal_backup[i][j], (h->sps->i_mb_width*width+32) * sizeof(pixel) );
- + h->intra_diagonal_backup[i][j] += 16;
- + if( !h->param.b_interlaced )
- + h->intra_diagonal_backup[1][j] = h->intra_diagonal_backup[i][j];
- }
- + }
- + for( int i = 0; i <= h->param.b_interlaced; i++ )
- + {
- CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
- h->deblock_strength[1] = h->deblock_strength[i];
- }
- + }
- /* Allocate scratch buffer */
- int scratch_size = 0;
- @@ -353,12 +370,20 @@ fail:
- void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
- {
- if( !b_lookahead )
- + {
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- - {
- x264_free( h->deblock_strength[i] );
- + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
- + {
- for( int j = 0; j < 2; j++ )
- x264_free( h->intra_border_backup[i][j] - 16 );
- }
- + for( int i = 0; i < 4*h->param.b_interlaced; i++ )
- + {
- + for( int j = 0; j < 3; j++ )
- + x264_free( h->intra_diagonal_backup[i][j] - 16 );
- + }
- + }
- x264_free( h->scratch_buffer );
- }
- @@ -494,14 +519,15 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- {
- int w = (i ? 8 : 16);
- int i_stride = h->fdec->i_stride[i];
- - int i_stride2 = i_stride << b_interlaced;
- - int i_pix_offset = b_interlaced
- + int i_stride2 = i_stride << h->mb.b_interlaced;
- + int i_pix_offset = h->mb.b_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- - pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
- + pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
- int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
- - if( b_interlaced )
- + /* ref_pix_offset[0] references the current field and [1] the opposite field. */
- + if( h->mb.b_interlaced )
- ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
- h->mb.pic.i_stride[i] = i_stride2;
- h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
- @@ -510,11 +536,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
- memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
- memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
- + if( h->sh.b_mbaff )
- + {
- + // Top left samples.
- + h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][1][mb_x*9];
- + h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][2][mb_x*9];
- + // Top right samples.
- + CP32( &h->mb.pic.p_fdec[1][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][1][mb_x*9+1] );
- + CP32( &h->mb.pic.p_fdec[2][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][2][mb_x*9+1] );
- + }
- +
- }
- else
- {
- h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
- - memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- + if( h->sh.b_mbaff )
- + {
- + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 16*sizeof(pixel) );
- + h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][0][mb_x*9];
- + CP64( &h->mb.pic.p_fdec[0][-FDEC_STRIDE+16], &h->intra_diagonal_backup[h->mb.topright_border_index][0][mb_x*9+1] );
- + }
- + else
- + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
- }
- if( b_interlaced )
- {
- @@ -571,6 +614,10 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
- h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
- h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
- + h->mb.left_b8[0] =
- + h->mb.left_b8[1] = -1;
- + h->mb.left_b4[0] =
- + h->mb.left_b4[1] = -1;
- h->mb.i_neighbour = 0;
- h->mb.i_neighbour_intra = 0;
- h->mb.i_neighbour_frame = 0;
- @@ -583,16 +630,105 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- h->mb.i_mb_type_topleft = -1;
- h->mb.i_mb_type_topright = -1;
- h->mb.left_index_table = left_indices[4];
- + h->mb.topleft_partition = 0;
- + h->mb.topright_border_index =
- + h->mb.topleft_border_index = !(mb_y&1);
- + h->mb.intra_border_index = mb_y&1;
- +
- + int topleft = top - 1;
- + int topright = top + 1;
- + int left[2];
- +
- + left[0] = left[1] = h->mb.i_mb_xy - 1;
- + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
- + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
- +
- + if( h->sh.b_mbaff )
- + {
- + if( mb_y&1 )
- + {
- + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
- + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
- + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
- +
- + if( h->mb.b_interlaced )
- + {
- + h->mb.left_index_table = left_indices[3];
- + left[1] += h->mb.i_mb_stride;
- + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
- + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
- + }
- + else
- + {
- + h->mb.left_index_table = left_indices[1];
- + topleft += h->mb.i_mb_stride;
- + h->mb.topleft_partition = 1;
- + h->mb.topleft_border_index = 3;
- + }
- + }
- + if( h->mb.b_interlaced )
- + h->mb.topleft_border_index = 1;
- + else
- + {
- + topright = -1;
- + h->mb.intra_border_index = 0;
- + }
- + h->mb.topright_border_index = 1;
- + }
- + else
- + {
- + if( h->mb.b_interlaced && top >= 0 )
- + {
- + if( !h->mb.field[top] )
- + {
- + top += h->mb.i_mb_stride;
- + h->mb.intra_border_index = 2;
- + }
- + if( mb_x )
- + topleft += h->mb.i_mb_stride*(!h->mb.field[topleft]);
- + if( mb_x < h->mb.i_mb_width-1 )
- + topright += h->mb.i_mb_stride*(!h->mb.field[topright]);
- +
- + if( topright >=0 && h->mb.field[topright] )
- + h->mb.topright_border_index = 0;
- + else
- + h->mb.topright_border_index = 2;
- + if( topleft >=0 && h->mb.field[topleft] )
- + h->mb.topleft_border_index = 0;
- + else
- + h->mb.topleft_border_index = 2;
- + }
- + else
- + h->mb.intra_border_index = 1;
- + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + if( h->mb.b_interlaced )
- + {
- + h->mb.left_index_table = left_indices[2];
- + left[1] += h->mb.i_mb_stride;
- + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
- + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
- + }
- + else
- + h->mb.left_index_table = left_indices[0];
- + }
- + }
- + }
- if( mb_x > 0 )
- {
- h->mb.i_neighbour_frame |= MB_LEFT;
- - h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + h->mb.i_mb_left_xy[0] = left[0];
- + h->mb.i_mb_left_xy[1] = left[1];
- h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
- + h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
- if( h->mb.i_mb_xy > h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_LEFT;
- + // FIXME: We don't currently support constrained intra + mbaff.
- if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
- h->mb.i_neighbour_intra |= MB_LEFT;
- }
- @@ -625,12 +761,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- }
- }
- - if( mb_x > 0 && top - 1 >= 0 )
- + if( mb_x > 0 && topleft >= 0 )
- {
- h->mb.i_neighbour_frame |= MB_TOPLEFT;
- - h->mb.i_mb_topleft_xy = top - 1;
- + h->mb.i_mb_topleft_xy = topleft;
- h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
- - if( top - 1 >= h->sh.i_first_mb )
- + if( topleft >= h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_TOPLEFT;
- @@ -639,12 +775,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
- }
- }
- - if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
- + if( mb_x < h->mb.i_mb_width - 1 && topright >= 0 )
- {
- h->mb.i_neighbour_frame |= MB_TOPRIGHT;
- - h->mb.i_mb_topright_xy = top + 1;
- + h->mb.i_mb_topright_xy = topright;
- h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
- - if( top + 1 >= h->sh.i_first_mb )
- + if( topright >= h->sh.i_first_mb )
- {
- h->mb.i_neighbour |= MB_TOPRIGHT;
- @@ -659,9 +795,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
- - int left = h->mb.i_mb_left_xy[0];
- + int *left = h->mb.i_mb_left_xy;
- int top = h->mb.i_mb_top_xy;
- - int top_y = mb_y - (1 << h->mb.b_interlaced);
- + int top_y = top / h->mb.i_mb_stride;
- int s8x8 = h->mb.i_b8_stride;
- int s4x4 = h->mb.i_b4_stride;
- int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
- @@ -715,25 +851,25 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - h->mb.cache.i_cbp_left = cbp[left];
- + h->mb.cache.i_cbp_left = cbp[left[0]];
- /* load intra4x4 */
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
- - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[0]][left_index_table[1]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[1]][left_index_table[2]];
- + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[1]][left_index_table[3]];
- /* load non_zero_count */
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
- - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
- - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
- + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[0]][left_index_table[4+4]];
- + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[1]][left_index_table[4+5]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
- - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[0]][left_index_table[4+6]];
- + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[1]][left_index_table[4+7]];
- }
- else
- {
- @@ -758,7 +894,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->pps->b_transform_8x8_mode )
- {
- h->mb.cache.i_neighbour_transform_size =
- - ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
- + ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
- + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
- }
- @@ -771,7 +907,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- + !!(h->mb.i_neighbour & MB_TOP);
- }
- - if( !h->mb.b_interlaced )
- + if( !h->sh.b_mbaff )
- {
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
- x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
- @@ -872,10 +1008,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
- - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
- + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
- }
- else
- for( int i = 0; i < 4; i++ )
- @@ -892,7 +1028,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- {
- uint8_t skipbp;
- x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- - skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
- + skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
- h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
- h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
- skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
- @@ -971,11 +1107,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int left = h->mb.i_mb_left_xy[0];
- - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
- - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
- - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
- - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
- + int *left = h->mb.i_mb_left_xy;
- + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
- + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
- + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
- + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
- }
- for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
- @@ -1106,20 +1242,80 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- int i_pix_offset = b_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- + const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
- pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
- if( i )
- {
- + if( h->sh.b_mbaff )
- + {
- + /* Frame macroblocks use the macroblock directly above for intra
- + * prediction. Field macroblock pairs predict from fields of the same
- + * parity. However, field macroblock pairs predicting from frame pairs
- + * use the bottom two rows of the frame for prediction; the penultimate
- + * row is stored in intra_border_backup[2]. */
- + if( mb_y&1 )
- + {
- + if( mb_x )
- + {
- + // Store top left.
- + h->intra_diagonal_backup[1][1][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[1][2][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+8+7];
- + h->intra_diagonal_backup[2][1][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[2][2][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+8+7];
- + // Store top right.
- + CP32( &h->intra_diagonal_backup[1][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[1][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[2][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
- + CP32( &h->intra_diagonal_backup[2][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
- + }
- + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[1]+FDEC_STRIDE*6, 8*sizeof(pixel) );
- + memcpy( &h->intra_border_backup[2][i][mb_x*16]+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*6, 8*sizeof(pixel) );
- + }
- + else
- + {
- + if( mb_x )
- + {
- + h->intra_diagonal_backup[0][1][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+7];
- + h->intra_diagonal_backup[0][2][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+8+7];
- + CP32( &h->intra_diagonal_backup[0][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7 );
- + CP32( &h->intra_diagonal_backup[0][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7 );
- + }
- + // Sometimes needed for bottom macroblock of this pair.
- + h->intra_diagonal_backup[3][1][mb_x*intra_diag_width] = h->mb.pic.p_fdec[1][-1+7*FDEC_STRIDE];
- + h->intra_diagonal_backup[3][2][mb_x*intra_diag_width] = h->mb.pic.p_fdec[2][-1+7*FDEC_STRIDE];
- + }
- + }
- h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
- memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, b_interlaced );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, b_interlaced );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, h->sh.b_mbaff );
- }
- else
- {
- + if( h->sh.b_mbaff )
- + {
- + if( mb_y&1 )
- + {
- + if( mb_x )
- + {
- + h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
- + h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
- + CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
- + CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
- + }
- + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
- + }
- + else
- + {
- + h->intra_diagonal_backup[0][0][mb_x*intra_diag_width] = h->intra_border_backup[0][0][(mb_x-1)*16+15];
- + CP64( &h->intra_diagonal_backup[0][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
- + h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
- + }
- + }
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, b_interlaced );
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
- }
- }
- --
- 1.7.4
- From db33884079bf79074a67fac2851d8c9425c45bfa Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Thu, 17 Feb 2011 00:56:59 +0000
- Subject: [PATCH 06/25] Change b_interlaced in store_pic back to its original meaning
- ---
- common/macroblock.c | 10 +++++-----
- 1 files changed, 5 insertions(+), 5 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 60275ae..63a8933 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1238,15 +1238,15 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- {
- int w = i ? 8 : 16;
- int i_stride = h->fdec->i_stride[i];
- - int i_stride2 = i_stride << b_interlaced;
- - int i_pix_offset = b_interlaced
- + int i_stride2 = i_stride << (b_interlaced && h->mb.b_interlaced);
- + int i_pix_offset = (b_interlaced && h->mb.b_interlaced)
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
- pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
- if( i )
- {
- - if( h->sh.b_mbaff )
- + if( b_interlaced )
- {
- /* Frame macroblocks use the macroblock directly above for intra
- * prediction. Field macroblock pairs predict from fields of the same
- @@ -1293,7 +1293,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- }
- else
- {
- - if( h->sh.b_mbaff )
- + if( b_interlaced )
- {
- if( mb_y&1 )
- {
- @@ -1333,7 +1333,7 @@ void x264_macroblock_cache_save( x264_t *h )
- int8_t *i4x4 = h->mb.intra4x4_pred_mode[i_mb_xy];
- uint8_t *nnz = h->mb.non_zero_count[i_mb_xy];
- - if( h->mb.b_interlaced )
- + if( h->sh.b_mbaff )
- {
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 1 );
- x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1 );
- --
- 1.7.4
- From b816ff5ba65edb03226237e2fc6fff06d7d9e60a Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Fri, 18 Feb 2011 18:57:15 +0000
- Subject: [PATCH 07/25] Only enable twiddle_topleft in progressive
- ---
- common/macroblock.c | 3 ++-
- 1 files changed, 2 insertions(+), 1 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 63a8933..f775030 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1313,9 +1313,10 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
- }
- }
- + else
- + twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- - twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
- }
- }
- --
- 1.7.4
- From 2b1351a44177a7dc289c9bfe38c7ddc7b0f9a3a2 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 21:34:28 +0000
- Subject: [PATCH 08/25] Initial inter support
- ---
- common/common.h | 5 +
- common/frame.c | 33 +++++++-
- common/frame.h | 3 +
- common/macroblock.c | 217 +++++++++++++++++++++++++++++++++++++++++++-------
- common/mc.c | 37 +++++++--
- common/mvpred.c | 23 +++++-
- common/x86/util.h | 24 ------
- encoder/macroblock.c | 4 +-
- 8 files changed, 277 insertions(+), 69 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index c993857..ef9b35a 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -600,6 +600,7 @@ struct x264_t
- int i_mb_topright_xy;
- const int *left_index_table;
- int topleft_partition;
- + int allow_skip;
- int intra_border_index;
- int topleft_border_index;
- int topright_border_index;
- @@ -730,6 +731,10 @@ struct x264_t
- /* neighbor CBPs */
- int i_cbp_top;
- int i_cbp_left;
- +
- + /* extra data required for mbaff in mv prediction */
- + int16_t topright_mv[2][3][2];
- + int8_t topright_ref[2][3];
- } cache;
- /* */
- diff --git a/common/frame.c b/common/frame.c
- index eff8ca5..d04f047 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -48,7 +48,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
- int i_mb_count = h->mb.i_mb_count;
- int i_stride, i_width, i_lines;
- - int i_padv = PADV << h->param.b_interlaced;
- + int i_padv = PADV << 2*h->param.b_interlaced;
- int luma_plane_size, chroma_plane_size;
- int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
- int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
- @@ -99,21 +99,30 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
- chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + i_padv));
- CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
- + CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
- frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
- + frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
- /* all 4 luma planes allocated together, since the cacheline split code
- * requires them to be in-phase wrt cacheline alignment. */
- if( h->param.analyse.i_subpel_refine && b_fdec )
- {
- CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
- + CHECKED_MALLOC( frame->buffer_fld[0], 4*luma_plane_size * sizeof(pixel) );
- for( int i = 0; i < 4; i++ )
- + {
- frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
- + frame->filtered_fld[i] = frame->buffer_fld[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
- + }
- frame->plane[0] = frame->filtered[0];
- + frame->plane_fld[0] = frame->filtered_fld[0];
- }
- else
- {
- CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
- + CHECKED_MALLOC( frame->buffer_fld[0], luma_plane_size * sizeof(pixel) );
- frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
- + frame->filtered_fld[0] = frame->plane_fld[0] = frame->buffer_fld[0] + frame->i_stride[0] * i_padv + PADH;
- }
- frame->b_duplicate = 0;
- @@ -200,7 +209,10 @@ void x264_frame_delete( x264_frame_t *frame )
- if( !frame->b_duplicate )
- {
- for( int i = 0; i < 4; i++ )
- + {
- x264_free( frame->buffer[i] );
- + x264_free( frame->buffer_fld[i] );
- + }
- for( int i = 0; i < 4; i++ )
- x264_free( frame->buffer_lowres[i] );
- for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
- @@ -363,16 +375,25 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_e
- int padh = PADH;
- int padv = PADV >> !!i;
- // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
- - pixel *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- if( b_end && !b_start )
- height += 4 >> (!!i + h->sh.b_mbaff);
- + pixel *pix;
- if( h->sh.b_mbaff )
- {
- + // border samples for each field are extended separately
- + pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, i );
- plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, i );
- +
- + height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> !!i;
- + if( b_end && !b_start )
- + height += 4 >> (!!i);
- + pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- + plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
- }
- else
- {
- + pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
- plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
- }
- }
- @@ -392,14 +413,16 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
- for( int i = 1; i < 4; i++ )
- {
- // buffer: 8 luma, to match the hpel filter
- - pixel *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
- + pixel *pix;
- if( h->sh.b_mbaff )
- {
- + pix = frame->filtered_fld[i] + (16*mb_y - 16) * stride - 4;
- plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
- plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
- }
- - else
- - plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, 0 );
- +
- + pix = frame->filtered[i] + (16*mb_y - 8) * stride - 4;
- + plane_expand_border( pix, stride, width, height << h->sh.b_mbaff, padh, padv, b_start, b_end, 0 );
- }
- }
- diff --git a/common/frame.h b/common/frame.h
- index 0e0ab3d..8fe0627 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -72,13 +72,16 @@ typedef struct x264_frame
- int i_width_lowres;
- int i_lines_lowres;
- pixel *plane[2];
- + pixel *plane_fld[2];
- pixel *filtered[4]; /* plane[0], H, V, HV */
- + pixel *filtered_fld[4];
- pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
- uint16_t *integral;
- /* for unrestricted mv we allocate more data than needed
- * allocated data are stored in buffer */
- pixel *buffer[4];
- + pixel *buffer_fld[4];
- pixel *buffer_lowres[4];
- x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
- diff --git a/common/macroblock.c b/common/macroblock.c
- index f775030..a441981 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -523,7 +523,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- int i_pix_offset = h->mb.b_interlaced
- ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
- : 16 * mb_x + w * mb_y * i_stride;
- - pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
- + pixel *plane_fdec = h->mb.b_interlaced ? &h->fdec->plane_fld[i][i_pix_offset] : &h->fdec->plane[i][i_pix_offset];
- pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
- int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
- /* ref_pix_offset[0] references the current field and [1] the opposite field. */
- @@ -570,15 +570,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- else
- h->mb.pic.p_fdec[0][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
- }
- + pixel *plane_src, **filtered_src;
- for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
- {
- - h->mb.pic.p_fref[0][j][i?4:0] = &h->fref[0][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
- + // Interpolate between pixels in same field.
- + if( h->mb.b_interlaced )
- + {
- + plane_src = h->fref[0][j>>1]->plane_fld[i];
- + filtered_src = h->fref[0][j>>1]->filtered_fld;
- + }
- + else
- + {
- + plane_src = h->fref[0][j]->plane[i];
- + filtered_src = h->fref[0][j]->filtered;
- + }
- + h->mb.pic.p_fref[0][j][i?4:0] = plane_src + ref_pix_offset[j&1];
- +
- if( !i )
- {
- for( int k = 1; k < 4; k++ )
- - h->mb.pic.p_fref[0][j][k] = &h->fref[0][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
- + h->mb.pic.p_fref[0][j][k] = filtered_src[k] + ref_pix_offset[j&1];
- if( h->sh.weight[j][0].weightfn )
- - h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> b_interlaced][ref_pix_offset[j&1]];
- + h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> h->mb.b_interlaced][ref_pix_offset[j&1]];
- else
- h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
- }
- @@ -586,10 +599,21 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
- if( h->sh.i_type == SLICE_TYPE_B )
- for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
- {
- - h->mb.pic.p_fref[1][j][i?4:0] = &h->fref[1][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
- + if( h->mb.b_interlaced )
- + {
- + plane_src = h->fref[1][j>>1]->plane_fld[i];
- + filtered_src = h->fref[1][j>>1]->filtered_fld;
- + }
- + else
- + {
- + plane_src = h->fref[1][j]->plane[i];
- + filtered_src = h->fref[1][j]->filtered;
- + }
- + h->mb.pic.p_fref[1][j][i?4:0] = plane_src + ref_pix_offset[j&1];
- +
- if( !i )
- for( int k = 1; k < 4; k++ )
- - h->mb.pic.p_fref[1][j][k] = &h->fref[1][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
- + h->mb.pic.p_fref[1][j][k] = filtered_src[k] + ref_pix_offset[j&1];
- }
- }
- @@ -851,7 +875,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - h->mb.cache.i_cbp_left = cbp[left[0]];
- + const int16_t top_luma = (cbp[left[0]] >> (left_index_table[18+0]&(~1))) & 2;
- + const int16_t bot_luma = (cbp[left[1]] >> (left_index_table[18+2]&(~1))) & 2;
- + h->mb.cache.i_cbp_left = (cbp[left[0]] & 0xfff0) | (bot_luma<<2) | top_luma;
- /* load intra4x4 */
- h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
- @@ -903,8 +929,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- h->mb.pic.i_fref[0] = h->i_ref[0] << h->mb.b_interlaced;
- h->mb.pic.i_fref[1] = h->i_ref[1] << h->mb.b_interlaced;
- h->mb.cache.i_neighbour_interlaced =
- - !!(h->mb.i_neighbour & MB_LEFT)
- - + !!(h->mb.i_neighbour & MB_TOP);
- + !!(h->mb.i_neighbour & MB_LEFT && h->mb.field[left[0]])
- + + !!(h->mb.i_neighbour & MB_TOP && h->mb.field[top]);
- }
- if( !h->sh.b_mbaff )
- @@ -941,8 +967,18 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- int i8 = x264_scan8[0] - 1 - 1*8;
- if( h->mb.i_neighbour & MB_TOPLEFT )
- {
- - h->mb.cache.ref[l][i8] = ref[top_8x8 - 1];
- - CP32( h->mb.cache.mv[l][i8], mv[top_4x4 - 1] );
- + int y = h->mb.i_mb_topleft_xy / h->mb.i_mb_stride;
- + int ir = 2*(s8x8*y + mb_x-1)+1+s8x8;
- + int iv = 4*(s4x4*y + mb_x-1)+3+3*s4x4;
- + if( h->mb.topleft_partition )
- + {
- + /* Take motion vector from the middle of macroblock instead of
- + * the bottom right as usual. */
- + iv -= 2*s4x4;
- + ir -= s8x8;
- + }
- + h->mb.cache.ref[l][i8] = ref[ir];
- + CP32( h->mb.cache.mv[l][i8], mv[iv] );
- }
- else
- {
- @@ -968,8 +1004,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- i8 = x264_scan8[0] + 4 - 1*8;
- if( h->mb.i_neighbour & MB_TOPRIGHT )
- {
- - h->mb.cache.ref[l][i8] = ref[top_8x8 + 2];
- - CP32( h->mb.cache.mv[l][i8], mv[top_4x4 + 4] );
- + int y = h->mb.i_mb_topright_xy / h->mb.i_mb_stride;
- + h->mb.cache.ref[l][i8] = ref[2*(s8x8*y + (mb_x+1))+s8x8];
- + CP32( h->mb.cache.mv[l][i8], mv[4*(s4x4*y + (mb_x+1))+3*s4x4] );
- }
- else
- h->mb.cache.ref[l][i8] = -2;
- @@ -977,17 +1014,15 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- i8 = x264_scan8[0] - 1;
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - const int ir = h->mb.i_b8_xy - 1;
- - const int iv = h->mb.i_b4_xy - 1;
- - h->mb.cache.ref[l][i8+0*8] =
- - h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
- - h->mb.cache.ref[l][i8+2*8] =
- - h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
- -
- - CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
- + h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[0] + 1 + s8x8*((left_index_table[12+0]&~1)>>1)];
- + h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*((left_index_table[12+1]&~1)>>1)];
- + h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[1] + 1 + s8x8*((left_index_table[12+2]&~1)>>1)];
- + h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*((left_index_table[12+3]&~1)>>1)];
- +
- + CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+0]] );
- + CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+1]] );
- + CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+2]] );
- + CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+3]] );
- }
- else
- {
- @@ -998,6 +1033,42 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- }
- }
- + /* Extra logic for top right mv in mbaff.
- + * . . . d . . a .
- + * . . . e . . . .
- + * . . . f b . c .
- + * . . . . . . . .
- + *
- + * If the top right of the 4x4 partitions labeled a, b and c in the
- + * above diagram do not exist, but the entries d, e and f exist (in
- + * the macroblock to the left) then use those instead.
- + */
- + if( h->param.b_interlaced )
- + {
- + if( h->mb.i_neighbour & MB_LEFT )
- + {
- + if( h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*0];
- + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*1];
- + h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[1] + 1 + s8x8*0];
- + CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table[12+0]+1)] );
- + CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table[12+1]+1)] );
- + CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[1] + 3 + s4x4*(left_index_table[12+2]+1)] );
- + }
- + else if( !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_xy-1] )
- + {
- + // Looking at the bottom field so always take the bottom macroblock of the pair.
- + h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+4]];
- + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+4]];
- + h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+5]];
- + CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+0]] );
- + CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+1]] );
- + CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+2]] );
- + }
- + }
- + }
- +
- if( h->param.b_cabac )
- {
- uint8_t (*mvd)[8][2] = h->mb.mvd[l];
- @@ -1006,16 +1077,103 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- else
- M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
- - if( h->mb.i_neighbour & MB_LEFT )
- + if( h->mb.cache.ref[l][x264_scan8[0]-1] >= 0 )
- {
- CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
- CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
- + }
- + else
- + {
- + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
- + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
- + }
- + if( h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0 )
- + {
- CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
- CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
- }
- else
- - for( int i = 0; i < 4; i++ )
- - M16( h->mb.cache.mvd[l][x264_scan8[0]-1+i*8] ) = 0;
- + {
- + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+2*8] ) = 0;
- + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+3*8] ) = 0;
- + }
- + }
- +
- + /* If motion vectors are cached from frame macroblocks but this
- + * macroblock is a field macroblock then the motion vector must be
- + * halved. Similarly, motion vectors from field macroblocks are doubled. */
- + if( h->sh.b_mbaff )
- + {
- +#define MAP_MVS\
- + MAP_F2F(mv, ref, x264_scan8[0] - 1 - 1*8, h->mb.i_mb_topleft_xy)\
- + MAP_F2F(mv, ref, x264_scan8[0] + 0 - 1*8, top)\
- + MAP_F2F(mv, ref, x264_scan8[0] + 1 - 1*8, top)\
- + MAP_F2F(mv, ref, x264_scan8[0] + 2 - 1*8, top)\
- + MAP_F2F(mv, ref, x264_scan8[0] + 3 - 1*8, top)\
- + MAP_F2F(mv, ref, x264_scan8[0] + 4 - 1*8, h->mb.i_mb_topright_xy)\
- + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 0*8, left[0])\
- + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 1*8, left[0])\
- + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 2*8, left[1])\
- + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 3*8, left[1])\
- + MAP_F2F(topright_mv, topright_ref, 0, left[0])\
- + MAP_F2F(topright_mv, topright_ref, 1, left[0])\
- + MAP_F2F(topright_mv, topright_ref, 2, left[1])
- +
- + if( h->mb.b_interlaced )
- + {
- +#define MAP_F2F(varmv, varref, index, macroblock)\
- + if( h->mb.cache.varref[l][index] >= 0 && macroblock >= 0 && !h->mb.field[macroblock] )\
- + {\
- + h->mb.cache.varref[l][index] <<= 1;\
- + h->mb.cache.varmv[l][index][1] /= 2;\
- + h->mb.cache.mvd[l][index][1] >>= 1;\
- + }
- + MAP_MVS
- +#undef MAP_F2F
- + }
- + else
- + {
- +#define MAP_F2F(varmv, varref, index, macroblock)\
- + if( h->mb.cache.varref[l][index] >= 0 && macroblock >= 0 && h->mb.field[macroblock] )\
- + {\
- + h->mb.cache.varref[l][index] >>= 1;\
- + h->mb.cache.varmv[l][index][1] <<= 1;\
- + h->mb.cache.mvd[l][index][1] <<= 1;\
- + }
- + MAP_MVS
- +#undef MAP_F2F
- + }
- + }
- + }
- +
- + /* Check whether skip here would cause decoder to predict interlace mode incorrectly. */
- + h->mb.allow_skip = 1;
- + if( h->sh.b_mbaff && (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- + {
- + if( h->mb.i_neighbour & MB_LEFT )
- + {
- + if( h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced )
- + h->mb.allow_skip = 0;
- + }
- + else if( h->mb.i_neighbour & MB_TOP )
- + {
- + if( h->mb.field[h->mb.i_mb_top_xy] != h->mb.b_interlaced )
- + h->mb.allow_skip = 0;
- + }
- + else // Frame mb pair is predicted
- + {
- + if( h->mb.b_interlaced )
- + h->mb.allow_skip = 0;
- + }
- + if( !h->mb.allow_skip )
- + {
- + if( IS_SKIP(h->mb.i_type) )
- + {
- + if( h->mb.i_type == P_SKIP )
- + h->mb.i_type = P_L0;
- + else if( h->mb.i_type == B_SKIP )
- + h->mb.i_type = B_DIRECT;
- + }
- }
- }
- @@ -1286,6 +1444,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- }
- }
- h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
- + h->mc.store_interleave_8x8x2( &h->fdec->plane_fld[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
- memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
- twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
- @@ -1299,12 +1458,13 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- {
- if( mb_x )
- {
- + // Take rightmost sample from top border of left mb to use as topleft here.
- h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
- h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
- CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
- CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
- }
- - memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
- + memcpy( &h->intra_border_backup[2][0][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
- }
- else
- {
- @@ -1316,6 +1476,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
- else
- twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
- h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- + h->mc.copy[PIXEL_16x16]( &h->fdec->plane_fld[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
- memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
- }
- }
- diff --git a/common/mc.c b/common/mc.c
- index 76061c3..e594785 100644
- --- a/common/mc.c
- +++ b/common/mc.c
- @@ -512,22 +512,43 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
- void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
- {
- const int b_interlaced = h->sh.b_mbaff;
- - const int stride = frame->i_stride[0] << b_interlaced;
- + int stride = frame->i_stride[0];
- const int width = frame->i_width[0];
- - int start = (mb_y*16 >> b_interlaced) - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
- - int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
- + int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
- + int height = (b_end ? frame->i_lines[0] + 16 : (mb_y+b_interlaced)*16) + 8;
- int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
- if( mb_y & b_interlaced )
- return;
- - for( int y = 0; y <= b_interlaced; y++, offs += frame->i_stride[0] )
- + h->mc.hpel_filter(
- + frame->filtered[1] + offs,
- + frame->filtered[2] + offs,
- + frame->filtered[3] + offs,
- + frame->plane[0] + offs,
- + stride, width + 16, height - start,
- + h->scratch_buffer );
- +
- + if( b_interlaced )
- {
- + /* MC must happen between pixels in the same field. */
- + stride = frame->i_stride[0] << 1;
- + start = (mb_y*16 >> 1) - 8;
- + height = ((b_end ? frame->i_lines[0] : mb_y*16) >> 1) + 8;
- + offs = start*stride - 8;
- + h->mc.hpel_filter(
- + frame->filtered_fld[1] + offs,
- + frame->filtered_fld[2] + offs,
- + frame->filtered_fld[3] + offs,
- + frame->plane_fld[0] + offs,
- + stride, width + 16, height - start,
- + h->scratch_buffer );
- + offs += frame->i_stride[0];
- h->mc.hpel_filter(
- - frame->filtered[1] + offs,
- - frame->filtered[2] + offs,
- - frame->filtered[3] + offs,
- - frame->plane[0] + offs,
- + frame->filtered_fld[1] + offs,
- + frame->filtered_fld[2] + offs,
- + frame->filtered_fld[3] + offs,
- + frame->plane_fld[0] + offs,
- stride, width + 16, height - start,
- h->scratch_buffer );
- }
- diff --git a/common/mvpred.c b/common/mvpred.c
- index c8efe1f..278e0ac 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -38,12 +38,33 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
- int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
- int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
- + // Partitions not yet reached in scan order are unavailable.
- if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
- {
- i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
- mv_c = h->mb.cache.mv[i_list][i8 - 8 - 1];
- - }
- + if( h->param.b_interlaced
- + && h->mb.cache.ref[i_list][x264_scan8[0]-1] != -2
- + && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_left_xy[0]] )
- + {
- + if( idx == 2 )
- + {
- + mv_c = h->mb.cache.topright_mv[i_list][0];
- + i_refc = h->mb.cache.topright_ref[i_list][0];
- + }
- + else if( idx == 8 )
- + {
- + mv_c = h->mb.cache.topright_mv[i_list][1];
- + i_refc = h->mb.cache.topright_ref[i_list][1];
- + }
- + else if( idx == 10 )
- + {
- + mv_c = h->mb.cache.topright_mv[i_list][2];
- + i_refc = h->mb.cache.topright_ref[i_list][2];
- + }
- + }
- + }
- if( h->mb.i_partition == D_16x8 )
- {
- if( idx == 0 )
- diff --git a/common/x86/util.h b/common/x86/util.h
- index 6544207..01e54f9 100644
- --- a/common/x86/util.h
- +++ b/common/x86/util.h
- @@ -87,30 +87,6 @@ static ALWAYS_INLINE int x264_predictor_difference_mmxext( int16_t (*mvc)[2], in
- return sum;
- }
- -#define x264_cabac_mvd_sum x264_cabac_mvd_sum_mmxext
- -static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_t *mvdtop)
- -{
- - static const uint64_t pb_2 = 0x0202020202020202ULL;
- - static const uint64_t pb_32 = 0x2020202020202020ULL;
- - int amvd;
- - asm(
- - "movd %1, %%mm0 \n"
- - "movd %2, %%mm1 \n"
- - "paddb %%mm1, %%mm0 \n"
- - "pxor %%mm2, %%mm2 \n"
- - "movq %%mm0, %%mm1 \n"
- - "pcmpgtb %3, %%mm0 \n"
- - "pcmpgtb %4, %%mm1 \n"
- - "psubb %%mm0, %%mm2 \n"
- - "psubb %%mm1, %%mm2 \n"
- - "movd %%mm2, %0 \n"
- - :"=r"(amvd)
- - :"m"(M16( mvdleft )),"m"(M16( mvdtop )),
- - "m"(pb_2),"m"(pb_32)
- - );
- - return amvd;
- -}
- -
- #define x264_predictor_roundclip x264_predictor_roundclip_mmxext
- static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
- {
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index a3fcd61..28609d8 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -609,9 +609,7 @@ void x264_macroblock_encode( x264_t *h )
- return;
- }
- - if( h->sh.b_mbaff
- - && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
- - && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
- + if( !h->mb.allow_skip )
- {
- /* The first skip is predicted to be a frame mb pair.
- * We don't yet support the aff part of mbaff, so force it to non-skip
- --
- 1.7.4
- From 88aa754aa9a2e3f2907f656e21439e7b93b8cfab Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 21:34:51 +0000
- Subject: [PATCH 09/25] Copy deblocked pixels to other plane
- ---
- common/deblock.c | 15 +++++++++++++++
- 1 files changed, 15 insertions(+), 0 deletions(-)
- diff --git a/common/deblock.c b/common/deblock.c
- index 0800461..52d410d 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -389,6 +389,21 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- #undef FILTER
- }
- +
- + // FIXME: Don't copy the whole frame around.
- + int y = mb_y*16;
- + int start = mb_y == h->i_threadslice_start;
- + int last = mb_y == h->i_threadslice_end - (1 << h->sh.b_mbaff);
- + int height = last ? 32+4 : 32;
- + if( !start ) y -= 4; // Make sure to copy the above four rows of deblocked pixels.
- + for( int i = y; i < y+height; i++ )
- + memcpy( h->fdec->plane_fld[0] + i*stridey, h->fdec->plane[0] + i*stridey, h->mb.i_mb_width*16*sizeof(pixel) );
- +
- + y = mb_y*8;
- + height = last ? 16+2 : 16;
- + if( !start ) y -=2;
- + for( int i = y; i < y+height; i++ )
- + memcpy( h->fdec->plane_fld[1] + i*strideuv, h->fdec->plane[1] + i*strideuv, h->mb.i_mb_width*16*sizeof(pixel) );
- }
- /* For deblock-aware RD.
- --
- 1.7.4
- From d42239c146bd2fc1417987ce1794fbf049796112 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Sun, 6 Feb 2011 22:58:39 +0000
- Subject: [PATCH 10/25] Fix thread max mv check
- ---
- encoder/analyse.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 87125c1..4f439d4 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -460,7 +460,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- if( h->param.b_deterministic )
- thread_mvy_range = h->param.analyse.i_mv_range_thread;
- - if( h->mb.b_interlaced )
- + if( h->sh.b_mbaff )
- thread_mvy_range >>= 1;
- x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
- --
- 1.7.4
- From 0ce16f54027e00ee0a9070e807eed3ca570325ad Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Sun, 20 Feb 2011 15:31:55 +0000
- Subject: [PATCH 11/25] Track what interlace decision the decoder is using
- ---
- common/common.h | 1 +
- encoder/cabac.c | 17 ++++++++++++++++-
- encoder/encoder.c | 11 +++++++++++
- 3 files changed, 28 insertions(+), 1 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index ef9b35a..bc14c10 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -604,6 +604,7 @@ struct x264_t
- int intra_border_index;
- int topleft_border_index;
- int topright_border_index;
- + int field_decoding_flag;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 334318d..6138d06 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -66,6 +66,21 @@ static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_
- }
- }
- +static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
- +{
- + const int top = h->mb.i_mb_xy - 2*h->mb.i_mb_stride;
- + int ctx = 0;
- + ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
- + ctx += (top >= 0
- + && h->mb.slice_table[top] == h->sh.i_first_mb
- + && h->mb.field[top]);
- +
- + x264_cabac_encode_decision_noup( cb, 70 + ctx, h->mb.b_interlaced );
- +#if !RDO_SKIP_BS
- + h->mb.field_decoding_flag = h->mb.b_interlaced;
- +#endif
- +}
- +
- static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- {
- const int i_mb_type = h->mb.i_type;
- @@ -73,7 +88,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
- if( h->sh.b_mbaff &&
- (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
- {
- - x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
- + x264_cabac_field_decoding_flag( h, cb );
- }
- if( h->sh.i_type == SLICE_TYPE_I )
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 9f294d1..2525fec 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1932,6 +1932,8 @@ static int x264_slice_write( x264_t *h )
- i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
- i_skip = 0;
- + h->mb.field_decoding_flag = 0;
- +
- while( (mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width) <= h->sh.i_last_mb )
- {
- int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- @@ -1988,7 +1990,12 @@ static int x264_slice_write( x264_t *h )
- x264_cabac_encode_terminal( &h->cabac );
- if( IS_SKIP( h->mb.i_type ) )
- + {
- + // FIXME: It might be better to change the interlace type
- + // rather than forcing a skip to be non-skip, but this would
- + // require modifying the already saved image data.
- x264_cabac_mb_skip( h, 1 );
- + }
- else
- {
- if( h->sh.i_type != SLICE_TYPE_I )
- @@ -2145,6 +2152,10 @@ static int x264_slice_write( x264_t *h )
- {
- i_mb_y++;
- i_mb_x = 0;
- + if( h->sh.b_mbaff && i_mb_y > 0 )
- + h->mb.field_decoding_flag = h->mb.field[i_mb_x+(i_mb_y-1)*h->mb.i_mb_stride];
- + else
- + h->mb.field_decoding_flag = 0;
- }
- }
- h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
- --
- 1.7.4
- From 02af876c83b5a6fc69a20c1928ad604069e533a7 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Sun, 20 Feb 2011 15:35:44 +0000
- Subject: [PATCH 12/25] Disallow skip where interlace would be wrong
- ---
- common/macroblock.c | 27 +++++++++++++++++++--------
- 1 files changed, 19 insertions(+), 8 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index a441981..4fe1f82 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1148,6 +1148,17 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- /* Check whether skip here would cause decoder to predict interlace mode incorrectly. */
- h->mb.allow_skip = 1;
- + int prevmb = mb_x + h->mb.i_mb_stride*mb_y;
- + if( mb_y&1 )
- + prevmb -= h->mb.i_mb_stride;
- + else if( mb_x )
- + prevmb = (mb_y+1)*h->mb.i_mb_stride + mb_x - 1;
- + else
- + prevmb = (mb_y-1)*h->mb.i_mb_stride + h->mb.i_mb_width;
- + if( h->mb.b_interlaced != h->mb.field_decoding_flag && IS_SKIP(h->mb.type[prevmb]) )
- + {
- + h->mb.allow_skip = 0;
- + }
- if( h->sh.b_mbaff && (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- {
- if( h->mb.i_neighbour & MB_LEFT )
- @@ -1165,15 +1176,15 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- if( h->mb.b_interlaced )
- h->mb.allow_skip = 0;
- }
- - if( !h->mb.allow_skip )
- + }
- + if( !h->mb.allow_skip )
- + {
- + if( IS_SKIP(h->mb.i_type) )
- {
- - if( IS_SKIP(h->mb.i_type) )
- - {
- - if( h->mb.i_type == P_SKIP )
- - h->mb.i_type = P_L0;
- - else if( h->mb.i_type == B_SKIP )
- - h->mb.i_type = B_DIRECT;
- - }
- + if( h->mb.i_type == P_SKIP )
- + h->mb.i_type = P_L0;
- + else if( h->mb.i_type == B_SKIP )
- + h->mb.i_type = B_DIRECT;
- }
- }
- --
- 1.7.4
- From 2a15d908619a780aa5a0f8bd9c6b61ade5d52237 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Sun, 20 Feb 2011 15:36:29 +0000
- Subject: [PATCH 13/25] CABAC encoding of skips
- ---
- common/common.h | 2 ++
- common/macroblock.c | 29 +++++++++++++++++++++++++++++
- encoder/cabac.c | 14 +++++++-------
- 3 files changed, 38 insertions(+), 7 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index bc14c10..6d81496 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -605,6 +605,8 @@ struct x264_t
- int topleft_border_index;
- int topright_border_index;
- int field_decoding_flag;
- + int left_skip;
- + int top_skip;
- /**** thread synchronization ends here ****/
- /* subsequent variables are either thread-local or constant,
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 4fe1f82..c01320c 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1188,6 +1188,35 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- }
- }
- + if( h->param.b_cabac )
- + {
- + /* Neighbours here are calculated based on field_decoding_flag */
- + int left_xy, top_xy;
- + if( h->sh.b_mbaff )
- + {
- + int mb_xy = mb_x + (h->mb.i_mb_y&~1)*h->mb.i_mb_stride;
- + left_xy = mb_xy - 1;
- + if( (mb_y&1) && mb_x > 0 && h->mb.field_decoding_flag == h->mb.field[left_xy] )
- + left_xy += h->mb.i_mb_stride;
- + if( h->mb.field_decoding_flag )
- + {
- + top_xy = mb_xy - h->mb.i_mb_stride;
- + if( !(mb_y&1) && top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && h->mb.field[top_xy] )
- + top_xy -= h->mb.i_mb_stride;
- + }
- + else
- + top_xy = mb_x + (mb_y-1)*h->mb.i_mb_stride;
- + }
- + else
- + {
- + left_xy = h->mb.i_mb_xy - 1;
- + top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
- + }
- +
- + h->mb.left_skip = mb_x > 0 && h->mb.slice_table[left_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[left_xy] );
- + h->mb.top_skip = top_xy >= 0 && (h->mb.slice_table[top_xy] == h->sh.i_first_mb) && !IS_SKIP( h->mb.type[top_xy] );
- + }
- +
- /* load skip */
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 6138d06..3435048 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -295,10 +295,10 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- #if !RDO_SKIP_BS
- void x264_cabac_mb_skip( x264_t *h, int b_skip )
- {
- - int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
- - + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
- - + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
- - x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
- + int ctx = h->mb.left_skip + h->mb.top_skip;
- + if( h->sh.i_type != SLICE_TYPE_P )
- + ctx += 13;
- + x264_cabac_encode_decision( &h->cabac, 11+ctx, b_skip );
- }
- #endif
- @@ -350,7 +350,7 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx
- const int i8 = x264_scan8[idx];
- const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
- const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
- - int ctx = 0;
- + int ctx = 0;
- if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] )
- ctx++;
- @@ -423,9 +423,9 @@ static ALWAYS_INLINE int x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int
- x264_cabac_encode_bypass( cb, mvd < 0 );
- }
- #endif
- - /* Since we don't need to keep track of MVDs larger than 33, just cap the value.
- + /* Since we don't need to keep track of MVDs larger than 70, just cap the value.
- * This lets us store MVDs as 8-bit values instead of 16-bit. */
- - return X264_MIN( i_abs, 33 );
- + return X264_MIN( i_abs, 70 );
- }
- static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
- --
- 1.7.4
- From b49fa962fcd25984e5f65a36a2784fbe702bdb4c Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 21:18:59 +0000
- Subject: [PATCH 14/25] Add mbaff deblock strength calculation
- ---
- common/deblock.c | 38 ++++++++++++++++++++++++++++++++++++++
- common/frame.h | 3 +++
- encoder/encoder.c | 12 +++++++++---
- 3 files changed, 50 insertions(+), 3 deletions(-)
- diff --git a/common/deblock.c b/common/deblock.c
- index 52d410d..55a0154 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -269,6 +269,44 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
- }
- }
- }
- +void deblock_strength_mbaff( x264_t *h, uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, int bframe )
- +{
- + const int vertical = 0, horizontal = 1;
- + int neighbour_field[2];
- + int current_field = h->mb.b_interlaced;
- + neighbour_field[vertical] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]];
- + neighbour_field[horizontal] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy];
- +
- + for( int dir = 0; dir < 2; dir++ )
- + {
- + int edge_stride = dir ? 8 : 1;
- + int part_stride = dir ? 1 : 8;
- + for( int edge = 0; edge < 4; edge++ )
- + {
- + for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride )
- + {
- + int p = q - edge_stride;
- + if( nnz[q] || nnz[p] )
- + {
- + bs[dir][edge][i] = 2;
- + }
- + else if( (edge == 0 && current_field != neighbour_field[dir]) ||
- + ref[0][q] != ref[0][p] ||
- + abs( mv[0][q][0] - mv[0][p][0] ) >= 4 ||
- + abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit ||
- + (bframe && (ref[1][q] != ref[1][p] ||
- + abs( mv[1][q][0] - mv[1][p][0] ) >= 4 ||
- + abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) )
- + {
- + bs[dir][edge][i] = 1;
- + }
- + else
- + bs[dir][edge][i] = 0;
- + }
- + }
- + }
- +}
- static inline void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
- {
- diff --git a/common/frame.h b/common/frame.h
- index 8fe0627..3296a2c 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -186,6 +186,9 @@ typedef struct
- int bframe );
- } x264_deblock_function_t;
- +void deblock_strength_mbaff( x264_t *h, uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, int bframe );
- +
- x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
- void x264_frame_delete( x264_frame_t *frame );
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 2525fec..0319126 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2129,14 +2129,20 @@ static int x264_slice_write( x264_t *h )
- /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
- if( b_deblock )
- {
- - int mvy_limit = 4 >> h->sh.b_mbaff;
- + int mvy_limit = 4 >> h->mb.b_interlaced;
- uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
- x264_macroblock_cache_load_deblock( h );
- if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
- memset( bs, 3, 2*4*4*sizeof(uint8_t) );
- else
- - h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- - bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
- + {
- + if( h->sh.b_mbaff )
- + deblock_strength_mbaff( h, h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- + bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
- + else
- + h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
- + bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
- + }
- }
- x264_ratecontrol_mb( h, mb_size );
- --
- 1.7.4
- From 55bb59fb940208de76113875c0744694043a5f4d Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 21:27:07 +0000
- Subject: [PATCH 15/25] Initial deblocking support
- ---
- common/deblock.c | 13 +++++++--
- common/macroblock.c | 67 ++++++++++++++++++++++++++++++++++----------------
- 2 files changed, 55 insertions(+), 25 deletions(-)
- diff --git a/common/deblock.c b/common/deblock.c
- index 55a0154..48788ae 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -345,13 +345,17 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- int b_interlaced = h->sh.b_mbaff;
- int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
- int stridey = h->fdec->i_stride[0];
- - int stride2y = stridey << b_interlaced;
- int strideuv = h->fdec->i_stride[1];
- - int stride2uv = strideuv << b_interlaced;
- +
- + // Backup mb.b_interlaced because it will be changed in x264_macroblock_cache_load_neighbours_deblock.
- + int interlaced_backup = h->mb.b_interlaced;
- for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
- {
- x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
- + h->mb.i_mb_x = mb_x;
- + h->mb.i_mb_y = mb_y;
- + h->mb.i_mb_xy = mb_x + h->mb.i_mb_stride*mb_y;
- x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
- int mb_xy = h->mb.i_mb_xy;
- @@ -361,12 +365,14 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
- pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
- - if( mb_y & b_interlaced )
- + if( mb_y & h->mb.b_interlaced )
- {
- pixy -= 15*stridey;
- pixuv -= 7*strideuv;
- }
- + int stride2y = stridey << h->mb.b_interlaced;
- + int stride2uv = strideuv << h->mb.b_interlaced;
- int qp = h->mb.qp[mb_xy];
- int qpc = h->chroma_qp_table[qp];
- int first_edge_only = h->mb.type[mb_xy] == P_SKIP || qp <= qp_thresh;
- @@ -427,6 +433,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- #undef FILTER
- }
- + h->mb.b_interlaced = interlaced_backup;
- // FIXME: Don't copy the whole frame around.
- int y = mb_y*16;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index c01320c..47bb2ff 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1257,18 +1257,36 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
- int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
- h->mb.i_neighbour = 0;
- - h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
- +
- + if( h->sh.b_mbaff )
- + h->mb.b_interlaced = h->mb.field[h->mb.i_mb_xy];
- +
- + h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
- + h->mb.i_mb_left_xy[1] =
- + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- + if( h->sh.b_mbaff )
- + {
- + if( mb_y&1 )
- + {
- + if( h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced )
- + h->mb.i_mb_left_xy[0] -= h->mb.i_mb_stride;
- + }
- + else
- + {
- + if( h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_top_xy] )
- + h->mb.i_mb_top_xy += h->mb.i_mb_stride;
- + if( h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced )
- + h->mb.i_mb_left_xy[1] += h->mb.i_mb_stride;
- + }
- + }
- if( mb_x > 0 )
- {
- - h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
- if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_LEFT;
- }
- -
- if( mb_y > h->mb.b_interlaced )
- {
- - h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
- if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
- h->mb.i_neighbour |= MB_TOP;
- }
- @@ -1276,7 +1294,7 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
- void x264_macroblock_cache_load_deblock( x264_t *h )
- {
- - if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
- + if( !h->sh.b_mbaff && IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
- return;
- /* If we have multiple slices and we're deblocking on slice edges, we
- @@ -1291,7 +1309,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- h->mb.i_neighbour &= ~old_neighbour;
- if( h->mb.i_neighbour )
- {
- - int top_y = mb_y - (1 << h->mb.b_interlaced);
- + int top_y = h->mb.i_mb_top_xy / h->mb.i_mb_stride;
- int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
- int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
- int s8x8 = h->mb.i_b8_stride;
- @@ -1330,17 +1348,15 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- i8 = x264_scan8[0] - 1;
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int ir = h->mb.i_b8_xy - 1;
- - int iv = h->mb.i_b4_xy - 1;
- h->mb.cache.ref[l][i8+0*8] =
- - h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
- + h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table[12+4]];
- h->mb.cache.ref[l][i8+2*8] =
- - h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
- + h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table[12+5]];
- - CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
- - CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
- + CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+0]] );
- + CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+1]] );
- + CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+2]] );
- + CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+3]] );
- }
- }
- }
- @@ -1380,7 +1396,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- {
- uint8_t (*nnz)[24] = h->mb.non_zero_count;
- int top = h->mb.i_mb_top_xy;
- - int left = h->mb.i_mb_left_xy[0];
- + int *left = h->mb.i_mb_left_xy;
- if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
- {
- @@ -1391,15 +1407,22 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- M16( &h->mb.cache.non_zero_count[i8+2] ) = nnz_top1 ? 0x0101 : 0;
- }
- - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
- + if( h->mb.i_neighbour & MB_LEFT )
- {
- + // TODO: Merge code in deblock
- int i8 = x264_scan8[0] - 1;
- - int nnz_left0 = M16( &nnz[left][2] ) | M16( &nnz[left][6] );
- - int nnz_left1 = M16( &nnz[left][10] ) | M16( &nnz[left][14] );
- - h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
- - h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
- - h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
- - h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
- + if( h->mb.mb_transform_size[left[0]] )
- + {
- + int nnz_left0 = M16( &nnz[left[0]][2] ) | M16( &nnz[left[0]][6] );
- + h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
- + h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
- + }
- + if( h->mb.mb_transform_size[left[1]] )
- + {
- + int nnz_left1 = M16( &nnz[left[1]][10] ) | M16( &nnz[left[1]][14] );
- + h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
- + h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
- + }
- }
- if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
- --
- 1.7.4
- From c1420786e639c96f7fd0b11bac69952edeabbafc Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Thu, 17 Mar 2011 18:15:06 +0000
- Subject: [PATCH 16/25] Calculate deblock strength for mbaff
- ---
- common/common.h | 1 +
- common/deblock.c | 1 +
- common/macroblock.c | 5 +++++
- 3 files changed, 7 insertions(+), 0 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 6d81496..7990cf0 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -844,6 +844,7 @@ struct x264_t
- pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- pixel *intra_diagonal_backup[5][3];
- uint8_t (*deblock_strength[2])[2][4][4];
- + uint8_t (*deblock_strength_mbaff[2])[2][8]; /* store [field][mb_x][dir][partition], there can be 8 different block strengths in mbaff left/top edges */
- /* CPU functions dependents */
- x264_predict_t predict_16x16[4+3];
- diff --git a/common/deblock.c b/common/deblock.c
- index 48788ae..c1bb10a 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -362,6 +362,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
- int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
- uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
- + uint8_t (*bs_mbaff)[8] = h->deblock_strength_mbaff[mb_y&1][mb_x];
- pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
- pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 47bb2ff..3a4c873 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -340,7 +340,9 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- {
- CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
- + CHECKED_MALLOC( h->deblock_strength_mbaff[i], sizeof(**h->deblock_strength_mbaff) * h->mb.i_mb_width );
- h->deblock_strength[1] = h->deblock_strength[i];
- + h->deblock_strength_mbaff[1] = h->deblock_strength_mbaff[i];
- }
- }
- @@ -372,7 +374,10 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
- if( !b_lookahead )
- {
- for( int i = 0; i <= h->param.b_interlaced; i++ )
- + {
- x264_free( h->deblock_strength[i] );
- + x264_free( h->deblock_strength_mbaff[i] );
- + }
- for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
- {
- for( int j = 0; j < 2; j++ )
- --
- 1.7.4
- From 6cda0dc785db073de2c496c93e412bdb54c37a2d Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 22:06:27 +0000
- Subject: [PATCH 17/25] Left edge deblocking
- ---
- common/deblock.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++--
- common/macroblock.c | 63 ++++++++++++++++++
- 2 files changed, 236 insertions(+), 8 deletions(-)
- diff --git a/common/deblock.c b/common/deblock.c
- index c1bb10a..be96fc8 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -118,6 +118,128 @@ static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alp
- }
- }
- }
- +/* MBAFF luma edge filter with tc0 clipping: 8 rows, edge lies between pix[-1] and pix[0] of each row. */
- +static inline void deblock_v_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
- +{
- + /* Only filter 8 pixels at a time; rows are stride apart and each tc0 entry serves two rows (tc0[d>>1]). */
- + for( int d = 0; d < 8; d++, pix += stride ) /* NOTE(review): taps step by 1, rows by stride - the reverse of deblock_v_luma_c's (stride, 1) */
- + {
- + int p2 = pix[-3]; /* all six taps are read before any write-back */
- + int p1 = pix[-2];
- + int p0 = pix[-1];
- + int q0 = pix[ 0];
- + int q1 = pix[ 1];
- + int q2 = pix[ 2];
- +
- + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- + {
- + int tc = tc0[d>>1]; /* NOTE(review): a negative tc0 entry is not skipped here - confirm callers never supply one */
- + int delta;
- + if( abs( p2 - p0 ) < beta ) /* p side: widens the p0/q0 clip range; p1 is rewritten only when tc0 is nonzero */
- + {
- + if( tc0[d>>1] )
- + pix[-2] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[d>>1], tc0[d>>1] );
- + tc++;
- + }
- + if( abs( q2 - q0 ) < beta ) /* q side: same treatment for q1 */
- + {
- + if( tc0[d>>1] )
- + pix[ 1] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[d>>1], tc0[d>>1] );
- + tc++;
- + }
- +
- + delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); /* x264_clip3 presumably clamps to [-tc, tc] - confirm */
- + pix[-1] = x264_clip_pixel( p0 + delta ); /* p0' */
- + pix[ 0] = x264_clip_pixel( q0 - delta ); /* q0' */
- + }
- + }
- +}
- +/* MBAFF luma edge filter without tc0 limits: 8 rows, edge lies between pix[-1] and pix[0] of each row. */
- +static inline void deblock_v_luma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
- +{
- + /* Only filter 8 pixels at a time; consecutive rows are stride apart. */
- + for( int d = 0; d < 8; d++, pix += stride )
- + {
- + /* Filter intra samples normally; every tap is cached first so both sides are computed from unfiltered values. */
- + int p2 = pix[-3];
- + int p1 = pix[-2];
- + int p0 = pix[-1];
- + int q0 = pix[ 0];
- + int q1 = pix[ 1];
- + int q2 = pix[ 2];
- +
- + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- + {
- + if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ) /* tighter threshold on the step across the edge gates the 3-sample updates */
- + {
- + if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
- + {
- + const int p3 = pix[-4]; /* pix[-4] is read but never written */
- + pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
- + pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
- + pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
- + }
- + else /* p0' */
- + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- + if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
- + {
- + const int q3 = pix[3]; /* pix[3] is read but never written */
- + pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
- + pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
- + pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
- + }
- + else /* q0' */
- + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- + }
- + else /* p0', q0' */
- + {
- + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- + pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- + }
- + }
- + }
- +}
- +/* MBAFF chroma edge filter with tc0 clipping: 4 rows, one tc0 entry per row, only p0/q0 are rewritten. */
- +static inline void deblock_v_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
- +{
- + const int xstride = 2; /* taps are two samples apart; x264_frame_deblock_row invokes this at pixuv and pixuv + 1 - presumably interleaved U/V, confirm */
- + for( int i = 0; i < 4; i++, pix += stride )
- + {
- + /* We don't worry about p2 or q2 */
- + int tc = tc0[i]; /* NOTE(review): no early-out for tc <= 0 here - confirm that is intended */
- + int p1 = pix[xstride*-2];
- + int p0 = pix[xstride*-1];
- + int q0 = pix[xstride* 0];
- + int q1 = pix[xstride* 1];
- +
- + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- + {
- + int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); /* x264_clip3 presumably clamps to [-tc, tc] - confirm */
- + pix[xstride*-1] = x264_clip_pixel( p0 + delta ); /* p0' */
- + pix[xstride* 0] = x264_clip_pixel( q0 - delta ); /* q0' */
- + }
- + }
- +}
- +/* MBAFF chroma edge filter without tc0 limits: 4 rows, only p0/q0 are rewritten. */
- +static inline void deblock_v_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
- +{
- + const int xstride = 2; /* taps are two samples apart; x264_frame_deblock_row invokes this at pixuv and pixuv + 1 - presumably interleaved U/V, confirm */
- + for( int i = 0; i < 4; i++, pix += stride )
- + {
- + int p1 = pix[xstride*-2]; /* all four taps are read before any write-back */
- + int p0 = pix[xstride*-1];
- + int q0 = pix[xstride* 0];
- + int q1 = pix[xstride* 1];
- +
- + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
- + {
- + /* p0', q0': each side is a rounded average weighted toward its own p1/q1 */
- + pix[xstride*-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- + pix[xstride* 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- + }
- + }
- +}
- +
- static void deblock_v_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
- {
- deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
- @@ -392,16 +514,59 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- if( h->mb.i_neighbour & MB_LEFT )
- {
- - int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
- - int qp_left = (qp + qpl + 1) >> 1;
- - int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
- - int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
- - if( intra_cur || intra_left )
- - FILTER( _intra, 0, 0, qp_left, qpc_left );
- + if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
- + {
- + int luma_qp[2];
- + int chroma_qp[2];
- + int left_qp[2];
- + int current_qp = h->mb.qp[mb_xy];
- + left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
- + luma_qp[0] = (current_qp + left_qp[0] + 1) >> 1;
- + chroma_qp[0] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
- + if( bs_mbaff[0][0] == 4)
- + {
- + deblock_edge_intra( h, pixy, 2*stridey, bs_mbaff[0], luma_qp[0], 0, deblock_v_luma_intra_mbaff_c );
- + deblock_edge_intra( h, pixuv, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
- + deblock_edge_intra( h, pixuv + 1, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
- + }
- + else
- + {
- + deblock_edge( h, pixy, 2*stridey, bs_mbaff[0], luma_qp[0], 0, deblock_v_luma_mbaff_c );
- + deblock_edge( h, pixuv, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
- + deblock_edge( h, pixuv + 1, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
- + }
- +
- + int offy = h->mb.b_interlaced ? 4 : 0;
- + int offuv = h->mb.b_interlaced ? 3 : 0;
- + left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
- + luma_qp[1] = (current_qp + left_qp[1] + 1) >> 1;
- + chroma_qp[1] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
- + if( bs_mbaff[0][4] == 4)
- + {
- + deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs_mbaff[0]+4, luma_qp[1], 0, deblock_v_luma_intra_mbaff_c );
- + deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
- + deblock_edge_intra( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
- + }
- + else
- + {
- + deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs_mbaff[0]+4, luma_qp[1], 0, deblock_v_luma_mbaff_c );
- + deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
- + deblock_edge( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
- + }
- + }
- else
- - FILTER( , 0, 0, qp_left, qpc_left );
- - }
- + {
- + int qpl = h->mb.qp[h->mb.i_mb_xy-1];
- + int qp_left = (qp + qpl + 1) >> 1;
- + int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
- + int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_xy-1] );
- + if( intra_cur || intra_left ) // bs=4
- + FILTER( _intra, 0, 0, qp_left, qpc_left );
- + else
- + FILTER( , 0, 0, qp_left, qpc_left );
- + }
- + }
- if( !first_edge_only )
- {
- if( !transform_8x8 ) FILTER( , 0, 1, qp, qpc );
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 3a4c873..1d72fe8 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1445,6 +1445,69 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot;
- }
- }
- +
- + int mb_x = h->mb.i_mb_x;
- + int mb_y = h->mb.i_mb_y;
- + int mb_xy = h->mb.i_mb_xy;
- +
- + // left is wrong without this here
- + x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
- +
- + uint8_t (*bs_mbaff)[8] = h->deblock_strength_mbaff[h->mb.i_mb_y&1][h->mb.i_mb_x];
- +
- + int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
- +
- + if( h->mb.i_neighbour & MB_LEFT )
- + {
- + if( h->sh.b_mbaff && h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
- + {
- + static const uint8_t offset[2][2][8] = {
- + { { 0, 0, 0, 0, 1, 1, 1, 1 },
- + { 2, 2, 2, 2, 3, 3, 3, 3 }, },
- + { { 0, 1, 2, 3, 0, 1, 2, 3 },
- + { 0, 1, 2, 3, 0, 1, 2, 3 }, }
- + };
- + uint8_t bS[8];
- +
- + if( intra_cur )
- + bS[0] = bS[1] = bS[2] = bS[3] =
- + bS[4] = bS[5] = bS[6] = bS[7] = 4;
- + else
- + {
- + const uint8_t *off = offset[h->mb.b_interlaced][mb_y&1];
- + uint8_t (*nnz)[24] = h->mb.non_zero_count;
- +
- + for( int i=0; i<8; i++ )
- + {
- + int left = h->mb.i_mb_left_xy[h->mb.b_interlaced ? i>>2 : i&1];
- + int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
- + int nnz_left = nnz[left][3 + 4*off[i]];
- + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
- + {
- + int j = off[i]&~1;
- + if( h->mb.mb_transform_size[left] )
- + nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
- + }
- + if( IS_INTRA( h->mb.type[left] ) )
- + bS[i] = 4;
- + else if( nnz_left || nnz_this )
- + bS[i] = 2;
- + else // As left is different interlaced.
- + bS[i] = 1;
- + }
- + }
- +
- + if( h->mb.b_interlaced )
- + {
- + for( int i=0; i<8; i++ ) bs_mbaff[0][i] = bS[i];
- + }
- + else
- + {
- + for( int i=0; i<4; i++ ) bs_mbaff[0][i] = bS[2*i];
- + for( int i=0; i<4; i++ ) bs_mbaff[0][i+4] = bS[1+2*i];
- + }
- + }
- + }
- }
- static void ALWAYS_INLINE twiddle_topleft_pixel( pixel *dst, pixel *src, int b_interlaced )
- --
- 1.7.4
- From 374cba76623f48c2084b89c471e5fdf7951dfc76 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Wed, 16 Mar 2011 21:24:42 +0000
- Subject: [PATCH 18/25] Top edge deblocking
- ---
- common/deblock.c | 41 ++++++++++++++++++++++++++-------
- common/macroblock.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 95 insertions(+), 9 deletions(-)
- diff --git a/common/deblock.c b/common/deblock.c
- index be96fc8..de9d9fb 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -576,17 +576,40 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
- if( h->mb.i_neighbour & MB_TOP )
- {
- - int qpt = h->mb.qp[h->mb.i_mb_top_xy];
- - int qp_top = (qp + qpt + 1) >> 1;
- - int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
- - int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
- - if( ~b_interlaced & (intra_cur | intra_top) )
- - FILTER( _intra, 1, 0, qp_top, qpc_top );
- + if( b_interlaced && !(mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
- + {
- + /* Need to filter both fields (even for frame macroblocks) */
- + /* Filter top two rows using the top and then bottom macroblocks of the above pair. */
- + int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
- +
- + for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
- + {
- + int qpt = h->mb.qp[mbn_xy];
- + int qp_top = (qp + qpt + 1) >> 1;
- + int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
- +
- + deblock_edge( h, pixy + j*stridey, 2* stridey, bs_mbaff[1]+4*j, qp_top, 0, deblock_v_luma_c );
- + deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs_mbaff[1]+4*j, qpc_top, 1, deblock_v_chroma_c );
- + }
- + }
- else
- {
- - if( intra_top )
- - M32( bs[1][0] ) = 0x03030303;
- - FILTER( , 1, 0, qp_top, qpc_top );
- + int qpt = h->mb.qp[h->mb.i_mb_top_xy];
- + int qp_top = (qp + qpt + 1) >> 1;
- + int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
- + int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
- +
- + if( (!b_interlaced || (!h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_top_xy]))
- + && (intra_cur || intra_top) )
- + {
- + FILTER( _intra, 1, 0, qp_top, qpc_top );
- + }
- + else
- + {
- + if( intra_top )
- + M32( bs[1][0] ) = 0x03030303;
- + FILTER( , 1, 0, qp_top, qpc_top );
- + }
- }
- }
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 1d72fe8..0da5958 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1508,6 +1508,69 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
- }
- }
- }
- +
- + if( h->mb.i_neighbour & MB_TOP )
- + {
- + if( h->sh.b_mbaff && !(mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
- + {
- + /* Need to filter both fields (even for frame macroblocks) */
- + /* Filter top two rows using the top and then bottom macroblocks of the above pair. */
- + int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
- +
- + for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
- + {
- + int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
- + uint8_t (*nnz)[24] = h->mb.non_zero_count;
- +
- + uint32_t nnz_top[4];
- + uint32_t nnz_cur[4];
- + nnz_top[0] = nnz[mbn_xy][3*4+0];
- + nnz_top[1] = nnz[mbn_xy][3*4+1];
- + nnz_top[2] = nnz[mbn_xy][3*4+2];
- + nnz_top[3] = nnz[mbn_xy][3*4+3];
- + nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0];
- + nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1];
- + nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2];
- + nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3];
- +
- + /* Munge NNZ for cavlc + 8x8dct */
- + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
- + {
- + int top = mbn_xy;
- + if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
- + {
- + int nnz_top0 = M16( &nnz[top][8] ) | M16( &nnz[top][12] );
- + int nnz_top1 = M16( &nnz[top][10] ) | M16( &nnz[top][14] );
- + nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
- + nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
- + }
- + if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
- + {
- + int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
- + int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
- + nnz_cur[0] = nnz_cur[1] = !!nnz0;
- + nnz_cur[2] = nnz_cur[3] = !!nnz1;
- + }
- + }
- +
- + uint8_t bS[4];
- + if( intra_cur || mbn_intra )
- + bS[0] = bS[1] = bS[2] = bS[3] = 3;
- + else
- + {
- + for( int i = 0; i < 4; i++ )
- + {
- + if( nnz_cur[i] || nnz_top[i] )
- + bS[i] = 2;
- + else
- + bS[i] = 1;
- + }
- + }
- + for( int i=0; i<4; i++ )
- + bs_mbaff[1][i+4*j] = bS[i];
- + }
- + }
- + }
- }
- static void ALWAYS_INLINE twiddle_topleft_pixel( pixel *dst, pixel *src, int b_interlaced )
- --
- 1.7.4
- From 388145f81f75bfcd549335774457ad7d7fcd475f Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 15 Mar 2011 01:39:49 +0000
- Subject: [PATCH 19/25] Use both left macroblocks for ref_idx calculation
- ---
- common/macroblock.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------
- 1 files changed, 51 insertions(+), 7 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 0da5958..5b42966 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1229,14 +1229,58 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
- if( h->param.b_cabac )
- {
- - uint8_t skipbp;
- x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- - skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
- - h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
- - h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
- - skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
- - h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4;
- - h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8;
- +
- + if( h->mb.i_neighbour & MB_LEFT )
- + {
- + if( h->mb.skipbp[left[0]] == 0xf )
- + h->mb.cache.skip[x264_scan8[0] - 1] = 1;
- + else if( h->mb.partition[left[0]] == D_8x8 )
- + {
- + int off = 1+(left_index_table[18]&~1);
- + h->mb.cache.skip[x264_scan8[0] - 1] = (h->mb.skipbp[left[0]] >> off) & 1;
- + }
- + else
- + h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[left[0]] & 0x2;
- +
- + if( h->mb.skipbp[left[1]] == 0xf )
- + h->mb.cache.skip[x264_scan8[8] - 1] = 1;
- + else if( h->mb.partition[left[1]] == D_8x8 )
- + {
- + int off = 1+(left_index_table[20]&~1);
- + h->mb.cache.skip[x264_scan8[8] - 1] = (h->mb.skipbp[left[1]] >> off) & 1;
- + }
- + else
- + h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[left[1]] & 0x8;
- + }
- + else
- + {
- + h->mb.cache.skip[x264_scan8[0] - 1 + 0*8] = 0;
- + h->mb.cache.skip[x264_scan8[0] - 1 + 1*8] = 0;
- + h->mb.cache.skip[x264_scan8[0] - 1 + 2*8] = 0;
- + h->mb.cache.skip[x264_scan8[0] - 1 + 3*8] = 0;
- + }
- +
- + if( h->mb.i_neighbour & MB_TOP )
- + {
- + if( h->mb.skipbp[top] == 0xf )
- + {
- + h->mb.cache.skip[x264_scan8[0] - 8] = 1;
- + h->mb.cache.skip[x264_scan8[4] - 8] = 1;
- + }
- + else if( h->mb.partition[top] == D_8x8 )
- + {
- + h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[top] & 0x4;
- + h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[top] & 0x8;
- + }
- + else
- + {
- + h->mb.cache.skip[x264_scan8[0] - 8] = 0;
- + h->mb.cache.skip[x264_scan8[4] - 8] = 0;
- + }
- + }
- + else
- + M32( &h->mb.cache.skip[x264_scan8[0] - 8] ) = 0;
- }
- }
- --
- 1.7.4
- From e30f270b1830c3fc2b12014aa85d6dd959e3c4d3 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 15 Mar 2011 01:14:16 +0000
- Subject: [PATCH 20/25] Fix min/max mv calculation
- ---
- common/macroblock.c | 5 +++++
- encoder/analyse.c | 12 ++++++++----
- 2 files changed, 13 insertions(+), 4 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 5b42966..00e9403 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -841,6 +841,11 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- const int *left_index_table = h->mb.left_index_table;
- + int my = h->mb.i_mb_y >> h->mb.b_interlaced;
- + int mb_height = h->mb.i_mb_height >> h->mb.b_interlaced;
- + h->mb.mv_min[1] = 4*( -16*my - 24 );
- + h->mb.mv_max[1] = 4*( 16*( mb_height - my - 1 ) + 24 );
- +
- /* load cache */
- if( h->mb.i_neighbour & MB_TOP )
- {
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 4f439d4..d004c66 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -443,13 +443,11 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
- if( h->mb.i_mb_x == 0 )
- {
- - int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
- - int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
- int thread_mvy_range = i_fmv_range;
- if( h->i_thread_frames > 1 )
- {
- - int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
- + int pix_y = (h->mb.i_mb_y | h->sh.b_mbaff) * 16;
- int thresh = pix_y + h->param.analyse.i_mv_range_thread;
- for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
- for( int j = 0; j < h->i_ref[i]; j++ )
- @@ -465,7 +463,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
- }
- -
- + int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
- + int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
- h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
- h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
- h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
- @@ -473,6 +472,11 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
- h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
- h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
- h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
- +
- + mb_y = h->mb.i_mb_y >> h->mb.b_interlaced;
- + mb_height = h->sps->i_mb_height >> h->mb.b_interlaced;
- + h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
- + h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
- }
- #undef CLIP_FMV
- --
- 1.7.4
- From 962a6ce9a008d7239c11920635e570903bed7a85 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 15 Mar 2011 01:15:06 +0000
- Subject: [PATCH 21/25] Calculate bipred POCs correctly
- ---
- common/common.h | 4 +-
- common/macroblock.c | 67 ++++++++++++++++++++++++++-------------------------
- 2 files changed, 36 insertions(+), 35 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 7990cf0..c7670e7 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -756,9 +756,9 @@ struct x264_t
- int i_chroma_lambda2_offset;
- /* B_direct and weighted prediction */
- - int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
- + int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
- int16_t (*dist_scale_factor)[4];
- - int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
- + int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
- int8_t (*bipred_weight)[4];
- /* maps fref1[0]'s ref indices into the current list0 */
- #define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 00e9403..f6406fe 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1230,8 +1230,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
- /* load skip */
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- - h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(mb_y&1)];
- - h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
- + h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced][h->mb.b_interlaced&(mb_y&1)];
- + h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced][h->mb.b_interlaced&(mb_y&1)];
- if( h->param.b_cabac )
- {
- x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
- @@ -1903,42 +1903,43 @@ void x264_macroblock_cache_save( x264_t *h )
- void x264_macroblock_bipred_init( x264_t *h )
- {
- - for( int field = 0; field <= h->sh.b_mbaff; field++ )
- - for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<h->sh.b_mbaff); i_ref0++ )
- - {
- - x264_frame_t *l0 = h->fref[0][i_ref0>>h->sh.b_mbaff];
- - int poc0 = l0->i_poc + l0->i_delta_poc[field^(i_ref0&1)];
- - for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<h->sh.b_mbaff); i_ref1++ )
- + for( int mbfield = 0; mbfield <= h->sh.b_mbaff; mbfield++ ) /* mbfield is the outermost index of dist_scale_factor_buf / bipred_weight_buf */
- + for( int field = 0; field <= h->sh.b_mbaff; field++ )
- + for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<mbfield); i_ref0++ ) /* twice as many list0 indices when mbfield is 1 */
- {
- - int dist_scale_factor;
- - x264_frame_t *l1 = h->fref[1][i_ref1>>h->sh.b_mbaff];
- - int poc1 = l1->i_poc + l1->i_delta_poc[field^(i_ref1&1)];
- - int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
- - int td = x264_clip3( poc1 - poc0, -128, 127 );
- - if( td == 0 /* || pic0 is a long-term ref */ )
- - dist_scale_factor = 256;
- - else
- + x264_frame_t *l0 = h->fref[0][i_ref0>>mbfield]; /* with mbfield set, indices 2k and 2k+1 map to the same frame */
- + int poc0 = l0->i_poc + mbfield*l0->i_delta_poc[field^(i_ref0&1)]; /* the delta_poc term drops out when mbfield is 0 */
- + for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<mbfield); i_ref1++ )
- {
- - int tb = x264_clip3( cur_poc - poc0, -128, 127 );
- - int tx = (16384 + (abs(td) >> 1)) / td;
- - dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
- - }
- + int dist_scale_factor;
- + x264_frame_t *l1 = h->fref[1][i_ref1>>mbfield];
- + int cur_poc = h->fdec->i_poc + mbfield*h->fdec->i_delta_poc[field]; /* scaled by mbfield like poc0/poc1, so mbfield == 0 entries never mix a field-adjusted current POC with unadjusted reference POCs */
- + int poc1 = l1->i_poc + mbfield*l1->i_delta_poc[field^(i_ref1&1)];
- + int td = x264_clip3( poc1 - poc0, -128, 127 );
- + if( td == 0 /* || pic0 is a long-term ref */ )
- + dist_scale_factor = 256;
- + else
- + {
- + int tb = x264_clip3( cur_poc - poc0, -128, 127 );
- + int tx = (16384 + (abs(td) >> 1)) / td;
- + dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
- + }
- - h->mb.dist_scale_factor_buf[field][i_ref0][i_ref1] = dist_scale_factor;
- + h->mb.dist_scale_factor_buf[mbfield][field][i_ref0][i_ref1] = dist_scale_factor;
- - dist_scale_factor >>= 2;
- - if( h->param.analyse.b_weighted_bipred
- - && dist_scale_factor >= -64
- - && dist_scale_factor <= 128 )
- - {
- - h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 64 - dist_scale_factor;
- - // ssse3 implementation of biweight doesn't support the extrema.
- - // if we ever generate them, we'll have to drop that optimization.
- - assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
- + dist_scale_factor >>= 2;
- + if( h->param.analyse.b_weighted_bipred
- + && dist_scale_factor >= -64
- + && dist_scale_factor <= 128 )
- + {
- + h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 64 - dist_scale_factor;
- + // ssse3 implementation of biweight doesn't support the extrema.
- + // if we ever generate them, we'll have to drop that optimization.
- + assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
- + }
- + else
- + h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 32; /* fallback weight when weighted bipred is off or the factor is out of range */
- }
- - else
- - h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 32;
- }
- - }
- }
- --
- 1.7.4
- From a9f5b05e3dc0c7482e6661a1ee8d457f1e35ee75 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 15 Mar 2011 01:16:20 +0000
- Subject: [PATCH 22/25] Direct temporal
- ---
- common/mvpred.c | 100 +++++++++++++++++++++++++++++++++++++++++--------------
- 1 files changed, 75 insertions(+), 25 deletions(-)
- diff --git a/common/mvpred.c b/common/mvpred.c
- index 278e0ac..28eabca 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -182,50 +182,100 @@ void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
- static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- {
- - int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
- - int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
- - const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
- - const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
- -
- - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
- -
- - h->mb.i_partition = partition_col;
- -
- - if( IS_INTRA( type_col ) )
- + int mb_x = h->mb.i_mb_x;
- + int mb_y = h->mb.i_mb_y;
- + int mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
- + int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
- + int col_parity = abs(h->fref[1][0]->i_poc - h->fdec->i_poc)
- + >= abs(h->fref[1][0]->i_poc + h->sh.i_delta_poc_bottom - h->fdec->i_poc);
- + int preshift = h->mb.b_interlaced;
- + int postshift = h->mb.b_interlaced;
- + int offset = 1;
- + int yshift = 1;
- + h->mb.i_partition = partition_col[0];
- + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
- {
- - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
- - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
- - return 1;
- + if( h->mb.b_interlaced )
- + {
- + mb_y = h->mb.i_mb_y&~1;
- + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + type_col[0] = h->fref[1][0]->mb_type[mb_xy];
- + type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
- + partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
- + partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
- + preshift = 0;
- + yshift = 0;
- +
- + if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16)
- + && (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16)
- + && partition_col[0] != D_8x8 )
- + h->mb.i_partition = D_16x8;
- + else
- + h->mb.i_partition = D_8x8;
- + }
- + else
- + {
- + mb_y = (h->mb.i_mb_y&~1) + col_parity;
- + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
- + partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
- + preshift = 1;
- + yshift = 2;
- + h->mb.i_partition = partition_col[0];
- + }
- + offset = 0;
- }
- + int i_mb_4x4 = 16 * h->mb.i_mb_stride * mb_y + 4 * mb_x;
- + int i_mb_8x8 = 4 * h->mb.i_mb_stride * mb_y + 2 * mb_x;
- +
- + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
- /* Don't do any checks other than the ones we have to, based
- * on the size of the colocated partitions.
- * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- - int max_i8 = (D_16x16 - partition_col) + 1;
- - int step = (partition_col == D_16x8) + 1;
- - int width = 4 >> ((D_16x16 - partition_col)&1);
- - int height = 4 >> ((D_16x16 - partition_col)>>1);
- -
- + int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
- + int step = (h->mb.i_partition == D_16x8) + 1;
- + int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
- + int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
- for( int i8 = 0; i8 < max_i8; i8 += step )
- {
- int x8 = i8&1;
- int y8 = i8>>1;
- - int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
- +
- + if( IS_INTRA( type_col[y8] ) )
- + {
- + x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, 0 );
- + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
- + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
- + continue;
- + }
- +
- + int yM = 3*y8;
- + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
- + {
- + if( h->mb.b_interlaced )
- + yM = y8*6;
- + else
- + yM = 2*(h->mb.i_mb_y&1) + y8;
- + }
- +
- + int i_part_8x8 = i_mb_8x8 + x8 + (yM>>1) * h->mb.i_b8_stride;
- int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
- - int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
- + int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) << postshift) + (offset&i_ref1_ref&h->mb.b_interlaced);
- if( i_ref >= 0 )
- {
- int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
- - int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
- + int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + yM * h->mb.i_b4_stride];
- + int16_t mv_y = (mv_col[1]<<yshift)/2;
- int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
- - int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- - if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
- + int l0y = ( dist_scale_factor * mv_y + 128 ) >> 8;
- + if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_y > h->mb.mv_max_spel[1]) )
- return 0;
- x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
- - x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
- + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_y) );
- }
- else
- {
- --
- 1.7.4
- From fe257a3f3ad1b5121c52999f1db6727aa50082c5 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Tue, 15 Mar 2011 01:17:01 +0000
- Subject: [PATCH 23/25] Direct spatial
- ---
- common/mvpred.c | 78 +++++++++++++++++++++++++++++++++++++++++++-----------
- 1 files changed, 62 insertions(+), 16 deletions(-)
- diff --git a/common/mvpred.c b/common/mvpred.c
- index 28eabca..f25fa03 100644
- --- a/common/mvpred.c
- +++ b/common/mvpred.c
- @@ -295,15 +295,6 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- {
- int8_t ref[2];
- ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
- - const int8_t *l1ref0 = &h->fref[1][0]->ref[0][h->mb.i_b8_xy];
- - const int8_t *l1ref1 = &h->fref[1][0]->ref[1][h->mb.i_b8_xy];
- - const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref[1][0]->mv[0][h->mb.i_b4_xy],
- - (const int16_t (*)[2]) &h->fref[1][0]->mv[1][h->mb.i_b4_xy] };
- - const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
- - const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
- -
- - h->mb.i_partition = partition_col;
- -
- for( int i_list = 0; i_list < 2; i_list++ )
- {
- int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
- @@ -348,6 +339,49 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- ref[i_list] = i_ref;
- }
- + int mb_x = h->mb.i_mb_x;
- + int mb_y = h->mb.i_mb_y;
- + int mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
- + int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
- + int col_parity = abs(h->fref[1][0]->i_poc - h->fdec->i_poc)
- + >= abs(h->fref[1][0]->i_poc + h->sh.i_delta_poc_bottom - h->fdec->i_poc);
- + h->mb.i_partition = partition_col[0];
- + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
- + {
- + if( h->mb.b_interlaced )
- + {
- + mb_y = h->mb.i_mb_y&~1;
- + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + type_col[0] = h->fref[1][0]->mb_type[mb_xy];
- + type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
- + partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
- + partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
- +
- + if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16)
- + && (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16)
- + && partition_col[0] != D_8x8 )
- + h->mb.i_partition = D_16x8;
- + else
- + h->mb.i_partition = D_8x8;
- + }
- + else
- + {
- + mb_y = (h->mb.i_mb_y&~1) + col_parity;
- + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
- + type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
- + partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
- + h->mb.i_partition = partition_col[0];
- + }
- + }
- + int i_mb_4x4 = 16 * h->mb.i_mb_stride * mb_y + 4 * mb_x;
- + int i_mb_8x8 = 4 * h->mb.i_mb_stride * mb_y + 2 * mb_x;
- +
- + int8_t *l1ref0 = &h->fref[1][0]->ref[0][i_mb_8x8];
- + int8_t *l1ref1 = &h->fref[1][0]->ref[1][i_mb_8x8];
- + int16_t (*l1mv[2])[2] = { (int16_t (*)[2]) &h->fref[1][0]->mv[0][i_mb_4x4],
- + (int16_t (*)[2]) &h->fref[1][0]->mv[1][i_mb_4x4] };
- +
- if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
- {
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- @@ -367,24 +401,33 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- return 0;
- }
- - if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
- + if( !M64( mv ) || (ref[0]&&ref[1]) )
- return 1;
- /* Don't do any checks other than the ones we have to, based
- * on the size of the colocated partitions.
- * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- - int max_i8 = (D_16x16 - partition_col) + 1;
- - int step = (partition_col == D_16x8) + 1;
- - int width = 4 >> ((D_16x16 - partition_col)&1);
- - int height = 4 >> ((D_16x16 - partition_col)>>1);
- + int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
- + int step = (h->mb.i_partition == D_16x8) + 1;
- + int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
- + int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
- /* col_zero_flag */
- for( int i8 = 0; i8 < max_i8; i8 += step )
- {
- const int x8 = i8&1;
- const int y8 = i8>>1;
- - const int o8 = x8 + y8 * h->mb.i_b8_stride;
- - const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
- + int yM = 3*y8;
- + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
- + {
- + if( h->mb.b_interlaced )
- + yM = y8*6;
- + else
- + yM = 2*(h->mb.i_mb_y&1) + y8;
- + }
- + int o8 = x8 + (yM>>1) * h->mb.i_b8_stride;
- + int o4 = 3*x8 + yM * h->mb.i_b4_stride;
- +
- int idx;
- if( l1ref0[o8] == 0 )
- idx = 0;
- @@ -393,6 +436,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- else
- continue;
- + if( IS_INTRA( type_col[y8] ) )
- + continue;
- +
- if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
- {
- if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
- --
- 1.7.4
- From 53c6284008aa1471a62de4f6da6d587698a817c2 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Thu, 17 Mar 2011 17:39:18 +0000
- Subject: [PATCH 24/25] Fix non-determinism with AQ
- ---
- encoder/ratecontrol.c | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index e1a673f..bcbcb02 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -219,10 +219,10 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
- {
- int w = i ? 8 : 16;
- int stride = frame->i_stride[i];
- - int offset = h->mb.b_interlaced
- + int offset = h->sh.b_mbaff
- ? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
- : 16 * mb_x + w * mb_y * stride;
- - stride <<= h->mb.b_interlaced;
- + stride <<= h->sh.b_mbaff;
- if( i )
- {
- ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
- --
- 1.7.4
- From 7e2d83f1ef2dc762ffe6880ee54686088aeff660 Mon Sep 17 00:00:00 2001
- From: Simon Horlick <simonhorlick@gmail.com>
- Date: Mon, 14 Mar 2011 02:54:30 +0000
- Subject: [PATCH 25/25] Adaptive mbaff with vsad decision
- ---
- encoder/encoder.c | 21 ++++++++++++++++++++-
- 1 files changed, 20 insertions(+), 1 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 0319126..d885f31 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1873,6 +1873,25 @@ static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
- x264_macroblock_slice_init( h );
- }
- +/* Decide field vs. frame for the 16x16 block of plane 0 at (mb_x,mb_y): returns 1 when the summed
- + * absolute difference between rows two apart (field) is lower than between adjacent rows (frame). */
- +static int field_vsad( x264_t *h, int mb_x, int mb_y )
- +{
- + int score_field = 0;
- + int score_frame = 0;
- + int stride = h->fenc->i_stride[0];
- + pixel *fenc = h->fenc->plane[0] + 16 * (mb_x + mb_y * stride); /* pixel (not uint8_t) so the pointer matches the plane's sample type */
- +
- + for( int i = 1; i < 16; i++ )
- + for( int j = 0; j < 16; j++ )
- + score_frame += abs(fenc[i*stride+j] - fenc[(i-1)*stride+j]);
- + /* Rows 2..15 each against the row two above, which covers both row parities in one pass. */
- + for( int i = 2; i < 16; i++ )
- + for( int j = 0; j < 16; j++ )
- + score_field += abs(fenc[i*stride+j] - fenc[(i-2)*stride+j]);
- + return (score_field < score_frame);
- +}
- +
- static int x264_slice_write( x264_t *h )
- {
- int i_skip;
- @@ -1967,7 +1986,7 @@ static int x264_slice_write( x264_t *h )
- if( h->mb.b_adaptive_mbaff )
- {
- if( !(i_mb_y&1) )
- - h->mb.b_interlaced = 1;
- + h->mb.b_interlaced = field_vsad( h, i_mb_x, i_mb_y );
- x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
- }
- h->mb.field[mb_xy] = h->mb.b_interlaced;
- --
- 1.7.4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement