Advertisement
Guest User

Untitled

a guest
Jul 18th, 2017
465
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 48.25 KB | None | 0 0
  1. From 396a8f247bcf548189de29a7bf9cbad968a7ce50 Mon Sep 17 00:00:00 2001
  2. From: Simon Horlick <simonhorlick@gmail.com>
  3. Date: Tue, 11 Jan 2011 20:05:54 +0000
  4. Subject: [PATCH 1/5] Save interlace decision for all mbs
  5.  
  6. ---
  7. common/common.h     |    1 +
  8.  common/frame.c      |    3 +++
  9.  common/frame.h      |    1 +
  10.  common/macroblock.c |    1 +
  11.  encoder/encoder.c   |    8 ++++++++
  12.  5 files changed, 14 insertions(+), 0 deletions(-)
  13.  
  14. diff --git a/common/common.h b/common/common.h
  15. index 4654c17..b450ac0 100644
  16. --- a/common/common.h
  17. +++ b/common/common.h
  18. @@ -616,6 +616,7 @@ struct x264_t
  19.          int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
  20.          uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
  21.                                               * NOTE: this will fail on resolutions above 2^16 MBs... */
  22. +        uint8_t *field;                     /* b_interlaced decision of each mb */
  23.  
  24.           /* buffer for weighted versions of the reference frames */
  25.          pixel *p_weight_buf[X264_REF_MAX];
  26. diff --git a/common/frame.c b/common/frame.c
  27. index ca90539..eff8ca5 100644
  28. --- a/common/frame.c
  29. +++ b/common/frame.c
  30. @@ -145,6 +145,8 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
  31.                              frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
  32.              frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
  33.          }
  34. +        if( h->param.b_interlaced )
  35. +            CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
  36.      }
  37.      else /* fenc frame */
  38.      {
  39. @@ -219,6 +221,7 @@ void x264_frame_delete( x264_frame_t *frame )
  40.          x264_free( frame->i_inv_qscale_factor );
  41.          x264_free( frame->i_row_bits );
  42.          x264_free( frame->f_row_qp );
  43. +        x264_free( frame->field );
  44.          x264_free( frame->mb_type );
  45.          x264_free( frame->mb_partition );
  46.          x264_free( frame->mv[0] );
  47. diff --git a/common/frame.h b/common/frame.h
  48. index 38d0bf2..7a1bd77 100644
  49. --- a/common/frame.h
  50. +++ b/common/frame.h
  51. @@ -62,6 +62,7 @@ typedef struct x264_frame
  52.      float   f_qp_avg_rc; /* QPs as decided by ratecontrol */
  53.      float   f_qp_avg_aq; /* QPs as decided by AQ in addition to ratecontrol */
  54.      int     i_poc_l0ref0; /* poc of first refframe in L0, used to check if direct temporal is possible */
  55. +    uint8_t *field; /* one entry per mb; only allocated when h->param.b_interlaced is set */
  56.  
  57.      /* YUV buffer */
  58.      int     i_plane;
  59. diff --git a/common/macroblock.c b/common/macroblock.c
  60. index e559ab1..de5b01f 100644
  61. --- a/common/macroblock.c
  62. +++ b/common/macroblock.c
  63. @@ -368,6 +368,7 @@ void x264_macroblock_slice_init( x264_t *h )
  64.      h->mb.ref[1] = h->fdec->ref[1];
  65.      h->mb.type = h->fdec->mb_type;
  66.      h->mb.partition = h->fdec->mb_partition;
  67. +    h->mb.field = h->fdec->field;
  68.  
  69.      h->fdec->i_ref[0] = h->i_ref[0];
  70.      h->fdec->i_ref[1] = h->i_ref[1];
  71. diff --git a/encoder/encoder.c b/encoder/encoder.c
  72. index 0182c8c..3b9a175 100644
  73. --- a/encoder/encoder.c
  74. +++ b/encoder/encoder.c
  75. @@ -1950,6 +1950,14 @@ static int x264_slice_write( x264_t *h )
  76.              }
  77.          }
  78.  
  79. +        if( h->param.b_interlaced )
  80. +        {
  81. +            if( !(i_mb_y&1) )
  82. +                h->mb.b_interlaced = 1;
  83. +            x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  84. +            h->mb.field[mb_xy] = h->mb.b_interlaced;
  85. +        }
  86. +
  87.          if( i_mb_x == 0 && !h->mb.b_reencode_mb )
  88.              x264_fdec_filter_row( h, i_mb_y, 1 );
  89.  
  90. --
  91. 1.7.1
  92.  
  93.  
  94. From 05caa9ab13813eb34aca6e01841ef975576db4bc Mon Sep 17 00:00:00 2001
  95. From: Simon Horlick <simonhorlick@gmail.com>
  96. Date: Tue, 11 Jan 2011 20:09:00 +0000
  97. Subject: [PATCH 2/5] Disable adaptive MBAFF when subme 0 is used.
  98.  
  99. ---
  100. encoder/encoder.c |    9 ++++++---
  101.  x264.c            |   12 ++++++++++++
  102.  x264.h            |    1 +
  103.  3 files changed, 19 insertions(+), 3 deletions(-)
  104.  
  105. diff --git a/encoder/encoder.c b/encoder/encoder.c
  106. index 3b9a175..4fb0a57 100644
  107. --- a/encoder/encoder.c
  108. +++ b/encoder/encoder.c
  109. @@ -1952,9 +1952,12 @@ static int x264_slice_write( x264_t *h )
  110.  
  111.          if( h->param.b_interlaced )
  112.          {
  113. -            if( !(i_mb_y&1) )
  114. -                h->mb.b_interlaced = 1;
  115. -            x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  116. +            if( h->param.b_adaptive_mbaff )
  117. +            {
  118. +                if( !(i_mb_y&1) )
  119. +                    h->mb.b_interlaced = 1;
  120. +                x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  121. +            }
  122.              h->mb.field[mb_xy] = h->mb.b_interlaced;
  123.          }
  124.  
  125. diff --git a/x264.c b/x264.c
  126. index c6261d8..d9c60cd 100644
  127. --- a/x264.c
  128. +++ b/x264.c
  129. @@ -1494,6 +1494,18 @@ generic_option:
  130.          param->b_tff = !!info.tff;
  131.      }
  132.  
  133. +    /* Adaptive MBAFF and subme 0 are not supported as motion vectors between
  134. +     * field macroblocks and frame macroblocks require halving and hpel pixels.
  135. +     * The chosen solution is to make MBAFF non-adaptive in this case. */
  136. +    if( param->b_interlaced && !param->analyse.i_subpel_refine )
  137. +    {
  138. +        x264_cli_log( "x264", X264_LOG_WARNING, "Adaptive MBAFF and subme 0 are not supported. "
  139. +                "Using non-adaptive MBAFF instead.\n");
  140. +        param->b_adaptive_mbaff = 0;
  141. +    }
  142. +    else
  143. +        param->b_adaptive_mbaff = 1;
  144. +
  145.      /* Automatically reduce reference frame count to match the user's target level
  146.       * if the user didn't explicitly set a reference frame count. */
  147.      if( !b_user_ref )
  148. diff --git a/x264.h b/x264.h
  149. index 8f39497..739d456 100644
  150. --- a/x264.h
  151. +++ b/x264.h
  152. @@ -291,6 +291,7 @@ typedef struct x264_param_t
  153.      int         i_cabac_init_idc;
  154.  
  155.      int         b_interlaced;
  156. +    int         b_adaptive_mbaff; /* MBAFF+subme 0 require non-adaptive MBAFF i.e. all field mbs */
  157.      int         b_constrained_intra;
  158.  
  159.      int         i_cqm_preset;
  160. --
  161. 1.7.1
  162.  
  163.  
  164. From ac56eec487176b63ca67c20da5e04d96faba9652 Mon Sep 17 00:00:00 2001
  165. From: Simon Horlick <simonhorlick@gmail.com>
  166. Date: Tue, 11 Jan 2011 20:16:18 +0000
  167. Subject: [PATCH 3/5] Left index table
  168.  
  169. ---
  170. common/common.h     |    1 +
  171.  common/macroblock.c |   56 ++++++++++++++++++++++++++++++++------------------
  172.  2 files changed, 37 insertions(+), 20 deletions(-)
  173.  
  174. diff --git a/common/common.h b/common/common.h
  175. index b450ac0..f244e3d 100644
  176. --- a/common/common.h
  177. +++ b/common/common.h
  178. @@ -594,6 +594,7 @@ struct x264_t
  179.          int     i_mb_top_xy;
  180.          int     i_mb_topleft_xy;
  181.          int     i_mb_topright_xy;
  182. +        int     *left_index_table;
  183.  
  184.          /**** thread synchronization ends here ****/
  185.          /* subsequent variables are either thread-local or constant,
  186. diff --git a/common/macroblock.c b/common/macroblock.c
  187. index de5b01f..40f5c67 100644
  188. --- a/common/macroblock.c
  189. +++ b/common/macroblock.c
  190. @@ -547,6 +547,18 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  191.          }
  192.  }
  193.  
  194. +static const int left_indices[5][22] = {
  195. +/*    intra modes     nnz                                          mv          ref    real indices */
  196. +    /* Current is progressive. */
  197. +    { 4, 4, 5, 5,     3,  3,  7,  7, 16+1, 16+1, 16+4+1, 16+4+1,   0, 0, 1, 1, 0, 0,  0, 0, 1, 1 },
  198. +    { 6, 6, 3, 3,    11, 11, 15, 15, 16+3, 16+3, 16+4+3, 16+4+3,   2, 2, 3, 3, 1, 1,  2, 2, 3, 3 },
  199. +    /* Current is interlaced.*/
  200. +    { 4, 6, 4, 6,     3, 11,  3, 11, 16+1, 16+1, 16+4+1, 16+4+1,   0, 2, 0, 2, 0, 0,  0, 2, 0, 2 },
  201. +    { 4, 6, 4, 6,     3, 11,  3, 11, 16+1, 16+1, 16+4+1, 16+4+1,   0, 2, 0, 2, 0, 0,  0, 2, 0, 2 },
  202. +    /*Both same.*/
  203. +    { 4, 5, 6, 3,     3,  7, 11, 15, 16+1, 16+3, 16+4+1, 16+4+3,   0, 1, 2, 3, 0, 1,  0, 1, 2, 3 },
  204. +};
  205. +
  206.  static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
  207.  {
  208.      int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
  209. @@ -567,6 +579,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  210.      h->mb.i_mb_type_left = -1;
  211.      h->mb.i_mb_type_topleft = -1;
  212.      h->mb.i_mb_type_topright = -1;
  213. +    h->mb.left_index_table = left_indices[4];
  214.  
  215.      if( mb_x > 0 )
  216.      {
  217. @@ -658,6 +671,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  218.      uint8_t (*nnz)[24] = h->mb.non_zero_count;
  219.      int16_t *cbp = h->mb.cbp;
  220.  
  221. +    int *left_index_table = h->mb.left_index_table;
  222. +
  223.      /* load cache */
  224.      if( h->mb.i_neighbour & MB_TOP )
  225.      {
  226. @@ -700,22 +715,22 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  227.          h->mb.cache.i_cbp_left = cbp[left];
  228.  
  229.          /* load intra4x4 */
  230. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][4];
  231. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][5];
  232. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][6];
  233. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][3];
  234. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
  235. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
  236. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
  237. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
  238.  
  239.          /* load non_zero_count */
  240. -        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
  241. -        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
  242. -        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
  243. -        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
  244. +        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  245. +        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  246. +        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  247. +        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  248.  
  249. -        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][16+1];
  250. -        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][16+3];
  251. +        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
  252. +        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
  253.  
  254. -        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][16+4+1];
  255. -        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][16+4+3];
  256. +        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
  257. +        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
  258.      }
  259.      else
  260.      {
  261. @@ -854,10 +869,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  262.  
  263.              if( h->mb.i_neighbour & MB_LEFT )
  264.              {
  265. -                CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][4] );
  266. -                CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][5] );
  267. -                CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][6] );
  268. -                CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][3] );
  269. +                CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
  270. +                CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
  271. +                CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
  272. +                CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
  273.              }
  274.              else
  275.                  for( int i = 0; i < 4; i++ )
  276. @@ -946,6 +961,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  277.              int s4x4 = h->mb.i_b4_stride;
  278.  
  279.              uint8_t (*nnz)[24] = h->mb.non_zero_count;
  280. +            int *left_index_table = h->mb.left_index_table;
  281.  
  282.              if( h->mb.i_neighbour & MB_TOP )
  283.                  CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
  284. @@ -953,10 +969,10 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  285.              if( h->mb.i_neighbour & MB_LEFT )
  286.              {
  287.                  int left = h->mb.i_mb_left_xy;
  288. -                h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
  289. -                h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
  290. -                h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
  291. -                h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
  292. +                h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  293. +                h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  294. +                h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  295. +                h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  296.              }
  297.  
  298.              for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
  299. --
  300. 1.7.1
  301.  
  302.  
  303. From 8e33318d3a7c64c2f4a8e77bed4464e5748caf33 Mon Sep 17 00:00:00 2001
  304. From: Simon Horlick <simonhorlick@gmail.com>
  305. Date: Tue, 11 Jan 2011 20:21:26 +0000
  306. Subject: [PATCH 4/5] Store references to the two left macroblocks in MBAFF
  307.  
  308. ---
  309. common/common.h     |    4 ++--
  310.  common/deblock.c    |    4 ++--
  311.  common/macroblock.c |   20 ++++++++++----------
  312.  common/mvpred.c     |    2 +-
  313.  encoder/analyse.c   |    8 ++++----
  314.  encoder/cabac.c     |    8 ++++----
  315.  6 files changed, 23 insertions(+), 23 deletions(-)
  316.  
  317. diff --git a/common/common.h b/common/common.h
  318. index f244e3d..d49c41f 100644
  319. --- a/common/common.h
  320. +++ b/common/common.h
  321. @@ -586,11 +586,11 @@ struct x264_t
  322.          unsigned int i_neighbour_intra;     /* for constrained intra pred */
  323.          unsigned int i_neighbour_frame;     /* ignoring slice boundaries */
  324.          int     i_mb_type_top;
  325. -        int     i_mb_type_left;
  326. +        int     i_mb_type_left[2];
  327.          int     i_mb_type_topleft;
  328.          int     i_mb_type_topright;
  329.          int     i_mb_prev_xy;
  330. -        int     i_mb_left_xy;
  331. +        int     i_mb_left_xy[2];
  332.          int     i_mb_top_xy;
  333.          int     i_mb_topleft_xy;
  334.          int     i_mb_topright_xy;
  335. diff --git a/common/deblock.c b/common/deblock.c
  336. index ff7c99f..5074b77 100644
  337. --- a/common/deblock.c
  338. +++ b/common/deblock.c
  339. @@ -347,10 +347,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  340.  
  341.          if( h->mb.i_neighbour & MB_LEFT )
  342.          {
  343. -            int qpl = h->mb.qp[h->mb.i_mb_left_xy];
  344. +            int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
  345.              int qp_left = (qp + qpl + 1) >> 1;
  346.              int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
  347. -            int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy] );
  348. +            int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
  349.              if( intra_cur || intra_left )
  350.                  FILTER( _intra, 0, 0, qp_left, qpc_left );
  351.              else
  352. diff --git a/common/macroblock.c b/common/macroblock.c
  353. index 40f5c67..1e40b28 100644
  354. --- a/common/macroblock.c
  355. +++ b/common/macroblock.c
  356. @@ -572,11 +572,11 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  357.      h->mb.i_neighbour_intra = 0;
  358.      h->mb.i_neighbour_frame = 0;
  359.      h->mb.i_mb_top_xy = -1;
  360. -    h->mb.i_mb_left_xy = -1;
  361. +    h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
  362.      h->mb.i_mb_topleft_xy = -1;
  363.      h->mb.i_mb_topright_xy = -1;
  364.      h->mb.i_mb_type_top = -1;
  365. -    h->mb.i_mb_type_left = -1;
  366. +    h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
  367.      h->mb.i_mb_type_topleft = -1;
  368.      h->mb.i_mb_type_topright = -1;
  369.      h->mb.left_index_table = left_indices[4];
  370. @@ -584,13 +584,13 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  371.      if( mb_x > 0 )
  372.      {
  373.          h->mb.i_neighbour_frame |= MB_LEFT;
  374. -        h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
  375. -        h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
  376. +        h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  377. +        h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
  378.          if( h->mb.i_mb_xy > h->sh.i_first_mb )
  379.          {
  380.              h->mb.i_neighbour |= MB_LEFT;
  381.  
  382. -            if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
  383. +            if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
  384.                  h->mb.i_neighbour_intra |= MB_LEFT;
  385.          }
  386.      }
  387. @@ -656,7 +656,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  388.  {
  389.      x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
  390.  
  391. -    int left = h->mb.i_mb_left_xy;
  392. +    int left = h->mb.i_mb_left_xy[0];
  393.      int top  = h->mb.i_mb_top_xy;
  394.      int top_y = mb_y - (1 << h->mb.b_interlaced);
  395.      int s8x8 = h->mb.i_b8_stride;
  396. @@ -924,8 +924,8 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
  397.  
  398.      if( mb_x > 0 )
  399.      {
  400. -        h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
  401. -        if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
  402. +        h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  403. +        if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
  404.              h->mb.i_neighbour |= MB_LEFT;
  405.      }
  406.  
  407. @@ -968,7 +968,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  408.  
  409.              if( h->mb.i_neighbour & MB_LEFT )
  410.              {
  411. -                int left = h->mb.i_mb_left_xy;
  412. +                int left = h->mb.i_mb_left_xy[0];
  413.                  h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  414.                  h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  415.                  h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  416. @@ -1043,7 +1043,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  417.      {
  418.          uint8_t (*nnz)[24] = h->mb.non_zero_count;
  419.          int top = h->mb.i_mb_top_xy;
  420. -        int left = h->mb.i_mb_left_xy;
  421. +        int left = h->mb.i_mb_left_xy[0];
  422.  
  423.          if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
  424.          {
  425. diff --git a/common/mvpred.c b/common/mvpred.c
  426. index a24dde8..c8efe1f 100644
  427. --- a/common/mvpred.c
  428. +++ b/common/mvpred.c
  429. @@ -426,7 +426,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
  430.      }
  431.  
  432.      /* spatial predictors */
  433. -    SET_MVP( mvr[h->mb.i_mb_left_xy] );
  434. +    SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
  435.      SET_MVP( mvr[h->mb.i_mb_top_xy] );
  436.      SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
  437.      SET_MVP( mvr[h->mb.i_mb_topright_xy] );
  438. diff --git a/encoder/analyse.c b/encoder/analyse.c
  439. index bf9f3c9..d87950e 100644
  440. --- a/encoder/analyse.c
  441. +++ b/encoder/analyse.c
  442. @@ -516,7 +516,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  443.          {
  444.              /* Always run in fast-intra mode for subme < 3 */
  445.              if( h->mb.i_subpel_refine > 2 &&
  446. -              ( IS_INTRA( h->mb.i_mb_type_left ) ||
  447. +              ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
  448.                  IS_INTRA( h->mb.i_mb_type_top ) ||
  449.                  IS_INTRA( h->mb.i_mb_type_topleft ) ||
  450.                  IS_INTRA( h->mb.i_mb_type_topright ) ||
  451. @@ -1296,7 +1296,7 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  452.      /* early termination: if 16x16 chose ref 0, then evalute no refs older
  453.       * than those used by the neighbors */
  454.      if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
  455. -        h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
  456. +        h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
  457.      {
  458.          i_maxref = 0;
  459.          CHECK_NEIGHBOUR(  -8 - 1 );
  460. @@ -2063,7 +2063,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  461.      {
  462.          x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
  463.          if( i_maxref[l] > 0 && lX->me16x16.i_ref == 0 &&
  464. -            h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
  465. +            h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
  466.          {
  467.              i_maxref[l] = 0;
  468.              CHECK_NEIGHBOUR(  -8 - 1 );
  469. @@ -2817,7 +2817,7 @@ intra_analysis:
  470.                      {}
  471.                  else if( h->param.analyse.i_subpel_refine >= 3 )
  472.                      analysis.b_try_skip = 1;
  473. -                else if( h->mb.i_mb_type_left == P_SKIP ||
  474. +                else if( h->mb.i_mb_type_left[0] == P_SKIP ||
  475.                           h->mb.i_mb_type_top == P_SKIP ||
  476.                           h->mb.i_mb_type_topleft == P_SKIP ||
  477.                           h->mb.i_mb_type_topright == P_SKIP )
  478. diff --git a/encoder/cabac.c b/encoder/cabac.c
  479. index 6333737..334318d 100644
  480. --- a/encoder/cabac.c
  481. +++ b/encoder/cabac.c
  482. @@ -79,7 +79,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  483.      if( h->sh.i_type == SLICE_TYPE_I )
  484.      {
  485.          int ctx = 0;
  486. -        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
  487. +        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
  488.              ctx++;
  489.          if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
  490.              ctx++;
  491. @@ -113,7 +113,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  492.      else //if( h->sh.i_type == SLICE_TYPE_B )
  493.      {
  494.          int ctx = 0;
  495. -        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
  496. +        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
  497.              ctx++;
  498.          if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
  499.              ctx++;
  500. @@ -198,7 +198,7 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
  501.      int       ctx = 0;
  502.  
  503.      /* No need to test for I4x4 or I_16x16 as cache_save handle that */
  504. -    if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
  505. +    if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
  506.          ctx++;
  507.      if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
  508.          ctx++;
  509. @@ -280,7 +280,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  510.  #if !RDO_SKIP_BS
  511.  void x264_cabac_mb_skip( x264_t *h, int b_skip )
  512.  {
  513. -    int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left ))
  514. +    int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
  515.              + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
  516.              + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
  517.      x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
  518. --
  519. 1.7.1
  520.  
  521.  
  522. From 988b2632ea8b8a98f385840313284425d8f0cd89 Mon Sep 17 00:00:00 2001
  523. From: Simon Horlick <simonhorlick@gmail.com>
  524. Date: Fri, 14 Jan 2011 21:18:14 +0000
  525. Subject: [PATCH 5/5] Back up intra borders correctly for MBAFF and make neighbour calculation several times longer.
  526.  
  527. ---
  528. common/common.h     |    9 ++-
  529.  common/macroblock.c |  292 +++++++++++++++++++++++++++++++++++++++++++--------
  530.  2 files changed, 255 insertions(+), 46 deletions(-)
  531.  
  532. diff --git a/common/common.h b/common/common.h
  533. index d49c41f..913c6f0 100644
  534. --- a/common/common.h
  535. +++ b/common/common.h
  536. @@ -548,6 +548,8 @@ struct x264_t
  537.          int     i_mb_stride;
  538.          int     i_b8_stride;
  539.          int     i_b4_stride;
  540. +        int     left_b8[2];
  541. +        int     left_b4[2];
  542.  
  543.          /* Current index */
  544.          int     i_mb_x;
  545. @@ -595,6 +597,10 @@ struct x264_t
  546.          int     i_mb_topleft_xy;
  547.          int     i_mb_topright_xy;
  548.          int     *left_index_table;
  549. +        int     topleft_partition;
  550. +        int     intra_border_index;         /* first index into h->intra_border_backup for the current mb */
  551. +        int     topleft_border_index;       /* first index into h->intra_diagonal_backup for the top-left sample */
  552. +        int     topright_border_index;      /* first index into h->intra_diagonal_backup for the top-right samples */
  553.  
  554.          /**** thread synchronization ends here ****/
  555.          /* subsequent variables are either thread-local or constant,
  556. @@ -825,7 +831,8 @@ struct x264_t
  557.  
  558.      /* Buffers that are allocated per-thread even in sliced threads. */
  559.      void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
  560. -    pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
  561. +    pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
  562. +    pixel *intra_diagonal_backup[5][3]; /* top-left pixel + top-right pixels, 9 entries per mb; NOTE(review): only rows 0..3 are allocated and freed in macroblock.c */
  563.      uint8_t (*deblock_strength[2])[2][4][4];
  564.  
  565.      /* CPU functions dependents */
  566. diff --git a/common/macroblock.c b/common/macroblock.c
  567. index 1e40b28..5c2ffc4 100644
  568. --- a/common/macroblock.c
  569. +++ b/common/macroblock.c
  570. @@ -314,18 +314,35 @@ void x264_macroblock_cache_free( x264_t *h )
  571.  int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  572.  {
  573.      if( !b_lookahead )
  574. -        for( int i = 0; i <= h->param.b_interlaced; i++ )
  575. +    {
  576. +        for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
  577.          {
  578.              for( int j = 0; j < 2; j++ )
  579.              {
  580.                  /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
  581.                  CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
  582.                  h->intra_border_backup[i][j] += 16;
  583. -                h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
  584. +                if( !h->param.b_interlaced )
  585. +                    h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
  586. +            }
  587. +        }
  588. +        for( int i = 0; i < 4*h->param.b_interlaced; i++ ) /* bound must match the free loop in x264_macroblock_thread_free */
  589. +        {
  590. +            for( int j = 0; j < 3; j++ )
  591. +            {
  592. +                const int width = 1 + 8; // top left pixel + eight top right pixels (for luma)
  593. +                CHECKED_MALLOCZERO( h->intra_diagonal_backup[i][j], (h->sps->i_mb_width*width+32) * sizeof(pixel) );
  594. +                h->intra_diagonal_backup[i][j] += 16;
  595. +                if( !h->param.b_interlaced )
  596. +                    h->intra_diagonal_backup[1][j] = h->intra_diagonal_backup[i][j];
  597.              }
  598. +        }
  599. +        for( int i = 0; i <= h->param.b_interlaced; i++ )
  600. +        {
  601.              CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
  602.              h->deblock_strength[1] = h->deblock_strength[i];
  603.          }
  604. +    }
  605.  
  606.      /* Allocate scratch buffer */
  607.      int scratch_size = 0;
  608. @@ -350,12 +367,20 @@ fail:
  609.  void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
  610.  {
  611.      if( !b_lookahead )
  612. +    {
  613.          for( int i = 0; i <= h->param.b_interlaced; i++ )
  614. -        {
  615.              x264_free( h->deblock_strength[i] );
  616. +        for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
  617. +        {
  618.              for( int j = 0; j < 2; j++ )
  619.                  x264_free( h->intra_border_backup[i][j] - 16 );
  620.          }
  621. +        for( int i = 0; i < 4*h->param.b_interlaced; i++ )
  622. +        {
  623. +            for( int j = 0; j < 3; j++ )
  624. +                x264_free( h->intra_diagonal_backup[i][j] - 16 );
  625. +        }
  626. +    }
  627.      x264_free( h->scratch_buffer );
  628.  }
  629.  
  630. @@ -491,14 +516,15 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  631.  {
  632.      int w = (i ? 8 : 16);
  633.      int i_stride = h->fdec->i_stride[i];
  634. -    int i_stride2 = i_stride << b_interlaced;
  635. -    int i_pix_offset = b_interlaced
  636. +    int i_stride2 = i_stride << h->mb.b_interlaced;
  637. +    int i_pix_offset = h->mb.b_interlaced
  638.                       ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  639.                       : 16 * mb_x + w * mb_y * i_stride;
  640.      pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
  641. -    pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  642. +    pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
  643.      int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
  644. -    if( b_interlaced )
  645. +    /* ref_pix_offset[0] references the current field and [1] the opposite field. */
  646. +    if( h->mb.b_interlaced )
  647.          ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
  648.      h->mb.pic.i_stride[i] = i_stride2;
  649.      h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
  650. @@ -507,11 +533,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  651.          h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
  652.          memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
  653.          memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
  654. +        if( h->sh.b_mbaff )
  655. +        {
  656. +            // Top left samples.
  657. +            h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][1][mb_x*9];
  658. +            h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][2][mb_x*9];
  659. +            // Top right samples.
  660. +            CP32( &h->mb.pic.p_fdec[1][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][1][mb_x*9+1] );
  661. +            CP32( &h->mb.pic.p_fdec[2][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][2][mb_x*9+1] );
  662. +        }
  663. +
  664.      }
  665.      else
  666.      {
  667.          h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
  668. -        memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
  669. +        if( h->sh.b_mbaff )
  670. +        {
  671. +            memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 16*sizeof(pixel) );
  672. +            h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][0][mb_x*9];
  673. +            CP64( &h->mb.pic.p_fdec[0][-FDEC_STRIDE+16], &h->intra_diagonal_backup[h->mb.topright_border_index][0][mb_x*9+1] );
  674. +        }
  675. +        else
  676. +            memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
  677.      }
  678.      if( b_interlaced )
  679.      {
  680. @@ -568,6 +611,10 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  681.      h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
  682.      h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
  683.      h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
  684. +    h->mb.left_b8[0] =
  685. +    h->mb.left_b8[1] = -1;
  686. +    h->mb.left_b4[0] =
  687. +    h->mb.left_b4[1] = -1;
  688.      h->mb.i_neighbour = 0;
  689.      h->mb.i_neighbour_intra = 0;
  690.      h->mb.i_neighbour_frame = 0;
  691. @@ -580,16 +627,111 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  692.      h->mb.i_mb_type_topleft = -1;
  693.      h->mb.i_mb_type_topright = -1;
  694.      h->mb.left_index_table = left_indices[4];
  695. +    h->mb.topleft_partition = 0;
  696. +    h->mb.topright_border_index =
  697. +    h->mb.topleft_border_index = !(mb_y&1);
  698. +    h->mb.intra_border_index = mb_y&1;
  699. +
  700. +    int topleft = top - 1;
  701. +    int topright = top + 1;
  702. +    int left[2];
  703. +
  704. +    left[0] = left[1] = h->mb.i_mb_xy - 1;
  705. +    h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
  706. +    h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
  707. +
  708. +    if( h->sh.b_mbaff )
  709. +    {
  710. +        if( mb_y&1 )
  711. +        {
  712. +            if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
  713. +            {
  714. +                left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
  715. +                h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
  716. +                h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
  717. +
  718. +                if( h->mb.b_interlaced )
  719. +                {
  720. +                    h->mb.left_index_table = left_indices[3];
  721. +                    left[1] += h->mb.i_mb_stride;
  722. +                    h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
  723. +                    h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
  724. +                }
  725. +                else
  726. +                {
  727. +                    h->mb.left_index_table = left_indices[1];
  728. +                    topleft += h->mb.i_mb_stride;
  729. +                    h->mb.topleft_partition = 1;
  730. +                    h->mb.topleft_border_index = 3;
  731. +                }
  732. +            }
  733. +            if( h->mb.b_interlaced )
  734. +                h->mb.topleft_border_index = 1;
  735. +            else
  736. +            {
  737. +                topright = -1;
  738. +                h->mb.intra_border_index = 0;
  739. +            }
  740. +            h->mb.topright_border_index = 1;
  741. +        }
  742. +        else
  743. +        {
  744. +            if( h->mb.b_interlaced )
  745. +            {
  746. +                if( top >= 0 )
  747. +                {
  748. +                    top += h->mb.i_mb_stride*(!h->mb.field[top]);
  749. +                    if( mb_x )
  750. +                        topleft += h->mb.i_mb_stride*(!h->mb.field[topleft]);
  751. +                    if( mb_x < h->mb.i_mb_width )
  752. +                        topright += h->mb.i_mb_stride*(!h->mb.field[topright]);
  753. +                }
  754. +
  755. +                if( top >= 0)
  756. +                {
  757. +                   if( !h->mb.field[top] )
  758. +                    h->mb.intra_border_index = 2;
  759. +
  760. +                    if( topright >=0 && h->mb.field[topright] )
  761. +                        h->mb.topright_border_index = 0;
  762. +                    else
  763. +                        h->mb.topright_border_index = 2;
  764. +
  765. +                    if( topleft >=0 && h->mb.field[topleft] )
  766. +                        h->mb.topleft_border_index = 0;
  767. +                    else
  768. +                        h->mb.topleft_border_index = 2;
  769. +                }
  770. +            }
  771. +            else
  772. +                h->mb.intra_border_index = 1;
  773. +            if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
  774. +            {
  775. +                if( h->mb.b_interlaced )
  776. +                {
  777. +                    h->mb.left_index_table = left_indices[2];
  778. +                    left[1] += h->mb.i_mb_stride;
  779. +                    h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
  780. +                    h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
  781. +                }
  782. +                else
  783. +                    h->mb.left_index_table = left_indices[0];
  784. +            }
  785. +        }
  786. +    }
  787.  
  788.      if( mb_x > 0 )
  789.      {
  790.          h->mb.i_neighbour_frame |= MB_LEFT;
  791. -        h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  792. +        h->mb.i_mb_left_xy[0] = left[0];
  793. +        h->mb.i_mb_left_xy[1] = left[1];
  794.          h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
  795. +        h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
  796.          if( h->mb.i_mb_xy > h->sh.i_first_mb )
  797.          {
  798.              h->mb.i_neighbour |= MB_LEFT;
  799.  
  800. +            // FIXME: We don't currently support constrained intra + mbaff.
  801.              if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
  802.                  h->mb.i_neighbour_intra |= MB_LEFT;
  803.          }
  804. @@ -622,12 +764,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  805.              }
  806.          }
  807.  
  808. -        if( mb_x > 0 && top - 1 >= 0  )
  809. +        if( mb_x > 0 && topleft >= 0  )
  810.          {
  811.              h->mb.i_neighbour_frame |= MB_TOPLEFT;
  812. -            h->mb.i_mb_topleft_xy = top - 1;
  813. +            h->mb.i_mb_topleft_xy = topleft;
  814.              h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
  815. -            if( top - 1 >= h->sh.i_first_mb )
  816. +            if( topleft >= h->sh.i_first_mb )
  817.              {
  818.                  h->mb.i_neighbour |= MB_TOPLEFT;
  819.  
  820. @@ -636,12 +778,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  821.              }
  822.          }
  823.  
  824. -        if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
  825. +        if( mb_x < h->mb.i_mb_width - 1 && topright >= 0 )
  826.          {
  827.              h->mb.i_neighbour_frame |= MB_TOPRIGHT;
  828. -            h->mb.i_mb_topright_xy = top + 1;
  829. +            h->mb.i_mb_topright_xy = topright;
  830.              h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
  831. -            if( top + 1 >= h->sh.i_first_mb )
  832. +            if( topright >= h->sh.i_first_mb )
  833.              {
  834.                  h->mb.i_neighbour |= MB_TOPRIGHT;
  835.  
  836. @@ -656,9 +798,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  837.  {
  838.      x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
  839.  
  840. -    int left = h->mb.i_mb_left_xy[0];
  841. +    int *left = h->mb.i_mb_left_xy;
  842.      int top  = h->mb.i_mb_top_xy;
  843. -    int top_y = mb_y - (1 << h->mb.b_interlaced);
  844. +    int top_y = top / h->mb.i_mb_stride;
  845.      int s8x8 = h->mb.i_b8_stride;
  846.      int s4x4 = h->mb.i_b4_stride;
  847.      int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
  848. @@ -712,25 +854,25 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  849.  
  850.      if( h->mb.i_neighbour & MB_LEFT )
  851.      {
  852. -        h->mb.cache.i_cbp_left = cbp[left];
  853. +        h->mb.cache.i_cbp_left = cbp[left[0]];
  854.  
  855.          /* load intra4x4 */
  856. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
  857. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
  858. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
  859. -        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
  860. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
  861. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[0]][left_index_table[1]];
  862. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[1]][left_index_table[2]];
  863. +        h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[1]][left_index_table[3]];
  864.  
  865.          /* load non_zero_count */
  866. -        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  867. -        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  868. -        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  869. -        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  870. +        h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
  871. +        h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
  872. +        h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
  873. +        h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
  874.  
  875. -        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
  876. -        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
  877. +        h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[0]][left_index_table[4+4]];
  878. +        h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[1]][left_index_table[4+5]];
  879.  
  880. -        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
  881. -        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
  882. +        h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[0]][left_index_table[4+6]];
  883. +        h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[1]][left_index_table[4+7]];
  884.      }
  885.      else
  886.      {
  887. @@ -755,7 +897,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  888.      if( h->pps->b_transform_8x8_mode )
  889.      {
  890.          h->mb.cache.i_neighbour_transform_size =
  891. -            ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
  892. +            ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
  893.            + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top]  );
  894.      }
  895.  
  896. @@ -768,7 +910,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  897.            + !!(h->mb.i_neighbour & MB_TOP);
  898.      }
  899.  
  900. -    if( !h->mb.b_interlaced )
  901. +    if( !h->sh.b_mbaff )
  902.      {
  903.          x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
  904.          x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
  905. @@ -869,10 +1011,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  906.  
  907.              if( h->mb.i_neighbour & MB_LEFT )
  908.              {
  909. -                CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
  910. -                CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
  911. -                CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
  912. -                CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
  913. +                CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
  914. +                CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
  915. +                CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
  916. +                CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
  917.              }
  918.              else
  919.                  for( int i = 0; i < 4; i++ )
  920. @@ -889,7 +1031,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  921.          {
  922.              uint8_t skipbp;
  923.              x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
  924. -            skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
  925. +            skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
  926.              h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
  927.              h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
  928.              skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
  929. @@ -968,11 +1110,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  930.  
  931.              if( h->mb.i_neighbour & MB_LEFT )
  932.              {
  933. -                int left = h->mb.i_mb_left_xy[0];
  934. -                h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  935. -                h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  936. -                h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  937. -                h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  938. +                int *left = h->mb.i_mb_left_xy;
  939. +                h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
  940. +                h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
  941. +                h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
  942. +                h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
  943.              }
  944.  
  945.              for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
  946. @@ -1103,20 +1245,80 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  947.      int i_pix_offset = b_interlaced
  948.                       ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  949.                       : 16 * mb_x + w * mb_y * i_stride;
  950. +    const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
  951.      pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  952.      if( i )
  953.      {
  954. +        if( h->sh.b_mbaff )
  955. +        {
  956. +            /* Frame macroblocks use the macroblock directly above for intra
  957. +             * prediction. Field macroblock pairs predict from fields of the same
  958. +             * parity. However, field macroblock pairs predicting from frame pairs
  959. +             * use the bottom two rows of the frame for prediction; the penultimate
  960. +             * row is stored in intra_border_backup[2]. */
  961. +            if( mb_y&1 )
  962. +            {
  963. +                if( mb_x )
  964. +                {
  965. +                    // Store top left.
  966. +                    h->intra_diagonal_backup[1][1][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+7];
  967. +                    h->intra_diagonal_backup[1][2][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+8+7];
  968. +                    h->intra_diagonal_backup[2][1][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+7];
  969. +                    h->intra_diagonal_backup[2][2][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+8+7];
  970. +                    // Store top right.
  971. +                    CP32( &h->intra_diagonal_backup[1][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
  972. +                    CP32( &h->intra_diagonal_backup[1][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
  973. +                    CP32( &h->intra_diagonal_backup[2][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
  974. +                    CP32( &h->intra_diagonal_backup[2][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
  975. +                }
  976. +                memcpy( &h->intra_border_backup[2][i][mb_x*16],   h->mb.pic.p_fdec[1]+FDEC_STRIDE*6, 8*sizeof(pixel) );
  977. +                memcpy( &h->intra_border_backup[2][i][mb_x*16]+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*6, 8*sizeof(pixel) );
  978. +            }
  979. +            else
  980. +            {
  981. +                if( mb_x )
  982. +                {
  983. +                    h->intra_diagonal_backup[0][1][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+7];
  984. +                    h->intra_diagonal_backup[0][2][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+8+7];
  985. +                    CP32( &h->intra_diagonal_backup[0][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7 );
  986. +                    CP32( &h->intra_diagonal_backup[0][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7 );
  987. +                }
  988. +                // Sometimes needed for bottom macroblock of this pair.
  989. +                h->intra_diagonal_backup[3][1][mb_x*intra_diag_width] = h->mb.pic.p_fdec[1][-1+7*FDEC_STRIDE];
  990. +                h->intra_diagonal_backup[3][2][mb_x*intra_diag_width] = h->mb.pic.p_fdec[2][-1+7*FDEC_STRIDE];
  991. +            }
  992. +        }
  993.          h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
  994.          memcpy( intra_fdec,   h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  995.          memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  996. -        twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, b_interlaced );
  997. -        twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, b_interlaced );
  998. +        twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
  999. +        twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, h->sh.b_mbaff );
  1000.      }
  1001.      else
  1002.      {
  1003. +        if( h->sh.b_mbaff )
  1004. +        {
  1005. +            if( mb_y&1 )
  1006. +            {
  1007. +                if( mb_x )
  1008. +                {
  1009. +                    h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
  1010. +                    h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
  1011. +                    CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
  1012. +                    CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
  1013. +                }
  1014. +                memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
  1015. +            }
  1016. +            else
  1017. +            {
  1018. +                h->intra_diagonal_backup[0][0][mb_x*intra_diag_width] = h->intra_border_backup[0][0][(mb_x-1)*16+15];
  1019. +                CP64( &h->intra_diagonal_backup[0][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
  1020. +                h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
  1021. +            }
  1022. +        }
  1023.          h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
  1024.          memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
  1025. -        twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, b_interlaced );
  1026. +        twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
  1027.      }
  1028.  }
  1029.  
  1030. --
  1031. 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement