Advertisement
Guest User

Untitled

a guest
Aug 4th, 2017
532
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 154.23 KB | None | 0 0
  1. From 167c2760cbc36911302ace046db5eef6fe1ea54a Mon Sep 17 00:00:00 2001
  2. From: Simon Horlick <simonhorlick@gmail.com>
  3. Date: Tue, 11 Jan 2011 20:05:54 +0000
  4. Subject: [PATCH 01/25] Save interlace decision for all macroblocks
  5.  
  6. ---
  7. common/common.h | 1 +
  8. common/frame.c | 3 +++
  9. common/frame.h | 1 +
  10. common/macroblock.c | 1 +
  11. encoder/encoder.c | 8 ++++++++
  12. 5 files changed, 14 insertions(+), 0 deletions(-)
  13.  
  14. diff --git a/common/common.h b/common/common.h
  15. index 868f526..231254f 100644
  16. --- a/common/common.h
  17. +++ b/common/common.h
  18. @@ -617,6 +617,7 @@ struct x264_t
  19. int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
  20. uint16_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of
  21. * NOTE: this will fail on resolutions above 2^16 MBs... */
  22. + uint8_t *field;
  23.  
  24. /* buffer for weighted versions of the reference frames */
  25. pixel *p_weight_buf[X264_REF_MAX];
  26. diff --git a/common/frame.c b/common/frame.c
  27. index ca90539..eff8ca5 100644
  28. --- a/common/frame.c
  29. +++ b/common/frame.c
  30. @@ -145,6 +145,8 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
  31. frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
  32. frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
  33. }
  34. + if( h->param.b_interlaced )
  35. + CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
  36. }
  37. else /* fenc frame */
  38. {
  39. @@ -219,6 +221,7 @@ void x264_frame_delete( x264_frame_t *frame )
  40. x264_free( frame->i_inv_qscale_factor );
  41. x264_free( frame->i_row_bits );
  42. x264_free( frame->f_row_qp );
  43. + x264_free( frame->field );
  44. x264_free( frame->mb_type );
  45. x264_free( frame->mb_partition );
  46. x264_free( frame->mv[0] );
  47. diff --git a/common/frame.h b/common/frame.h
  48. index 38d0bf2..0e0ab3d 100644
  49. --- a/common/frame.h
  50. +++ b/common/frame.h
  51. @@ -92,6 +92,7 @@ typedef struct x264_frame
  52. int16_t (*mv[2])[2];
  53. int16_t (*mv16x16)[2];
  54. int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
  55. + uint8_t *field;
  56.  
  57. /* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
  58. * Doesn't need special addressing for intra cost because
  59. diff --git a/common/macroblock.c b/common/macroblock.c
  60. index 24c2af9..569d544 100644
  61. --- a/common/macroblock.c
  62. +++ b/common/macroblock.c
  63. @@ -371,6 +371,7 @@ void x264_macroblock_slice_init( x264_t *h )
  64. h->mb.ref[1] = h->fdec->ref[1];
  65. h->mb.type = h->fdec->mb_type;
  66. h->mb.partition = h->fdec->mb_partition;
  67. + h->mb.field = h->fdec->field;
  68.  
  69. h->fdec->i_ref[0] = h->i_ref[0];
  70. h->fdec->i_ref[1] = h->i_ref[1];
  71. diff --git a/encoder/encoder.c b/encoder/encoder.c
  72. index 67646df..af1342b 100644
  73. --- a/encoder/encoder.c
  74. +++ b/encoder/encoder.c
  75. @@ -1956,6 +1956,14 @@ static int x264_slice_write( x264_t *h )
  76. }
  77. }
  78.  
  79. + if( h->param.b_interlaced )
  80. + {
  81. + if( !(i_mb_y&1) )
  82. + h->mb.b_interlaced = 1;
  83. + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  84. + h->mb.field[mb_xy] = h->mb.b_interlaced;
  85. + }
  86. +
  87. if( i_mb_x == 0 && !h->mb.b_reencode_mb )
  88. x264_fdec_filter_row( h, i_mb_y, 1 );
  89.  
  90. --
  91. 1.7.4
  92.  
  93.  
  94. From 2847b697f1fdbee1b8c3128895f2a50c1cba606e Mon Sep 17 00:00:00 2001
  95. From: Simon Horlick <simonhorlick@gmail.com>
  96. Date: Tue, 11 Jan 2011 20:09:00 +0000
  97. Subject: [PATCH 02/25] Disable adaptive mbaff when subme 0 is used
  98.  
  99. ---
  100. common/common.h | 1 +
  101. encoder/encoder.c | 13 ++++++++++---
  102. x264.h | 1 +
  103. 3 files changed, 12 insertions(+), 3 deletions(-)
  104.  
  105. diff --git a/common/common.h b/common/common.h
  106. index 231254f..75c4d59 100644
  107. --- a/common/common.h
  108. +++ b/common/common.h
  109. @@ -568,6 +568,7 @@ struct x264_t
  110. int i_psy_trellis; /* Psy trellis strength--fixed point value*/
  111.  
  112. int b_interlaced;
  113. + int b_adaptive_mbaff;
  114.  
  115. /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
  116. int mv_min[2];
  117. diff --git a/encoder/encoder.c b/encoder/encoder.c
  118. index af1342b..9f294d1 100644
  119. --- a/encoder/encoder.c
  120. +++ b/encoder/encoder.c
  121. @@ -606,6 +606,10 @@ static int x264_validate_parameters( x264_t *h )
  122. x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
  123. h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
  124. }
  125. + /* Adaptive MBAFF cannot be combined with subme 0: motion vectors between
  126. + * field macroblocks and frame macroblocks require halving and hpel pixels.
  127. + * The chosen solution is to make MBAFF non-adaptive in this case. */
  128. + h->mb.b_adaptive_mbaff = !(h->param.b_interlaced && !h->param.analyse.i_subpel_refine);
  129. h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
  130. h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
  131. h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
  132. @@ -1958,9 +1962,12 @@ static int x264_slice_write( x264_t *h )
  133.  
  134. if( h->param.b_interlaced )
  135. {
  136. - if( !(i_mb_y&1) )
  137. - h->mb.b_interlaced = 1;
  138. - x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  139. + if( h->mb.b_adaptive_mbaff )
  140. + {
  141. + if( !(i_mb_y&1) )
  142. + h->mb.b_interlaced = 1;
  143. + x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  144. + }
  145. h->mb.field[mb_xy] = h->mb.b_interlaced;
  146. }
  147.  
  148. diff --git a/x264.h b/x264.h
  149. index 24c3792..da8746b 100644
  150. --- a/x264.h
  151. +++ b/x264.h
  152. @@ -291,6 +291,7 @@ typedef struct x264_param_t
  153. int i_cabac_init_idc;
  154.  
  155. int b_interlaced;
  156. + int b_adaptive_mbaff; /* MBAFF+subme 0 require non-adaptive MBAFF i.e. all field mbs */
  157. int b_constrained_intra;
  158.  
  159. int i_cqm_preset;
  160. --
  161. 1.7.4
  162.  
  163.  
  164. From eb50f5f3757d825b2664e7991b89c7647605dd28 Mon Sep 17 00:00:00 2001
  165. From: Simon Horlick <simonhorlick@gmail.com>
  166. Date: Tue, 11 Jan 2011 20:16:18 +0000
  167. Subject: [PATCH 03/25] Store left references in a table
  168.  
  169. ---
  170. common/common.h | 1 +
  171. common/macroblock.c | 56 ++++++++++++++++++++++++++++++++------------------
  172. 2 files changed, 37 insertions(+), 20 deletions(-)
  173.  
  174. diff --git a/common/common.h b/common/common.h
  175. index 75c4d59..992ba9c 100644
  176. --- a/common/common.h
  177. +++ b/common/common.h
  178. @@ -596,6 +596,7 @@ struct x264_t
  179. int i_mb_top_xy;
  180. int i_mb_topleft_xy;
  181. int i_mb_topright_xy;
  182. + int *left_index_table;
  183.  
  184. /**** thread synchronization ends here ****/
  185. /* subsequent variables are either thread-local or constant,
  186. diff --git a/common/macroblock.c b/common/macroblock.c
  187. index 569d544..d10b19f 100644
  188. --- a/common/macroblock.c
  189. +++ b/common/macroblock.c
  190. @@ -550,6 +550,18 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  191. }
  192. }
  193.  
  194. +static const int left_indices[5][22] = {
  195. +/* columns: [0-3] intra modes, [4-11] nnz, then mv, ref, real indices */
  196. + /* Current is progressive. */
  197. + { 4, 4, 5, 5, 3, 3, 7, 7, 16+1, 16+1, 16+4+1, 16+4+1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
  198. + { 6, 6, 3, 3, 11, 11, 15, 15, 16+3, 16+3, 16+4+3, 16+4+3, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 },
  199. + /* Current is interlaced.*/
  200. + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
  201. + { 4, 6, 4, 6, 3, 11, 3, 11, 16+1, 16+1, 16+4+1, 16+4+1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2 },
  202. + /* Current and left macroblocks are both field or both frame. */
  203. + { 4, 5, 6, 3, 3, 7, 11, 15, 16+1, 16+3, 16+4+1, 16+4+3, 0, 1, 2, 3, 0, 1, 0, 1, 2, 3 },
  204. +};
  205. +
  206. static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
  207. {
  208. int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
  209. @@ -570,6 +582,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  210. h->mb.i_mb_type_left = -1;
  211. h->mb.i_mb_type_topleft = -1;
  212. h->mb.i_mb_type_topright = -1;
  213. + h->mb.left_index_table = left_indices[4];
  214.  
  215. if( mb_x > 0 )
  216. {
  217. @@ -661,6 +674,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  218. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  219. int16_t *cbp = h->mb.cbp;
  220.  
  221. + int *left_index_table = h->mb.left_index_table;
  222. +
  223. /* load cache */
  224. if( h->mb.i_neighbour & MB_TOP )
  225. {
  226. @@ -703,22 +718,22 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  227. h->mb.cache.i_cbp_left = cbp[left];
  228.  
  229. /* load intra4x4 */
  230. - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][4];
  231. - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][5];
  232. - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][6];
  233. - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][3];
  234. + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
  235. + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
  236. + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
  237. + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
  238.  
  239. /* load non_zero_count */
  240. - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
  241. - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
  242. - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
  243. - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
  244. + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  245. + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  246. + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  247. + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  248.  
  249. - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][16+1];
  250. - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][16+3];
  251. + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
  252. + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
  253.  
  254. - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][16+4+1];
  255. - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][16+4+3];
  256. + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
  257. + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
  258. }
  259. else
  260. {
  261. @@ -857,10 +872,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  262.  
  263. if( h->mb.i_neighbour & MB_LEFT )
  264. {
  265. - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][4] );
  266. - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][5] );
  267. - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][6] );
  268. - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][3] );
  269. + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
  270. + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
  271. + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
  272. + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
  273. }
  274. else
  275. for( int i = 0; i < 4; i++ )
  276. @@ -949,6 +964,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  277. int s4x4 = h->mb.i_b4_stride;
  278.  
  279. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  280. + int *left_index_table = h->mb.left_index_table;
  281.  
  282. if( h->mb.i_neighbour & MB_TOP )
  283. CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
  284. @@ -956,10 +972,10 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  285. if( h->mb.i_neighbour & MB_LEFT )
  286. {
  287. int left = h->mb.i_mb_left_xy;
  288. - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
  289. - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
  290. - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];
  291. - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][15];
  292. + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  293. + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  294. + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  295. + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  296. }
  297.  
  298. for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
  299. --
  300. 1.7.4
  301.  
  302.  
  303. From c83e9ad2f2cb3a7f5d2ae6c4fd4b5a8cc04f894a Mon Sep 17 00:00:00 2001
  304. From: Simon Horlick <simonhorlick@gmail.com>
  305. Date: Tue, 11 Jan 2011 20:21:26 +0000
  306. Subject: [PATCH 04/25] Store references to the two left macroblocks
  307.  
  308. Fix compiler warnings about discarding const qualifiers
  309. ---
  310. common/common.h | 6 +++---
  311. common/deblock.c | 4 ++--
  312. common/macroblock.c | 24 ++++++++++++------------
  313. common/mvpred.c | 2 +-
  314. encoder/analyse.c | 8 ++++----
  315. encoder/cabac.c | 8 ++++----
  316. 6 files changed, 26 insertions(+), 26 deletions(-)
  317.  
  318. diff --git a/common/common.h b/common/common.h
  319. index 992ba9c..f839e7e 100644
  320. --- a/common/common.h
  321. +++ b/common/common.h
  322. @@ -588,15 +588,15 @@ struct x264_t
  323. unsigned int i_neighbour_intra; /* for constrained intra pred */
  324. unsigned int i_neighbour_frame; /* ignoring slice boundaries */
  325. int i_mb_type_top;
  326. - int i_mb_type_left;
  327. + int i_mb_type_left[2];
  328. int i_mb_type_topleft;
  329. int i_mb_type_topright;
  330. int i_mb_prev_xy;
  331. - int i_mb_left_xy;
  332. + int i_mb_left_xy[2];
  333. int i_mb_top_xy;
  334. int i_mb_topleft_xy;
  335. int i_mb_topright_xy;
  336. - int *left_index_table;
  337. + const int *left_index_table;
  338.  
  339. /**** thread synchronization ends here ****/
  340. /* subsequent variables are either thread-local or constant,
  341. diff --git a/common/deblock.c b/common/deblock.c
  342. index 1b6448f..0800461 100644
  343. --- a/common/deblock.c
  344. +++ b/common/deblock.c
  345. @@ -347,10 +347,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  346.  
  347. if( h->mb.i_neighbour & MB_LEFT )
  348. {
  349. - int qpl = h->mb.qp[h->mb.i_mb_left_xy];
  350. + int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
  351. int qp_left = (qp + qpl + 1) >> 1;
  352. int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
  353. - int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy] );
  354. + int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
  355. if( intra_cur || intra_left )
  356. FILTER( _intra, 0, 0, qp_left, qpc_left );
  357. else
  358. diff --git a/common/macroblock.c b/common/macroblock.c
  359. index d10b19f..aa194a7 100644
  360. --- a/common/macroblock.c
  361. +++ b/common/macroblock.c
  362. @@ -575,11 +575,11 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  363. h->mb.i_neighbour_intra = 0;
  364. h->mb.i_neighbour_frame = 0;
  365. h->mb.i_mb_top_xy = -1;
  366. - h->mb.i_mb_left_xy = -1;
  367. + h->mb.i_mb_left_xy[0] = h->mb.i_mb_left_xy[1] = -1;
  368. h->mb.i_mb_topleft_xy = -1;
  369. h->mb.i_mb_topright_xy = -1;
  370. h->mb.i_mb_type_top = -1;
  371. - h->mb.i_mb_type_left = -1;
  372. + h->mb.i_mb_type_left[0] = h->mb.i_mb_type_left[1] = -1;
  373. h->mb.i_mb_type_topleft = -1;
  374. h->mb.i_mb_type_topright = -1;
  375. h->mb.left_index_table = left_indices[4];
  376. @@ -587,13 +587,13 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  377. if( mb_x > 0 )
  378. {
  379. h->mb.i_neighbour_frame |= MB_LEFT;
  380. - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
  381. - h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
  382. + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  383. + h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
  384. if( h->mb.i_mb_xy > h->sh.i_first_mb )
  385. {
  386. h->mb.i_neighbour |= MB_LEFT;
  387.  
  388. - if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
  389. + if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
  390. h->mb.i_neighbour_intra |= MB_LEFT;
  391. }
  392. }
  393. @@ -659,7 +659,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  394. {
  395. x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
  396.  
  397. - int left = h->mb.i_mb_left_xy;
  398. + int left = h->mb.i_mb_left_xy[0];
  399. int top = h->mb.i_mb_top_xy;
  400. int top_y = mb_y - (1 << h->mb.b_interlaced);
  401. int s8x8 = h->mb.i_b8_stride;
  402. @@ -674,7 +674,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  403. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  404. int16_t *cbp = h->mb.cbp;
  405.  
  406. - int *left_index_table = h->mb.left_index_table;
  407. + const int *left_index_table = h->mb.left_index_table;
  408.  
  409. /* load cache */
  410. if( h->mb.i_neighbour & MB_TOP )
  411. @@ -927,8 +927,8 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
  412.  
  413. if( mb_x > 0 )
  414. {
  415. - h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
  416. - if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
  417. + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  418. + if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
  419. h->mb.i_neighbour |= MB_LEFT;
  420. }
  421.  
  422. @@ -964,14 +964,14 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  423. int s4x4 = h->mb.i_b4_stride;
  424.  
  425. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  426. - int *left_index_table = h->mb.left_index_table;
  427. + const int *left_index_table = h->mb.left_index_table;
  428.  
  429. if( h->mb.i_neighbour & MB_TOP )
  430. CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
  431.  
  432. if( h->mb.i_neighbour & MB_LEFT )
  433. {
  434. - int left = h->mb.i_mb_left_xy;
  435. + int left = h->mb.i_mb_left_xy[0];
  436. h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  437. h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  438. h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  439. @@ -1046,7 +1046,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  440. {
  441. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  442. int top = h->mb.i_mb_top_xy;
  443. - int left = h->mb.i_mb_left_xy;
  444. + int left = h->mb.i_mb_left_xy[0];
  445.  
  446. if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
  447. {
  448. diff --git a/common/mvpred.c b/common/mvpred.c
  449. index a24dde8..c8efe1f 100644
  450. --- a/common/mvpred.c
  451. +++ b/common/mvpred.c
  452. @@ -426,7 +426,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
  453. }
  454.  
  455. /* spatial predictors */
  456. - SET_MVP( mvr[h->mb.i_mb_left_xy] );
  457. + SET_MVP( mvr[h->mb.i_mb_left_xy[0]] );
  458. SET_MVP( mvr[h->mb.i_mb_top_xy] );
  459. SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
  460. SET_MVP( mvr[h->mb.i_mb_topright_xy] );
  461. diff --git a/encoder/analyse.c b/encoder/analyse.c
  462. index 5419bd1..87125c1 100644
  463. --- a/encoder/analyse.c
  464. +++ b/encoder/analyse.c
  465. @@ -516,7 +516,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  466. {
  467. /* Always run in fast-intra mode for subme < 3 */
  468. if( h->mb.i_subpel_refine > 2 &&
  469. - ( IS_INTRA( h->mb.i_mb_type_left ) ||
  470. + ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
  471. IS_INTRA( h->mb.i_mb_type_top ) ||
  472. IS_INTRA( h->mb.i_mb_type_topleft ) ||
  473. IS_INTRA( h->mb.i_mb_type_topright ) ||
  474. @@ -1296,7 +1296,7 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  475. /* early termination: if 16x16 chose ref 0, then evalute no refs older
  476. * than those used by the neighbors */
  477. if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) &&
  478. - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
  479. + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
  480. {
  481. i_maxref = 0;
  482. CHECK_NEIGHBOUR( -8 - 1 );
  483. @@ -2063,7 +2063,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  484. {
  485. x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
  486. if( i_maxref[l] > 0 && lX->me16x16.i_ref == 0 &&
  487. - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left > 0 )
  488. + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 )
  489. {
  490. i_maxref[l] = 0;
  491. CHECK_NEIGHBOUR( -8 - 1 );
  492. @@ -2817,7 +2817,7 @@ intra_analysis:
  493. {}
  494. else if( h->param.analyse.i_subpel_refine >= 3 )
  495. analysis.b_try_skip = 1;
  496. - else if( h->mb.i_mb_type_left == P_SKIP ||
  497. + else if( h->mb.i_mb_type_left[0] == P_SKIP ||
  498. h->mb.i_mb_type_top == P_SKIP ||
  499. h->mb.i_mb_type_topleft == P_SKIP ||
  500. h->mb.i_mb_type_topright == P_SKIP )
  501. diff --git a/encoder/cabac.c b/encoder/cabac.c
  502. index 6333737..334318d 100644
  503. --- a/encoder/cabac.c
  504. +++ b/encoder/cabac.c
  505. @@ -79,7 +79,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  506. if( h->sh.i_type == SLICE_TYPE_I )
  507. {
  508. int ctx = 0;
  509. - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
  510. + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
  511. ctx++;
  512. if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
  513. ctx++;
  514. @@ -113,7 +113,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  515. else //if( h->sh.i_type == SLICE_TYPE_B )
  516. {
  517. int ctx = 0;
  518. - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
  519. + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
  520. ctx++;
  521. if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
  522. ctx++;
  523. @@ -198,7 +198,7 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
  524. int ctx = 0;
  525.  
  526. /* No need to test for I4x4 or I_16x16 as cache_save handle that */
  527. - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
  528. + if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
  529. ctx++;
  530. if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
  531. ctx++;
  532. @@ -280,7 +280,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  533. #if !RDO_SKIP_BS
  534. void x264_cabac_mb_skip( x264_t *h, int b_skip )
  535. {
  536. - int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left ))
  537. + int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
  538. + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
  539. + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
  540. x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
  541. --
  542. 1.7.4
  543.  
  544.  
  545. From 135b93d39e3d8d8540a41bd66d90aa42f7a73ba4 Mon Sep 17 00:00:00 2001
  546. From: Simon Horlick <simonhorlick@gmail.com>
  547. Date: Fri, 14 Jan 2011 21:18:14 +0000
  548. Subject: [PATCH 05/25] Neighbour calculation for mbaff
  549.  
  550. Back up intra borders correctly and make neighbour calculation several times longer.
  551. ---
  552. common/common.h | 9 ++-
  553. common/macroblock.c | 286 +++++++++++++++++++++++++++++++++++++++++++--------
  554. 2 files changed, 249 insertions(+), 46 deletions(-)
  555.  
  556. diff --git a/common/common.h b/common/common.h
  557. index f839e7e..c993857 100644
  558. --- a/common/common.h
  559. +++ b/common/common.h
  560. @@ -549,6 +549,8 @@ struct x264_t
  561. int i_mb_stride;
  562. int i_b8_stride;
  563. int i_b4_stride;
  564. + int left_b8[2];
  565. + int left_b4[2];
  566.  
  567. /* Current index */
  568. int i_mb_x;
  569. @@ -597,6 +599,10 @@ struct x264_t
  570. int i_mb_topleft_xy;
  571. int i_mb_topright_xy;
  572. const int *left_index_table;
  573. + int topleft_partition;
  574. + int intra_border_index;
  575. + int topleft_border_index;
  576. + int topright_border_index;
  577.  
  578. /**** thread synchronization ends here ****/
  579. /* subsequent variables are either thread-local or constant,
  580. @@ -827,7 +833,8 @@ struct x264_t
  581.  
  582. /* Buffers that are allocated per-thread even in sliced threads. */
  583. void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
  584. - pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
  585. + pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
  586. + pixel *intra_diagonal_backup[5][3];
  587. uint8_t (*deblock_strength[2])[2][4][4];
  588.  
  589. /* CPU functions dependents */
  590. diff --git a/common/macroblock.c b/common/macroblock.c
  591. index aa194a7..60275ae 100644
  592. --- a/common/macroblock.c
  593. +++ b/common/macroblock.c
  594. @@ -314,18 +314,35 @@ void x264_macroblock_cache_free( x264_t *h )
  595. int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  596. {
  597. if( !b_lookahead )
  598. - for( int i = 0; i <= h->param.b_interlaced; i++ )
  599. + {
  600. + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
  601. {
  602. for( int j = 0; j < 2; j++ )
  603. {
  604. /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
  605. CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
  606. h->intra_border_backup[i][j] += 16;
  607. - h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
  608. + if( !h->param.b_interlaced )
  609. + h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
  610. + }
  611. + }
  612. + for( int i = 0; i < 4*h->mb.b_interlaced; i++ )
  613. + {
  614. + for( int j = 0; j < 3; j++ )
  615. + {
  616. + const int width = 1 + 8; // top left pixel + eight top right pixels (for luma)
  617. + CHECKED_MALLOCZERO( h->intra_diagonal_backup[i][j], (h->sps->i_mb_width*width+32) * sizeof(pixel) );
  618. + h->intra_diagonal_backup[i][j] += 16;
  619. + if( !h->param.b_interlaced )
  620. + h->intra_diagonal_backup[1][j] = h->intra_diagonal_backup[i][j];
  621. }
  622. + }
  623. + for( int i = 0; i <= h->param.b_interlaced; i++ )
  624. + {
  625. CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
  626. h->deblock_strength[1] = h->deblock_strength[i];
  627. }
  628. + }
  629.  
  630. /* Allocate scratch buffer */
  631. int scratch_size = 0;
  632. @@ -353,12 +370,20 @@ fail:
  633. void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
  634. {
  635. if( !b_lookahead )
  636. + {
  637. for( int i = 0; i <= h->param.b_interlaced; i++ )
  638. - {
  639. x264_free( h->deblock_strength[i] );
  640. + for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
  641. + {
  642. for( int j = 0; j < 2; j++ )
  643. x264_free( h->intra_border_backup[i][j] - 16 );
  644. }
  645. + for( int i = 0; i < 4*h->param.b_interlaced; i++ )
  646. + {
  647. + for( int j = 0; j < 3; j++ )
  648. + x264_free( h->intra_diagonal_backup[i][j] - 16 );
  649. + }
  650. + }
  651. x264_free( h->scratch_buffer );
  652. }
  653.  
  654. @@ -494,14 +519,15 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  655. {
  656. int w = (i ? 8 : 16);
  657. int i_stride = h->fdec->i_stride[i];
  658. - int i_stride2 = i_stride << b_interlaced;
  659. - int i_pix_offset = b_interlaced
  660. + int i_stride2 = i_stride << h->mb.b_interlaced;
  661. + int i_pix_offset = h->mb.b_interlaced
  662. ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  663. : 16 * mb_x + w * mb_y * i_stride;
  664. pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
  665. - pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  666. + pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
  667. int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
  668. - if( b_interlaced )
  669. + /* ref_pix_offset[0] references the current field and [1] the opposite field. */
  670. + if( h->mb.b_interlaced )
  671. ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
  672. h->mb.pic.i_stride[i] = i_stride2;
  673. h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
  674. @@ -510,11 +536,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  675. h->mc.load_deinterleave_8x8x2_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2 );
  676. memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
  677. memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
  678. + if( h->sh.b_mbaff )
  679. + {
  680. + // Top left samples.
  681. + h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][1][mb_x*9];
  682. + h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][2][mb_x*9];
  683. + // Top right samples.
  684. + CP32( &h->mb.pic.p_fdec[1][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][1][mb_x*9+1] );
  685. + CP32( &h->mb.pic.p_fdec[2][-FDEC_STRIDE+8], &h->intra_diagonal_backup[h->mb.topright_border_index][2][mb_x*9+1] );
  686. + }
  687. +
  688. }
  689. else
  690. {
  691. h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, h->mb.pic.p_fenc_plane[0], i_stride2, 16 );
  692. - memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
  693. + if( h->sh.b_mbaff )
  694. + {
  695. + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 16*sizeof(pixel) );
  696. + h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->intra_diagonal_backup[h->mb.topleft_border_index][0][mb_x*9];
  697. + CP64( &h->mb.pic.p_fdec[0][-FDEC_STRIDE+16], &h->intra_diagonal_backup[h->mb.topright_border_index][0][mb_x*9+1] );
  698. + }
  699. + else
  700. + memcpy( h->mb.pic.p_fdec[0]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
  701. }
  702. if( b_interlaced )
  703. {
  704. @@ -571,6 +614,10 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  705. h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
  706. h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
  707. h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
  708. + h->mb.left_b8[0] =
  709. + h->mb.left_b8[1] = -1;
  710. + h->mb.left_b4[0] =
  711. + h->mb.left_b4[1] = -1;
  712. h->mb.i_neighbour = 0;
  713. h->mb.i_neighbour_intra = 0;
  714. h->mb.i_neighbour_frame = 0;
  715. @@ -583,16 +630,105 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  716. h->mb.i_mb_type_topleft = -1;
  717. h->mb.i_mb_type_topright = -1;
  718. h->mb.left_index_table = left_indices[4];
  719. + h->mb.topleft_partition = 0;
  720. + h->mb.topright_border_index =
  721. + h->mb.topleft_border_index = !(mb_y&1);
  722. + h->mb.intra_border_index = mb_y&1;
  723. +
  724. + int topleft = top - 1;
  725. + int topright = top + 1;
  726. + int left[2];
  727. +
  728. + left[0] = left[1] = h->mb.i_mb_xy - 1;
  729. + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
  730. + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
  731. +
  732. + if( h->sh.b_mbaff )
  733. + {
  734. + if( mb_y&1 )
  735. + {
  736. + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
  737. + {
  738. + left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
  739. + h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
  740. + h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
  741. +
  742. + if( h->mb.b_interlaced )
  743. + {
  744. + h->mb.left_index_table = left_indices[3];
  745. + left[1] += h->mb.i_mb_stride;
  746. + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
  747. + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
  748. + }
  749. + else
  750. + {
  751. + h->mb.left_index_table = left_indices[1];
  752. + topleft += h->mb.i_mb_stride;
  753. + h->mb.topleft_partition = 1;
  754. + h->mb.topleft_border_index = 3;
  755. + }
  756. + }
  757. + if( h->mb.b_interlaced )
  758. + h->mb.topleft_border_index = 1;
  759. + else
  760. + {
  761. + topright = -1;
  762. + h->mb.intra_border_index = 0;
  763. + }
  764. + h->mb.topright_border_index = 1;
  765. + }
  766. + else
  767. + {
  768. + if( h->mb.b_interlaced && top >= 0 )
  769. + {
  770. + if( !h->mb.field[top] )
  771. + {
  772. + top += h->mb.i_mb_stride;
  773. + h->mb.intra_border_index = 2;
  774. + }
  775. + if( mb_x )
  776. + topleft += h->mb.i_mb_stride*(!h->mb.field[topleft]);
  777. + if( mb_x < h->mb.i_mb_width-1 )
  778. + topright += h->mb.i_mb_stride*(!h->mb.field[topright]);
  779. +
  780. + if( topright >=0 && h->mb.field[topright] )
  781. + h->mb.topright_border_index = 0;
  782. + else
  783. + h->mb.topright_border_index = 2;
  784. + if( topleft >=0 && h->mb.field[topleft] )
  785. + h->mb.topleft_border_index = 0;
  786. + else
  787. + h->mb.topleft_border_index = 2;
  788. + }
  789. + else
  790. + h->mb.intra_border_index = 1;
  791. + if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
  792. + {
  793. + if( h->mb.b_interlaced )
  794. + {
  795. + h->mb.left_index_table = left_indices[2];
  796. + left[1] += h->mb.i_mb_stride;
  797. + h->mb.left_b8[1] += 2*h->mb.i_b8_stride;
  798. + h->mb.left_b4[1] += 4*h->mb.i_b4_stride;
  799. + }
  800. + else
  801. + h->mb.left_index_table = left_indices[0];
  802. + }
  803. + }
  804. + }
  805.  
  806. if( mb_x > 0 )
  807. {
  808. h->mb.i_neighbour_frame |= MB_LEFT;
  809. - h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  810. + h->mb.i_mb_left_xy[0] = left[0];
  811. + h->mb.i_mb_left_xy[1] = left[1];
  812. h->mb.i_mb_type_left[0] = h->mb.type[h->mb.i_mb_left_xy[0]];
  813. + h->mb.i_mb_type_left[1] = h->mb.type[h->mb.i_mb_left_xy[1]];
  814. if( h->mb.i_mb_xy > h->sh.i_first_mb )
  815. {
  816. h->mb.i_neighbour |= MB_LEFT;
  817.  
  818. + // FIXME: We don't currently support constrained intra + mbaff.
  819. if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left[0] ) )
  820. h->mb.i_neighbour_intra |= MB_LEFT;
  821. }
  822. @@ -625,12 +761,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  823. }
  824. }
  825.  
  826. - if( mb_x > 0 && top - 1 >= 0 )
  827. + if( mb_x > 0 && topleft >= 0 )
  828. {
  829. h->mb.i_neighbour_frame |= MB_TOPLEFT;
  830. - h->mb.i_mb_topleft_xy = top - 1;
  831. + h->mb.i_mb_topleft_xy = topleft;
  832. h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
  833. - if( top - 1 >= h->sh.i_first_mb )
  834. + if( topleft >= h->sh.i_first_mb )
  835. {
  836. h->mb.i_neighbour |= MB_TOPLEFT;
  837.  
  838. @@ -639,12 +775,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
  839. }
  840. }
  841.  
  842. - if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
  843. + if( mb_x < h->mb.i_mb_width - 1 && topright >= 0 )
  844. {
  845. h->mb.i_neighbour_frame |= MB_TOPRIGHT;
  846. - h->mb.i_mb_topright_xy = top + 1;
  847. + h->mb.i_mb_topright_xy = topright;
  848. h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
  849. - if( top + 1 >= h->sh.i_first_mb )
  850. + if( topright >= h->sh.i_first_mb )
  851. {
  852. h->mb.i_neighbour |= MB_TOPRIGHT;
  853.  
  854. @@ -659,9 +795,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  855. {
  856. x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
  857.  
  858. - int left = h->mb.i_mb_left_xy[0];
  859. + int *left = h->mb.i_mb_left_xy;
  860. int top = h->mb.i_mb_top_xy;
  861. - int top_y = mb_y - (1 << h->mb.b_interlaced);
  862. + int top_y = top / h->mb.i_mb_stride;
  863. int s8x8 = h->mb.i_b8_stride;
  864. int s4x4 = h->mb.i_b4_stride;
  865. int top_8x8 = (2*top_y+1) * s8x8 + 2*mb_x;
  866. @@ -715,25 +851,25 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  867.  
  868. if( h->mb.i_neighbour & MB_LEFT )
  869. {
  870. - h->mb.cache.i_cbp_left = cbp[left];
  871. + h->mb.cache.i_cbp_left = cbp[left[0]];
  872.  
  873. /* load intra4x4 */
  874. - h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left][left_index_table[0]];
  875. - h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left][left_index_table[1]];
  876. - h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left][left_index_table[2]];
  877. - h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left][left_index_table[3]];
  878. + h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
  879. + h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[0]][left_index_table[1]];
  880. + h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[1]][left_index_table[2]];
  881. + h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[1]][left_index_table[3]];
  882.  
  883. /* load non_zero_count */
  884. - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  885. - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  886. - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  887. - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  888. + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
  889. + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
  890. + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
  891. + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
  892.  
  893. - h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left][left_index_table[4+4]];
  894. - h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left][left_index_table[4+5]];
  895. + h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[0]][left_index_table[4+4]];
  896. + h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[1]][left_index_table[4+5]];
  897.  
  898. - h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left][left_index_table[4+6]];
  899. - h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left][left_index_table[4+7]];
  900. + h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[0]][left_index_table[4+6]];
  901. + h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[1]][left_index_table[4+7]];
  902. }
  903. else
  904. {
  905. @@ -758,7 +894,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  906. if( h->pps->b_transform_8x8_mode )
  907. {
  908. h->mb.cache.i_neighbour_transform_size =
  909. - ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
  910. + ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left[0]] )
  911. + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
  912. }
  913.  
  914. @@ -771,7 +907,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  915. + !!(h->mb.i_neighbour & MB_TOP);
  916. }
  917.  
  918. - if( !h->mb.b_interlaced )
  919. + if( !h->sh.b_mbaff )
  920. {
  921. x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
  922. x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
  923. @@ -872,10 +1008,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  924.  
  925. if( h->mb.i_neighbour & MB_LEFT )
  926. {
  927. - CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left][left_index_table[0]] );
  928. - CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left][left_index_table[1]] );
  929. - CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left][left_index_table[2]] );
  930. - CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left][left_index_table[3]] );
  931. + CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
  932. + CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
  933. + CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
  934. + CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
  935. }
  936. else
  937. for( int i = 0; i < 4; i++ )
  938. @@ -892,7 +1028,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  939. {
  940. uint8_t skipbp;
  941. x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
  942. - skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
  943. + skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
  944. h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
  945. h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
  946. skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
  947. @@ -971,11 +1107,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  948.  
  949. if( h->mb.i_neighbour & MB_LEFT )
  950. {
  951. - int left = h->mb.i_mb_left_xy[0];
  952. - h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][left_index_table[4+0]];
  953. - h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][left_index_table[4+1]];
  954. - h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][left_index_table[4+2]];
  955. - h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left][left_index_table[4+3]];
  956. + int *left = h->mb.i_mb_left_xy;
  957. + h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table[4+0]];
  958. + h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table[4+1]];
  959. + h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table[4+2]];
  960. + h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table[4+3]];
  961. }
  962.  
  963. for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
  964. @@ -1106,20 +1242,80 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  965. int i_pix_offset = b_interlaced
  966. ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  967. : 16 * mb_x + w * mb_y * i_stride;
  968. + const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
  969. pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  970. if( i )
  971. {
  972. + if( h->sh.b_mbaff )
  973. + {
  974. + /* Frame macroblocks use the macroblock directly above for intra
  975. + * prediction. Field macroblock pairs predict from fields of the same
  976. + * parity. However field macroblock pairs predicting from frame pairs
  977. + * use the bottom two rows of the frame for prediction, the penultimate
  978. + * row is stored in intra_border_backup[2]. */
  979. + if( mb_y&1 )
  980. + {
  981. + if( mb_x )
  982. + {
  983. + // Store top left.
  984. + h->intra_diagonal_backup[1][1][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+7];
  985. + h->intra_diagonal_backup[1][2][mb_x*intra_diag_width] = h->intra_border_backup[1][1][(mb_x-1)*16+8+7];
  986. + h->intra_diagonal_backup[2][1][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+7];
  987. + h->intra_diagonal_backup[2][2][mb_x*intra_diag_width] = h->intra_border_backup[2][1][(mb_x-1)*16+8+7];
  988. + // Store top right.
  989. + CP32( &h->intra_diagonal_backup[1][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
  990. + CP32( &h->intra_diagonal_backup[1][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*7 );
  991. + CP32( &h->intra_diagonal_backup[2][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
  992. + CP32( &h->intra_diagonal_backup[2][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*6 );
  993. + }
  994. + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[1]+FDEC_STRIDE*6, 8*sizeof(pixel) );
  995. + memcpy( &h->intra_border_backup[2][i][mb_x*16]+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*6, 8*sizeof(pixel) );
  996. + }
  997. + else
  998. + {
  999. + if( mb_x )
  1000. + {
  1001. + h->intra_diagonal_backup[0][1][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+7];
  1002. + h->intra_diagonal_backup[0][2][mb_x*intra_diag_width] = h->intra_border_backup[0][1][(mb_x-1)*16+8+7];
  1003. + CP32( &h->intra_diagonal_backup[0][1][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[1]+FDEC_STRIDE*7 );
  1004. + CP32( &h->intra_diagonal_backup[0][2][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[2]+FDEC_STRIDE*7 );
  1005. + }
  1006. + // Sometimes needed for bottom macroblock of this pair.
  1007. + h->intra_diagonal_backup[3][1][mb_x*intra_diag_width] = h->mb.pic.p_fdec[1][-1+7*FDEC_STRIDE];
  1008. + h->intra_diagonal_backup[3][2][mb_x*intra_diag_width] = h->mb.pic.p_fdec[2][-1+7*FDEC_STRIDE];
  1009. + }
  1010. + }
  1011. h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
  1012. memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  1013. memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  1014. - twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, b_interlaced );
  1015. - twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, b_interlaced );
  1016. + twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
  1017. + twiddle_topleft_pixel( h->mb.pic.p_fdec[2]-FDEC_STRIDE-1, h->mb.pic.p_fdec[2]-FDEC_STRIDE+7, h->sh.b_mbaff );
  1018. }
  1019. else
  1020. {
  1021. + if( h->sh.b_mbaff )
  1022. + {
  1023. + if( mb_y&1 )
  1024. + {
  1025. + if( mb_x )
  1026. + {
  1027. + h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
  1028. + h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
  1029. + CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
  1030. + CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
  1031. + }
  1032. + memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
  1033. + }
  1034. + else
  1035. + {
  1036. + h->intra_diagonal_backup[0][0][mb_x*intra_diag_width] = h->intra_border_backup[0][0][(mb_x-1)*16+15];
  1037. + CP64( &h->intra_diagonal_backup[0][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
  1038. + h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
  1039. + }
  1040. + }
  1041. h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
  1042. memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
  1043. - twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, b_interlaced );
  1044. + twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
  1045. }
  1046. }
  1047.  
  1048. --
  1049. 1.7.4
  1050.  
  1051.  
  1052. From db33884079bf79074a67fac2851d8c9425c45bfa Mon Sep 17 00:00:00 2001
  1053. From: Simon Horlick <simonhorlick@gmail.com>
  1054. Date: Thu, 17 Feb 2011 00:56:59 +0000
  1055. Subject: [PATCH 06/25] Change b_interlaced in store_pic back to its original meaning
  1056.  
  1057. ---
  1058. common/macroblock.c | 10 +++++-----
  1059. 1 files changed, 5 insertions(+), 5 deletions(-)
  1060.  
  1061. diff --git a/common/macroblock.c b/common/macroblock.c
  1062. index 60275ae..63a8933 100644
  1063. --- a/common/macroblock.c
  1064. +++ b/common/macroblock.c
  1065. @@ -1238,15 +1238,15 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1066. {
  1067. int w = i ? 8 : 16;
  1068. int i_stride = h->fdec->i_stride[i];
  1069. - int i_stride2 = i_stride << b_interlaced;
  1070. - int i_pix_offset = b_interlaced
  1071. + int i_stride2 = i_stride << (b_interlaced && h->mb.b_interlaced);
  1072. + int i_pix_offset = (b_interlaced && h->mb.b_interlaced)
  1073. ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  1074. : 16 * mb_x + w * mb_y * i_stride;
  1075. const int intra_diag_width = 8+1; // One top left sample, then eight top right samples.
  1076. pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  1077. if( i )
  1078. {
  1079. - if( h->sh.b_mbaff )
  1080. + if( b_interlaced )
  1081. {
  1082. /* Frame macroblocks use the macroblock directly above for intra
  1083. * prediction. Field macroblock pairs predict from fields of the same
  1084. @@ -1293,7 +1293,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1085. }
  1086. else
  1087. {
  1088. - if( h->sh.b_mbaff )
  1089. + if( b_interlaced )
  1090. {
  1091. if( mb_y&1 )
  1092. {
  1093. @@ -1333,7 +1333,7 @@ void x264_macroblock_cache_save( x264_t *h )
  1094. int8_t *i4x4 = h->mb.intra4x4_pred_mode[i_mb_xy];
  1095. uint8_t *nnz = h->mb.non_zero_count[i_mb_xy];
  1096.  
  1097. - if( h->mb.b_interlaced )
  1098. + if( h->sh.b_mbaff )
  1099. {
  1100. x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 0, 1 );
  1101. x264_macroblock_store_pic( h, h->mb.i_mb_x, h->mb.i_mb_y, 1, 1 );
  1102. --
  1103. 1.7.4
  1104.  
  1105.  
  1106. From b816ff5ba65edb03226237e2fc6fff06d7d9e60a Mon Sep 17 00:00:00 2001
  1107. From: Simon Horlick <simonhorlick@gmail.com>
  1108. Date: Fri, 18 Feb 2011 18:57:15 +0000
  1109. Subject: [PATCH 07/25] Only enable twiddle_topleft in progressive
  1110.  
  1111. ---
  1112. common/macroblock.c | 3 ++-
  1113. 1 files changed, 2 insertions(+), 1 deletions(-)
  1114.  
  1115. diff --git a/common/macroblock.c b/common/macroblock.c
  1116. index 63a8933..f775030 100644
  1117. --- a/common/macroblock.c
  1118. +++ b/common/macroblock.c
  1119. @@ -1313,9 +1313,10 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1120. h->intra_diagonal_backup[3][0][mb_x*intra_diag_width] = h->mb.pic.p_fdec[0][-1+15*FDEC_STRIDE];
  1121. }
  1122. }
  1123. + else
  1124. + twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
  1125. h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
  1126. memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
  1127. - twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
  1128. }
  1129. }
  1130.  
  1131. --
  1132. 1.7.4
  1133.  
  1134.  
  1135. From 2b1351a44177a7dc289c9bfe38c7ddc7b0f9a3a2 Mon Sep 17 00:00:00 2001
  1136. From: Simon Horlick <simonhorlick@gmail.com>
  1137. Date: Wed, 16 Mar 2011 21:34:28 +0000
  1138. Subject: [PATCH 08/25] Initial inter support
  1139.  
  1140. ---
  1141. common/common.h | 5 +
  1142. common/frame.c | 33 +++++++-
  1143. common/frame.h | 3 +
  1144. common/macroblock.c | 217 +++++++++++++++++++++++++++++++++++++++++++-------
  1145. common/mc.c | 37 +++++++--
  1146. common/mvpred.c | 23 +++++-
  1147. common/x86/util.h | 24 ------
  1148. encoder/macroblock.c | 4 +-
  1149. 8 files changed, 277 insertions(+), 69 deletions(-)
  1150.  
  1151. diff --git a/common/common.h b/common/common.h
  1152. index c993857..ef9b35a 100644
  1153. --- a/common/common.h
  1154. +++ b/common/common.h
  1155. @@ -600,6 +600,7 @@ struct x264_t
  1156. int i_mb_topright_xy;
  1157. const int *left_index_table;
  1158. int topleft_partition;
  1159. + int allow_skip;
  1160. int intra_border_index;
  1161. int topleft_border_index;
  1162. int topright_border_index;
  1163. @@ -730,6 +731,10 @@ struct x264_t
  1164. /* neighbor CBPs */
  1165. int i_cbp_top;
  1166. int i_cbp_left;
  1167. +
  1168. + /* extra data required for mbaff in mv prediction */
  1169. + int16_t topright_mv[2][3][2];
  1170. + int8_t topright_ref[2][3];
  1171. } cache;
  1172.  
  1173. /* */
  1174. diff --git a/common/frame.c b/common/frame.c
  1175. index eff8ca5..d04f047 100644
  1176. --- a/common/frame.c
  1177. +++ b/common/frame.c
  1178. @@ -48,7 +48,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
  1179.  
  1180. int i_mb_count = h->mb.i_mb_count;
  1181. int i_stride, i_width, i_lines;
  1182. - int i_padv = PADV << h->param.b_interlaced;
  1183. + int i_padv = PADV << 2*h->param.b_interlaced;
  1184. int luma_plane_size, chroma_plane_size;
  1185. int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
  1186. int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
  1187. @@ -99,21 +99,30 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
  1188. chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + i_padv));
  1189.  
  1190. CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
  1191. + CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
  1192. frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
  1193. + frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
  1194.  
  1195. /* all 4 luma planes allocated together, since the cacheline split code
  1196. * requires them to be in-phase wrt cacheline alignment. */
  1197. if( h->param.analyse.i_subpel_refine && b_fdec )
  1198. {
  1199. CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
  1200. + CHECKED_MALLOC( frame->buffer_fld[0], 4*luma_plane_size * sizeof(pixel) );
  1201. for( int i = 0; i < 4; i++ )
  1202. + {
  1203. frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
  1204. + frame->filtered_fld[i] = frame->buffer_fld[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
  1205. + }
  1206. frame->plane[0] = frame->filtered[0];
  1207. + frame->plane_fld[0] = frame->filtered_fld[0];
  1208. }
  1209. else
  1210. {
  1211. CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
  1212. + CHECKED_MALLOC( frame->buffer_fld[0], luma_plane_size * sizeof(pixel) );
  1213. frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
  1214. + frame->filtered_fld[0] = frame->plane_fld[0] = frame->buffer_fld[0] + frame->i_stride[0] * i_padv + PADH;
  1215. }
  1216.  
  1217. frame->b_duplicate = 0;
  1218. @@ -200,7 +209,10 @@ void x264_frame_delete( x264_frame_t *frame )
  1219. if( !frame->b_duplicate )
  1220. {
  1221. for( int i = 0; i < 4; i++ )
  1222. + {
  1223. x264_free( frame->buffer[i] );
  1224. + x264_free( frame->buffer_fld[i] );
  1225. + }
  1226. for( int i = 0; i < 4; i++ )
  1227. x264_free( frame->buffer_lowres[i] );
  1228. for( int i = 0; i < X264_BFRAME_MAX+2; i++ )
  1229. @@ -363,16 +375,25 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_e
  1230. int padh = PADH;
  1231. int padv = PADV >> !!i;
  1232. // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
  1233. - pixel *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
  1234. if( b_end && !b_start )
  1235. height += 4 >> (!!i + h->sh.b_mbaff);
  1236. + pixel *pix;
  1237. if( h->sh.b_mbaff )
  1238. {
  1239. + // border samples for each field are extended separately
  1240. + pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
  1241. plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, i );
  1242. plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, i );
  1243. +
  1244. + height = (b_end ? 16*(h->mb.i_mb_height - mb_y) : 32) >> !!i;
  1245. + if( b_end && !b_start )
  1246. + height += 4 >> (!!i);
  1247. + pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
  1248. + plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
  1249. }
  1250. else
  1251. {
  1252. + pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
  1253. plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, i );
  1254. }
  1255. }
  1256. @@ -392,14 +413,16 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
  1257. for( int i = 1; i < 4; i++ )
  1258. {
  1259. // buffer: 8 luma, to match the hpel filter
  1260. - pixel *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
  1261. + pixel *pix;
  1262. if( h->sh.b_mbaff )
  1263. {
  1264. + pix = frame->filtered_fld[i] + (16*mb_y - 16) * stride - 4;
  1265. plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
  1266. plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end, 0 );
  1267. }
  1268. - else
  1269. - plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end, 0 );
  1270. +
  1271. + pix = frame->filtered[i] + (16*mb_y - 8) * stride - 4;
  1272. + plane_expand_border( pix, stride, width, height << h->sh.b_mbaff, padh, padv, b_start, b_end, 0 );
  1273. }
  1274. }
  1275.  
  1276. diff --git a/common/frame.h b/common/frame.h
  1277. index 0e0ab3d..8fe0627 100644
  1278. --- a/common/frame.h
  1279. +++ b/common/frame.h
  1280. @@ -72,13 +72,16 @@ typedef struct x264_frame
  1281. int i_width_lowres;
  1282. int i_lines_lowres;
  1283. pixel *plane[2];
  1284. + pixel *plane_fld[2];
  1285. pixel *filtered[4]; /* plane[0], H, V, HV */
  1286. + pixel *filtered_fld[4];
  1287. pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
  1288. uint16_t *integral;
  1289.  
  1290. /* for unrestricted mv we allocate more data than needed
  1291. * allocated data are stored in buffer */
  1292. pixel *buffer[4];
  1293. + pixel *buffer_fld[4];
  1294. pixel *buffer_lowres[4];
  1295.  
  1296. x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
  1297. diff --git a/common/macroblock.c b/common/macroblock.c
  1298. index f775030..a441981 100644
  1299. --- a/common/macroblock.c
  1300. +++ b/common/macroblock.c
  1301. @@ -523,7 +523,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  1302. int i_pix_offset = h->mb.b_interlaced
  1303. ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  1304. : 16 * mb_x + w * mb_y * i_stride;
  1305. - pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
  1306. + pixel *plane_fdec = h->mb.b_interlaced ? &h->fdec->plane_fld[i][i_pix_offset] : &h->fdec->plane[i][i_pix_offset];
  1307. pixel *intra_fdec = &h->intra_border_backup[h->mb.intra_border_index][i][mb_x*16];
  1308. int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
  1309. /* ref_pix_offset[0] references the current field and [1] the opposite field. */
  1310. @@ -570,15 +570,28 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  1311. else
  1312. h->mb.pic.p_fdec[0][-1+j*FDEC_STRIDE] = plane_fdec[-1+j*i_stride2];
  1313. }
  1314. + pixel *plane_src, **filtered_src;
  1315. for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
  1316. {
  1317. - h->mb.pic.p_fref[0][j][i?4:0] = &h->fref[0][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
  1318. + // Interpolate between pixels in same field.
  1319. + if( h->mb.b_interlaced )
  1320. + {
  1321. + plane_src = h->fref[0][j>>1]->plane_fld[i];
  1322. + filtered_src = h->fref[0][j>>1]->filtered_fld;
  1323. + }
  1324. + else
  1325. + {
  1326. + plane_src = h->fref[0][j]->plane[i];
  1327. + filtered_src = h->fref[0][j]->filtered;
  1328. + }
  1329. + h->mb.pic.p_fref[0][j][i?4:0] = plane_src + ref_pix_offset[j&1];
  1330. +
  1331. if( !i )
  1332. {
  1333. for( int k = 1; k < 4; k++ )
  1334. - h->mb.pic.p_fref[0][j][k] = &h->fref[0][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
  1335. + h->mb.pic.p_fref[0][j][k] = filtered_src[k] + ref_pix_offset[j&1];
  1336. if( h->sh.weight[j][0].weightfn )
  1337. - h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> b_interlaced][ref_pix_offset[j&1]];
  1338. + h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> h->mb.b_interlaced][ref_pix_offset[j&1]];
  1339. else
  1340. h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
  1341. }
  1342. @@ -586,10 +599,21 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  1343. if( h->sh.i_type == SLICE_TYPE_B )
  1344. for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
  1345. {
  1346. - h->mb.pic.p_fref[1][j][i?4:0] = &h->fref[1][j >> b_interlaced]->plane[i][ref_pix_offset[j&1]];
  1347. + if( h->mb.b_interlaced )
  1348. + {
  1349. + plane_src = h->fref[1][j>>1]->plane_fld[i];
  1350. + filtered_src = h->fref[1][j>>1]->filtered_fld;
  1351. + }
  1352. + else
  1353. + {
  1354. + plane_src = h->fref[1][j]->plane[i];
  1355. + filtered_src = h->fref[1][j]->filtered;
  1356. + }
  1357. + h->mb.pic.p_fref[1][j][i?4:0] = plane_src + ref_pix_offset[j&1];
  1358. +
  1359. if( !i )
  1360. for( int k = 1; k < 4; k++ )
  1361. - h->mb.pic.p_fref[1][j][k] = &h->fref[1][j >> b_interlaced]->filtered[k][ref_pix_offset[j&1]];
  1362. + h->mb.pic.p_fref[1][j][k] = filtered_src[k] + ref_pix_offset[j&1];
  1363. }
  1364. }
  1365.  
  1366. @@ -851,7 +875,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1367.  
  1368. if( h->mb.i_neighbour & MB_LEFT )
  1369. {
  1370. - h->mb.cache.i_cbp_left = cbp[left[0]];
  1371. + const int16_t top_luma = (cbp[left[0]] >> (left_index_table[18+0]&(~1))) & 2;
  1372. + const int16_t bot_luma = (cbp[left[1]] >> (left_index_table[18+2]&(~1))) & 2;
  1373. + h->mb.cache.i_cbp_left = (cbp[left[0]] & 0xfff0) | (bot_luma<<2) | top_luma;
  1374.  
  1375. /* load intra4x4 */
  1376. h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table[0]];
  1377. @@ -903,8 +929,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1378. h->mb.pic.i_fref[0] = h->i_ref[0] << h->mb.b_interlaced;
  1379. h->mb.pic.i_fref[1] = h->i_ref[1] << h->mb.b_interlaced;
  1380. h->mb.cache.i_neighbour_interlaced =
  1381. - !!(h->mb.i_neighbour & MB_LEFT)
  1382. - + !!(h->mb.i_neighbour & MB_TOP);
  1383. + !!(h->mb.i_neighbour & MB_LEFT && h->mb.field[left[0]])
  1384. + + !!(h->mb.i_neighbour & MB_TOP && h->mb.field[top]);
  1385. }
  1386.  
  1387. if( !h->sh.b_mbaff )
  1388. @@ -941,8 +967,18 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1389. int i8 = x264_scan8[0] - 1 - 1*8;
  1390. if( h->mb.i_neighbour & MB_TOPLEFT )
  1391. {
  1392. - h->mb.cache.ref[l][i8] = ref[top_8x8 - 1];
  1393. - CP32( h->mb.cache.mv[l][i8], mv[top_4x4 - 1] );
  1394. + int y = h->mb.i_mb_topleft_xy / h->mb.i_mb_stride;
  1395. + int ir = 2*(s8x8*y + mb_x-1)+1+s8x8;
  1396. + int iv = 4*(s4x4*y + mb_x-1)+3+3*s4x4;
  1397. + if( h->mb.topleft_partition )
  1398. + {
  1399. + /* Take motion vector from the middle of macroblock instead of
  1400. + * the bottom right as usual. */
  1401. + iv -= 2*s4x4;
  1402. + ir -= s8x8;
  1403. + }
  1404. + h->mb.cache.ref[l][i8] = ref[ir];
  1405. + CP32( h->mb.cache.mv[l][i8], mv[iv] );
  1406. }
  1407. else
  1408. {
  1409. @@ -968,8 +1004,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1410. i8 = x264_scan8[0] + 4 - 1*8;
  1411. if( h->mb.i_neighbour & MB_TOPRIGHT )
  1412. {
  1413. - h->mb.cache.ref[l][i8] = ref[top_8x8 + 2];
  1414. - CP32( h->mb.cache.mv[l][i8], mv[top_4x4 + 4] );
  1415. + int y = h->mb.i_mb_topright_xy / h->mb.i_mb_stride;
  1416. + h->mb.cache.ref[l][i8] = ref[2*(s8x8*y + (mb_x+1))+s8x8];
  1417. + CP32( h->mb.cache.mv[l][i8], mv[4*(s4x4*y + (mb_x+1))+3*s4x4] );
  1418. }
  1419. else
  1420. h->mb.cache.ref[l][i8] = -2;
  1421. @@ -977,17 +1014,15 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1422. i8 = x264_scan8[0] - 1;
  1423. if( h->mb.i_neighbour & MB_LEFT )
  1424. {
  1425. - const int ir = h->mb.i_b8_xy - 1;
  1426. - const int iv = h->mb.i_b4_xy - 1;
  1427. - h->mb.cache.ref[l][i8+0*8] =
  1428. - h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
  1429. - h->mb.cache.ref[l][i8+2*8] =
  1430. - h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
  1431. -
  1432. - CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
  1433. - CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
  1434. - CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
  1435. - CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
  1436. + h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[0] + 1 + s8x8*((left_index_table[12+0]&~1)>>1)];
  1437. + h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*((left_index_table[12+1]&~1)>>1)];
  1438. + h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[1] + 1 + s8x8*((left_index_table[12+2]&~1)>>1)];
  1439. + h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*((left_index_table[12+3]&~1)>>1)];
  1440. +
  1441. + CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+0]] );
  1442. + CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+1]] );
  1443. + CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+2]] );
  1444. + CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+3]] );
  1445. }
  1446. else
  1447. {
  1448. @@ -998,6 +1033,42 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1449. }
  1450. }
  1451.  
  1452. + /* Extra logic for top right mv in mbaff.
  1453. + * . . . d . . a .
  1454. + * . . . e . . . .
  1455. + * . . . f b . c .
  1456. + * . . . . . . . .
  1457. + *
  1458. + * If the top right of the 4x4 partitions labeled a, b and c in the
  1459. + * above diagram do not exist, but the entries d, e and f exist (in
  1460. + * the macroblock to the left) then use those instead.
  1461. + */
  1462. + if( h->param.b_interlaced )
  1463. + {
  1464. + if( h->mb.i_neighbour & MB_LEFT )
  1465. + {
  1466. + if( h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_xy-1] )
  1467. + {
  1468. + h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*0];
  1469. + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*1];
  1470. + h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[1] + 1 + s8x8*0];
  1471. + CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table[12+0]+1)] );
  1472. + CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table[12+1]+1)] );
  1473. + CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[1] + 3 + s4x4*(left_index_table[12+2]+1)] );
  1474. + }
  1475. + else if( !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_xy-1] )
  1476. + {
  1477. + // Looking at the bottom field so always take the bottom macroblock of the pair.
  1478. + h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+4]];
  1479. + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+4]];
  1480. + h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table[12+5]];
  1481. + CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+0]] );
  1482. + CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+1]] );
  1483. + CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table[12+2]] );
  1484. + }
  1485. + }
  1486. + }
  1487. +
  1488. if( h->param.b_cabac )
  1489. {
  1490. uint8_t (*mvd)[8][2] = h->mb.mvd[l];
  1491. @@ -1006,16 +1077,103 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1492. else
  1493. M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
  1494.  
  1495. - if( h->mb.i_neighbour & MB_LEFT )
  1496. + if( h->mb.cache.ref[l][x264_scan8[0]-1] >= 0 )
  1497. {
  1498. CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table[0]] );
  1499. CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table[1]] );
  1500. + }
  1501. + else
  1502. + {
  1503. + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
  1504. + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
  1505. + }
  1506. + if( h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0 )
  1507. + {
  1508. CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table[2]] );
  1509. CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table[3]] );
  1510. }
  1511. else
  1512. - for( int i = 0; i < 4; i++ )
  1513. - M16( h->mb.cache.mvd[l][x264_scan8[0]-1+i*8] ) = 0;
  1514. + {
  1515. + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+2*8] ) = 0;
  1516. + M16( h->mb.cache.mvd[l][x264_scan8[0]-1+3*8] ) = 0;
  1517. + }
  1518. + }
  1519. +
  1520. + /* If motion vectors are cached from frame macroblocks but this
  1521. + * macroblock is a field macroblock then the motion vector must be
  1522. + * halved. Similarly, motion vectors from field macroblocks are doubled. */
  1523. + if( h->sh.b_mbaff )
  1524. + {
  1525. +#define MAP_MVS\
  1526. + MAP_F2F(mv, ref, x264_scan8[0] - 1 - 1*8, h->mb.i_mb_topleft_xy)\
  1527. + MAP_F2F(mv, ref, x264_scan8[0] + 0 - 1*8, top)\
  1528. + MAP_F2F(mv, ref, x264_scan8[0] + 1 - 1*8, top)\
  1529. + MAP_F2F(mv, ref, x264_scan8[0] + 2 - 1*8, top)\
  1530. + MAP_F2F(mv, ref, x264_scan8[0] + 3 - 1*8, top)\
  1531. + MAP_F2F(mv, ref, x264_scan8[0] + 4 - 1*8, h->mb.i_mb_topright_xy)\
  1532. + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 0*8, left[0])\
  1533. + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 1*8, left[0])\
  1534. + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 2*8, left[1])\
  1535. + MAP_F2F(mv, ref, x264_scan8[0] - 1 + 3*8, left[1])\
  1536. + MAP_F2F(topright_mv, topright_ref, 0, left[0])\
  1537. + MAP_F2F(topright_mv, topright_ref, 1, left[0])\
  1538. + MAP_F2F(topright_mv, topright_ref, 2, left[1])
  1539. +
  1540. + if( h->mb.b_interlaced )
  1541. + {
  1542. +#define MAP_F2F(varmv, varref, index, macroblock)\
  1543. + if( h->mb.cache.varref[l][index] >= 0 && macroblock >= 0 && !h->mb.field[macroblock] )\
  1544. + {\
  1545. + h->mb.cache.varref[l][index] <<= 1;\
  1546. + h->mb.cache.varmv[l][index][1] /= 2;\
  1547. + h->mb.cache.mvd[l][index][1] >>= 1;\
  1548. + }
  1549. + MAP_MVS
  1550. +#undef MAP_F2F
  1551. + }
  1552. + else
  1553. + {
  1554. +#define MAP_F2F(varmv, varref, index, macroblock)\
  1555. + if( h->mb.cache.varref[l][index] >= 0 && macroblock >= 0 && h->mb.field[macroblock] )\
  1556. + {\
  1557. + h->mb.cache.varref[l][index] >>= 1;\
  1558. + h->mb.cache.varmv[l][index][1] <<= 1;\
  1559. + h->mb.cache.mvd[l][index][1] <<= 1;\
  1560. + }
  1561. + MAP_MVS
  1562. +#undef MAP_F2F
  1563. + }
  1564. + }
  1565. + }
  1566. +
  1567. + /* Check whether skip here would cause decoder to predict interlace mode incorrectly. */
  1568. + h->mb.allow_skip = 1;
  1569. + if( h->sh.b_mbaff && (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
  1570. + {
  1571. + if( h->mb.i_neighbour & MB_LEFT )
  1572. + {
  1573. + if( h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced )
  1574. + h->mb.allow_skip = 0;
  1575. + }
  1576. + else if( h->mb.i_neighbour & MB_TOP )
  1577. + {
  1578. + if( h->mb.field[h->mb.i_mb_top_xy] != h->mb.b_interlaced )
  1579. + h->mb.allow_skip = 0;
  1580. + }
  1581. + else // Frame mb pair is predicted
  1582. + {
  1583. + if( h->mb.b_interlaced )
  1584. + h->mb.allow_skip = 0;
  1585. + }
  1586. + if( !h->mb.allow_skip )
  1587. + {
  1588. + if( IS_SKIP(h->mb.i_type) )
  1589. + {
  1590. + if( h->mb.i_type == P_SKIP )
  1591. + h->mb.i_type = P_L0;
  1592. + else if( h->mb.i_type == B_SKIP )
  1593. + h->mb.i_type = B_DIRECT;
  1594. + }
  1595. }
  1596. }
  1597.  
  1598. @@ -1286,6 +1444,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1599. }
  1600. }
  1601. h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
  1602. + h->mc.store_interleave_8x8x2( &h->fdec->plane_fld[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
  1603. memcpy( intra_fdec, h->mb.pic.p_fdec[1]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  1604. memcpy( intra_fdec+8, h->mb.pic.p_fdec[2]+FDEC_STRIDE*7, 8*sizeof(pixel) );
  1605. twiddle_topleft_pixel( h->mb.pic.p_fdec[1]-FDEC_STRIDE-1, h->mb.pic.p_fdec[1]-FDEC_STRIDE+7, h->sh.b_mbaff );
  1606. @@ -1299,12 +1458,13 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1607. {
  1608. if( mb_x )
  1609. {
  1610. + // Take rightmost sample from top border of left mb to use as topleft here.
  1611. h->intra_diagonal_backup[1][0][mb_x*intra_diag_width] = h->intra_border_backup[1][0][(mb_x-1)*16+15];
  1612. h->intra_diagonal_backup[2][0][mb_x*intra_diag_width] = h->intra_border_backup[2][0][(mb_x-1)*16+15];
  1613. CP64( &h->intra_diagonal_backup[1][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15 );
  1614. CP64( &h->intra_diagonal_backup[2][0][(mb_x-1)*intra_diag_width+1], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14 );
  1615. }
  1616. - memcpy( &h->intra_border_backup[2][i][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
  1617. + memcpy( &h->intra_border_backup[2][0][mb_x*16], h->mb.pic.p_fdec[0]+FDEC_STRIDE*14, 16*sizeof(pixel) );
  1618. }
  1619. else
  1620. {
  1621. @@ -1316,6 +1476,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  1622. else
  1623. twiddle_topleft_pixel( h->mb.pic.p_fdec[0]-FDEC_STRIDE-1, h->mb.pic.p_fdec[0]-FDEC_STRIDE+15, h->sh.b_mbaff );
  1624. h->mc.copy[PIXEL_16x16]( &h->fdec->plane[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
  1625. + h->mc.copy[PIXEL_16x16]( &h->fdec->plane_fld[0][i_pix_offset], i_stride2, h->mb.pic.p_fdec[0], FDEC_STRIDE, 16 );
  1626. memcpy( intra_fdec, h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
  1627. }
  1628. }
  1629. diff --git a/common/mc.c b/common/mc.c
  1630. index 76061c3..e594785 100644
  1631. --- a/common/mc.c
  1632. +++ b/common/mc.c
  1633. @@ -512,22 +512,43 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
  1634. void x264_frame_filter( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
  1635. {
  1636. const int b_interlaced = h->sh.b_mbaff;
  1637. - const int stride = frame->i_stride[0] << b_interlaced;
  1638. + int stride = frame->i_stride[0];
  1639. const int width = frame->i_width[0];
  1640. - int start = (mb_y*16 >> b_interlaced) - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
  1641. - int height = ((b_end ? frame->i_lines[0] : mb_y*16) >> b_interlaced) + 8;
  1642. + int start = mb_y*16 - 8; // buffer = 4 for deblock + 3 for 6tap, rounded to 8
  1643. + int height = (b_end ? frame->i_lines[0] + 16 : (mb_y+b_interlaced)*16) + 8;
  1644. int offs = start*stride - 8; // buffer = 3 for 6tap, aligned to 8 for simd
  1645.  
  1646. if( mb_y & b_interlaced )
  1647. return;
  1648.  
  1649. - for( int y = 0; y <= b_interlaced; y++, offs += frame->i_stride[0] )
  1650. + h->mc.hpel_filter(
  1651. + frame->filtered[1] + offs,
  1652. + frame->filtered[2] + offs,
  1653. + frame->filtered[3] + offs,
  1654. + frame->plane[0] + offs,
  1655. + stride, width + 16, height - start,
  1656. + h->scratch_buffer );
  1657. +
  1658. + if( b_interlaced )
  1659. {
  1660. + /* MC must happen between pixels in the same field. */
  1661. + stride = frame->i_stride[0] << 1;
  1662. + start = (mb_y*16 >> 1) - 8;
  1663. + height = ((b_end ? frame->i_lines[0] : mb_y*16) >> 1) + 8;
  1664. + offs = start*stride - 8;
  1665. + h->mc.hpel_filter(
  1666. + frame->filtered_fld[1] + offs,
  1667. + frame->filtered_fld[2] + offs,
  1668. + frame->filtered_fld[3] + offs,
  1669. + frame->plane_fld[0] + offs,
  1670. + stride, width + 16, height - start,
  1671. + h->scratch_buffer );
  1672. + offs += frame->i_stride[0];
  1673. h->mc.hpel_filter(
  1674. - frame->filtered[1] + offs,
  1675. - frame->filtered[2] + offs,
  1676. - frame->filtered[3] + offs,
  1677. - frame->plane[0] + offs,
  1678. + frame->filtered_fld[1] + offs,
  1679. + frame->filtered_fld[2] + offs,
  1680. + frame->filtered_fld[3] + offs,
  1681. + frame->plane_fld[0] + offs,
  1682. stride, width + 16, height - start,
  1683. h->scratch_buffer );
  1684. }
  1685. diff --git a/common/mvpred.c b/common/mvpred.c
  1686. index c8efe1f..278e0ac 100644
  1687. --- a/common/mvpred.c
  1688. +++ b/common/mvpred.c
  1689. @@ -38,12 +38,33 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
  1690. int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
  1691. int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
  1692.  
  1693. + // Partitions not yet reached in scan order are unavailable.
  1694. if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
  1695. {
  1696. i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
  1697. mv_c = h->mb.cache.mv[i_list][i8 - 8 - 1];
  1698. - }
  1699.  
  1700. + if( h->param.b_interlaced
  1701. + && h->mb.cache.ref[i_list][x264_scan8[0]-1] != -2
  1702. + && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_left_xy[0]] )
  1703. + {
  1704. + if( idx == 2 )
  1705. + {
  1706. + mv_c = h->mb.cache.topright_mv[i_list][0];
  1707. + i_refc = h->mb.cache.topright_ref[i_list][0];
  1708. + }
  1709. + else if( idx == 8 )
  1710. + {
  1711. + mv_c = h->mb.cache.topright_mv[i_list][1];
  1712. + i_refc = h->mb.cache.topright_ref[i_list][1];
  1713. + }
  1714. + else if( idx == 10 )
  1715. + {
  1716. + mv_c = h->mb.cache.topright_mv[i_list][2];
  1717. + i_refc = h->mb.cache.topright_ref[i_list][2];
  1718. + }
  1719. + }
  1720. + }
  1721. if( h->mb.i_partition == D_16x8 )
  1722. {
  1723. if( idx == 0 )
  1724. diff --git a/common/x86/util.h b/common/x86/util.h
  1725. index 6544207..01e54f9 100644
  1726. --- a/common/x86/util.h
  1727. +++ b/common/x86/util.h
  1728. @@ -87,30 +87,6 @@ static ALWAYS_INLINE int x264_predictor_difference_mmxext( int16_t (*mvc)[2], in
  1729. return sum;
  1730. }
  1731.  
  1732. -#define x264_cabac_mvd_sum x264_cabac_mvd_sum_mmxext
  1733. -static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_t *mvdtop)
  1734. -{
  1735. - static const uint64_t pb_2 = 0x0202020202020202ULL;
  1736. - static const uint64_t pb_32 = 0x2020202020202020ULL;
  1737. - int amvd;
  1738. - asm(
  1739. - "movd %1, %%mm0 \n"
  1740. - "movd %2, %%mm1 \n"
  1741. - "paddb %%mm1, %%mm0 \n"
  1742. - "pxor %%mm2, %%mm2 \n"
  1743. - "movq %%mm0, %%mm1 \n"
  1744. - "pcmpgtb %3, %%mm0 \n"
  1745. - "pcmpgtb %4, %%mm1 \n"
  1746. - "psubb %%mm0, %%mm2 \n"
  1747. - "psubb %%mm1, %%mm2 \n"
  1748. - "movd %%mm2, %0 \n"
  1749. - :"=r"(amvd)
  1750. - :"m"(M16( mvdleft )),"m"(M16( mvdtop )),
  1751. - "m"(pb_2),"m"(pb_32)
  1752. - );
  1753. - return amvd;
  1754. -}
  1755. -
  1756. #define x264_predictor_roundclip x264_predictor_roundclip_mmxext
  1757. static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
  1758. {
  1759. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  1760. index a3fcd61..28609d8 100644
  1761. --- a/encoder/macroblock.c
  1762. +++ b/encoder/macroblock.c
  1763. @@ -609,9 +609,7 @@ void x264_macroblock_encode( x264_t *h )
  1764. return;
  1765. }
  1766.  
  1767. - if( h->sh.b_mbaff
  1768. - && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
  1769. - && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
  1770. + if( !h->mb.allow_skip )
  1771. {
  1772. /* The first skip is predicted to be a frame mb pair.
  1773. * We don't yet support the aff part of mbaff, so force it to non-skip
  1774. --
  1775. 1.7.4
  1776.  
  1777.  
  1778. From 88aa754aa9a2e3f2907f656e21439e7b93b8cfab Mon Sep 17 00:00:00 2001
  1779. From: Simon Horlick <simonhorlick@gmail.com>
  1780. Date: Wed, 16 Mar 2011 21:34:51 +0000
  1781. Subject: [PATCH 09/25] Copy deblocked pixels to other plane
  1782.  
  1783. ---
  1784. common/deblock.c | 15 +++++++++++++++
  1785. 1 files changed, 15 insertions(+), 0 deletions(-)
  1786.  
  1787. diff --git a/common/deblock.c b/common/deblock.c
  1788. index 0800461..52d410d 100644
  1789. --- a/common/deblock.c
  1790. +++ b/common/deblock.c
  1791. @@ -389,6 +389,21 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  1792.  
  1793. #undef FILTER
  1794. }
  1795. +
  1796. + // FIXME: Don't copy the whole frame around.
  1797. + int y = mb_y*16;
  1798. + int start = mb_y == h->i_threadslice_start;
  1799. + int last = mb_y == h->i_threadslice_end - (1 << h->sh.b_mbaff);
  1800. + int height = last ? 32+4 : 32;
  1801. + if( !start ) y -= 4; // Make sure to copy the above four rows of deblocked pixels.
  1802. + for( int i = y; i < y+height; i++ )
  1803. + memcpy( h->fdec->plane_fld[0] + i*stridey, h->fdec->plane[0] + i*stridey, h->mb.i_mb_width*16*sizeof(pixel) );
  1804. +
  1805. + y = mb_y*8;
  1806. + height = last ? 16+2 : 16;
  1807. + if( !start ) y -=2;
  1808. + for( int i = y; i < y+height; i++ )
  1809. + memcpy( h->fdec->plane_fld[1] + i*strideuv, h->fdec->plane[1] + i*strideuv, h->mb.i_mb_width*16*sizeof(pixel) );
  1810. }
  1811.  
  1812. /* For deblock-aware RD.
  1813. --
  1814. 1.7.4
  1815.  
  1816.  
  1817. From d42239c146bd2fc1417987ce1794fbf049796112 Mon Sep 17 00:00:00 2001
  1818. From: Simon Horlick <simonhorlick@gmail.com>
  1819. Date: Sun, 6 Feb 2011 22:58:39 +0000
  1820. Subject: [PATCH 10/25] Fix thread max mv check
  1821.  
  1822. ---
  1823. encoder/analyse.c | 2 +-
  1824. 1 files changed, 1 insertions(+), 1 deletions(-)
  1825.  
  1826. diff --git a/encoder/analyse.c b/encoder/analyse.c
  1827. index 87125c1..4f439d4 100644
  1828. --- a/encoder/analyse.c
  1829. +++ b/encoder/analyse.c
  1830. @@ -460,7 +460,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  1831.  
  1832. if( h->param.b_deterministic )
  1833. thread_mvy_range = h->param.analyse.i_mv_range_thread;
  1834. - if( h->mb.b_interlaced )
  1835. + if( h->sh.b_mbaff )
  1836. thread_mvy_range >>= 1;
  1837.  
  1838. x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
  1839. --
  1840. 1.7.4
  1841.  
  1842.  
  1843. From 0ce16f54027e00ee0a9070e807eed3ca570325ad Mon Sep 17 00:00:00 2001
  1844. From: Simon Horlick <simonhorlick@gmail.com>
  1845. Date: Sun, 20 Feb 2011 15:31:55 +0000
  1846. Subject: [PATCH 11/25] Track what interlace decision the decoder is using
  1847.  
  1848. ---
  1849. common/common.h | 1 +
  1850. encoder/cabac.c | 17 ++++++++++++++++-
  1851. encoder/encoder.c | 11 +++++++++++
  1852. 3 files changed, 28 insertions(+), 1 deletions(-)
  1853.  
  1854. diff --git a/common/common.h b/common/common.h
  1855. index ef9b35a..bc14c10 100644
  1856. --- a/common/common.h
  1857. +++ b/common/common.h
  1858. @@ -604,6 +604,7 @@ struct x264_t
  1859. int intra_border_index;
  1860. int topleft_border_index;
  1861. int topright_border_index;
  1862. + int field_decoding_flag;
  1863.  
  1864. /**** thread synchronization ends here ****/
  1865. /* subsequent variables are either thread-local or constant,
  1866. diff --git a/encoder/cabac.c b/encoder/cabac.c
  1867. index 334318d..6138d06 100644
  1868. --- a/encoder/cabac.c
  1869. +++ b/encoder/cabac.c
  1870. @@ -66,6 +66,21 @@ static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_
  1871. }
  1872. }
  1873.  
  1874. +static void x264_cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb ) /* Codes h->mb.b_interlaced as one CABAC decision using context 70 + ctx (ctx is 0, 1 or 2). */
  1875. +{
  1876. + const int top = h->mb.i_mb_xy - 2*h->mb.i_mb_stride; /* macroblock index two rows above the current one */
  1877. + int ctx = 0;
  1878. + ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x; /* left term: the tracked flag, forced to 0 when i_mb_x is 0 */
  1879. + ctx += (top >= 0
  1880. + && h->mb.slice_table[top] == h->sh.i_first_mb
  1881. + && h->mb.field[top]); /* top term: index in range, slice_table entry equals this slice first_mb, and field[] is set */
  1882. +
  1883. + x264_cabac_encode_decision_noup( cb, 70 + ctx, h->mb.b_interlaced );
  1884. +#if !RDO_SKIP_BS
  1885. + h->mb.field_decoding_flag = h->mb.b_interlaced; /* remember the value just coded; compiled out when RDO_SKIP_BS is nonzero */
  1886. +#endif
  1887. +}
  1888. +
  1889. static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  1890. {
  1891. const int i_mb_type = h->mb.i_type;
  1892. @@ -73,7 +88,7 @@ static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  1893. if( h->sh.b_mbaff &&
  1894. (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
  1895. {
  1896. - x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
  1897. + x264_cabac_field_decoding_flag( h, cb );
  1898. }
  1899.  
  1900. if( h->sh.i_type == SLICE_TYPE_I )
  1901. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1902. index 9f294d1..2525fec 100644
  1903. --- a/encoder/encoder.c
  1904. +++ b/encoder/encoder.c
  1905. @@ -1932,6 +1932,8 @@ static int x264_slice_write( x264_t *h )
  1906. i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
  1907. i_skip = 0;
  1908.  
  1909. + h->mb.field_decoding_flag = 0;
  1910. +
  1911. while( (mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width) <= h->sh.i_last_mb )
  1912. {
  1913. int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
  1914. @@ -1988,7 +1990,12 @@ static int x264_slice_write( x264_t *h )
  1915. x264_cabac_encode_terminal( &h->cabac );
  1916.  
  1917. if( IS_SKIP( h->mb.i_type ) )
  1918. + {
  1919. + // FIXME: It might be better to change the interlace type
  1920. + // rather than forcing a skip to be non-skip, but this would
  1921. + // require modifying the already saved image data.
  1922. x264_cabac_mb_skip( h, 1 );
  1923. + }
  1924. else
  1925. {
  1926. if( h->sh.i_type != SLICE_TYPE_I )
  1927. @@ -2145,6 +2152,10 @@ static int x264_slice_write( x264_t *h )
  1928. {
  1929. i_mb_y++;
  1930. i_mb_x = 0;
  1931. + if( h->sh.b_mbaff && i_mb_y > 0 )
  1932. + h->mb.field_decoding_flag = h->mb.field[i_mb_x+(i_mb_y-1)*h->mb.i_mb_stride];
  1933. + else
  1934. + h->mb.field_decoding_flag = 0;
  1935. }
  1936. }
  1937. h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
  1938. --
  1939. 1.7.4
  1940.  
  1941.  
  1942. From 02af876c83b5a6fc69a20c1928ad604069e533a7 Mon Sep 17 00:00:00 2001
  1943. From: Simon Horlick <simonhorlick@gmail.com>
  1944. Date: Sun, 20 Feb 2011 15:35:44 +0000
  1945. Subject: [PATCH 12/25] Disallow skip where interlace would be wrong
  1946.  
  1947. ---
  1948. common/macroblock.c | 27 +++++++++++++++++++--------
  1949. 1 files changed, 19 insertions(+), 8 deletions(-)
  1950.  
  1951. diff --git a/common/macroblock.c b/common/macroblock.c
  1952. index a441981..4fe1f82 100644
  1953. --- a/common/macroblock.c
  1954. +++ b/common/macroblock.c
  1955. @@ -1148,6 +1148,17 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1956.  
  1957. /* Check whether skip here would cause decoder to predict interlace mode incorrectly. */
  1958. h->mb.allow_skip = 1;
  1959. + int prevmb = mb_x + h->mb.i_mb_stride*mb_y;
  1960. + if( mb_y&1 )
  1961. + prevmb -= h->mb.i_mb_stride;
  1962. + else if( mb_x )
  1963. + prevmb = (mb_y+1)*h->mb.i_mb_stride + mb_x - 1;
  1964. + else
  1965. + prevmb = (mb_y-1)*h->mb.i_mb_stride + h->mb.i_mb_width;
  1966. + if( h->mb.b_interlaced != h->mb.field_decoding_flag && IS_SKIP(h->mb.type[prevmb]) )
  1967. + {
  1968. + h->mb.allow_skip = 0;
  1969. + }
  1970. if( h->sh.b_mbaff && (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
  1971. {
  1972. if( h->mb.i_neighbour & MB_LEFT )
  1973. @@ -1165,15 +1176,15 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  1974. if( h->mb.b_interlaced )
  1975. h->mb.allow_skip = 0;
  1976. }
  1977. - if( !h->mb.allow_skip )
  1978. + }
  1979. + if( !h->mb.allow_skip )
  1980. + {
  1981. + if( IS_SKIP(h->mb.i_type) )
  1982. {
  1983. - if( IS_SKIP(h->mb.i_type) )
  1984. - {
  1985. - if( h->mb.i_type == P_SKIP )
  1986. - h->mb.i_type = P_L0;
  1987. - else if( h->mb.i_type == B_SKIP )
  1988. - h->mb.i_type = B_DIRECT;
  1989. - }
  1990. + if( h->mb.i_type == P_SKIP )
  1991. + h->mb.i_type = P_L0;
  1992. + else if( h->mb.i_type == B_SKIP )
  1993. + h->mb.i_type = B_DIRECT;
  1994. }
  1995. }
  1996.  
  1997. --
  1998. 1.7.4
  1999.  
  2000.  
  2001. From 2a15d908619a780aa5a0f8bd9c6b61ade5d52237 Mon Sep 17 00:00:00 2001
  2002. From: Simon Horlick <simonhorlick@gmail.com>
  2003. Date: Sun, 20 Feb 2011 15:36:29 +0000
  2004. Subject: [PATCH 13/25] CABAC encoding of skips
  2005.  
  2006. ---
  2007. common/common.h | 2 ++
  2008. common/macroblock.c | 29 +++++++++++++++++++++++++++++
  2009. encoder/cabac.c | 14 +++++++-------
  2010. 3 files changed, 38 insertions(+), 7 deletions(-)
  2011.  
  2012. diff --git a/common/common.h b/common/common.h
  2013. index bc14c10..6d81496 100644
  2014. --- a/common/common.h
  2015. +++ b/common/common.h
  2016. @@ -605,6 +605,8 @@ struct x264_t
  2017. int topleft_border_index;
  2018. int topright_border_index;
  2019. int field_decoding_flag;
  2020. + int left_skip;
  2021. + int top_skip;
  2022.  
  2023. /**** thread synchronization ends here ****/
  2024. /* subsequent variables are either thread-local or constant,
  2025. diff --git a/common/macroblock.c b/common/macroblock.c
  2026. index 4fe1f82..c01320c 100644
  2027. --- a/common/macroblock.c
  2028. +++ b/common/macroblock.c
  2029. @@ -1188,6 +1188,35 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  2030. }
  2031. }
  2032.  
  2033. + if( h->param.b_cabac )
  2034. + {
  2035. + /* Neighbours here are calculated based on field_decoding_flag */
  2036. + int left_xy, top_xy;
  2037. + if( h->sh.b_mbaff )
  2038. + {
  2039. + int mb_xy = mb_x + (h->mb.i_mb_y&~1)*h->mb.i_mb_stride;
  2040. + left_xy = mb_xy - 1;
  2041. + if( (mb_y&1) && mb_x > 0 && h->mb.field_decoding_flag == h->mb.field[left_xy] )
  2042. + left_xy += h->mb.i_mb_stride;
  2043. + if( h->mb.field_decoding_flag )
  2044. + {
  2045. + top_xy = mb_xy - h->mb.i_mb_stride;
  2046. + if( !(mb_y&1) && top_xy >= 0 && h->mb.slice_table[top_xy] == h->sh.i_first_mb && h->mb.field[top_xy] )
  2047. + top_xy -= h->mb.i_mb_stride;
  2048. + }
  2049. + else
  2050. + top_xy = mb_x + (mb_y-1)*h->mb.i_mb_stride;
  2051. + }
  2052. + else
  2053. + {
  2054. + left_xy = h->mb.i_mb_xy - 1;
  2055. + top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
  2056. + }
  2057. +
  2058. + h->mb.left_skip = mb_x > 0 && h->mb.slice_table[left_xy] == h->sh.i_first_mb && !IS_SKIP( h->mb.type[left_xy] );
  2059. + h->mb.top_skip = top_xy >= 0 && (h->mb.slice_table[top_xy] == h->sh.i_first_mb) && !IS_SKIP( h->mb.type[top_xy] );
  2060. + }
  2061. +
  2062. /* load skip */
  2063. if( h->sh.i_type == SLICE_TYPE_B )
  2064. {
  2065. diff --git a/encoder/cabac.c b/encoder/cabac.c
  2066. index 6138d06..3435048 100644
  2067. --- a/encoder/cabac.c
  2068. +++ b/encoder/cabac.c
  2069. @@ -295,10 +295,10 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  2070. #if !RDO_SKIP_BS
  2071. void x264_cabac_mb_skip( x264_t *h, int b_skip )
  2072. {
  2073. - int ctx = ((h->mb.i_neighbour & MB_LEFT) && !IS_SKIP( h->mb.i_mb_type_left[0] ))
  2074. - + ((h->mb.i_neighbour & MB_TOP) && !IS_SKIP( h->mb.i_mb_type_top ))
  2075. - + (h->sh.i_type == SLICE_TYPE_P ? 11 : 24);
  2076. - x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
  2077. + int ctx = h->mb.left_skip + h->mb.top_skip;
  2078. + if( h->sh.i_type != SLICE_TYPE_P )
  2079. + ctx += 13;
  2080. + x264_cabac_encode_decision( &h->cabac, 11+ctx, b_skip );
  2081. }
  2082. #endif
  2083.  
  2084. @@ -350,7 +350,7 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx
  2085. const int i8 = x264_scan8[idx];
  2086. const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
  2087. const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
  2088. - int ctx = 0;
  2089. + int ctx = 0;
  2090.  
  2091. if( i_refa > 0 && !h->mb.cache.skip[i8 - 1] )
  2092. ctx++;
  2093. @@ -423,9 +423,9 @@ static ALWAYS_INLINE int x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int
  2094. x264_cabac_encode_bypass( cb, mvd < 0 );
  2095. }
  2096. #endif
  2097. - /* Since we don't need to keep track of MVDs larger than 33, just cap the value.
  2098. + /* Since we don't need to keep track of MVDs larger than 70, just cap the value.
  2099. * This lets us store MVDs as 8-bit values instead of 16-bit. */
  2100. - return X264_MIN( i_abs, 33 );
  2101. + return X264_MIN( i_abs, 70 );
  2102. }
  2103.  
  2104. static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
  2105. --
  2106. 1.7.4
  2107.  
  2108.  
  2109. From b49fa962fcd25984e5f65a36a2784fbe702bdb4c Mon Sep 17 00:00:00 2001
  2110. From: Simon Horlick <simonhorlick@gmail.com>
  2111. Date: Wed, 16 Mar 2011 21:18:59 +0000
  2112. Subject: [PATCH 14/25] Add mbaff deblock strength calculation
  2113.  
  2114. ---
  2115. common/deblock.c | 38 ++++++++++++++++++++++++++++++++++++++
  2116. common/frame.h | 3 +++
  2117. encoder/encoder.c | 12 +++++++++---
  2118. 3 files changed, 50 insertions(+), 3 deletions(-)
  2119.  
  2120. diff --git a/common/deblock.c b/common/deblock.c
  2121. index 52d410d..55a0154 100644
  2122. --- a/common/deblock.c
  2123. +++ b/common/deblock.c
  2124. @@ -269,6 +269,44 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
  2125. }
  2126. }
  2127. }
  2128. +void deblock_strength_mbaff( x264_t *h, uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
  2129. + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, int bframe ) /* Writes bs[dir][edge][i] = 2, 1 or 0 for every dir, edge and i. */
  2130. +{
  2131. + const int vertical = 0, horizontal = 1; /* dir 0 compares q with the entry to its left (q-1), dir 1 with the entry above (q-8) */
  2132. + int neighbour_field[2];
  2133. + int current_field = h->mb.b_interlaced;
  2134. + neighbour_field[vertical] = h->mb.i_mb_left_xy[0] >= 0 && h->mb.field[h->mb.i_mb_left_xy[0]]; /* 1 when the left index is >= 0 and that macroblock is flagged in h->mb.field */
  2135. + neighbour_field[horizontal] = h->mb.i_mb_top_xy >= 0 && h->mb.field[h->mb.i_mb_top_xy]; /* same test for the top index */
  2136. +
  2137. + for( int dir = 0; dir < 2; dir++ )
  2138. + {
  2139. + int edge_stride = dir ? 8 : 1; /* cache offset between consecutive edges */
  2140. + int part_stride = dir ? 1 : 8; /* cache offset between the four positions along one edge */
  2141. + for( int edge = 0; edge < 4; edge++ )
  2142. + {
  2143. + for( int i = 0, q = X264_SCAN8_0+edge*edge_stride; i < 4; i++, q += part_stride )
  2144. + {
  2145. + int p = q - edge_stride; /* entry on the other side of the edge from q */
  2146. + if( nnz[q] || nnz[p] ) /* a nonzero nnz entry on either side gives strength 2 */
  2147. + {
  2148. + bs[dir][edge][i] = 2;
  2149. + }
  2150. + else if( (edge == 0 && current_field != neighbour_field[dir]) || /* edge 0 only: field flag differs from the neighbour in this direction */
  2151. + ref[0][q] != ref[0][p] ||
  2152. + abs( mv[0][q][0] - mv[0][p][0] ) >= 4 || /* mv component 0 differs by 4 or more */
  2153. + abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit || /* mv component 1 differs by at least mvy_limit */
  2154. + (bframe && (ref[1][q] != ref[1][p] || /* list 1 is compared only when bframe is nonzero */
  2155. + abs( mv[1][q][0] - mv[1][p][0] ) >= 4 ||
  2156. + abs( mv[1][q][1] - mv[1][p][1] ) >= mvy_limit )) )
  2157. + {
  2158. + bs[dir][edge][i] = 1;
  2159. + }
  2160. + else
  2161. + bs[dir][edge][i] = 0;
  2162. + }
  2163. + }
  2164. + }
  2165. +}
  2166.  
  2167. static inline void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
  2168. {
  2169. diff --git a/common/frame.h b/common/frame.h
  2170. index 8fe0627..3296a2c 100644
  2171. --- a/common/frame.h
  2172. +++ b/common/frame.h
  2173. @@ -186,6 +186,9 @@ typedef struct
  2174. int bframe );
  2175. } x264_deblock_function_t;
  2176.  
  2177. +void deblock_strength_mbaff( x264_t *h, uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
  2178. + int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit, int bframe );
  2179. +
  2180. x264_frame_t *x264_frame_new( x264_t *h, int b_fdec );
  2181. void x264_frame_delete( x264_frame_t *frame );
  2182.  
  2183. diff --git a/encoder/encoder.c b/encoder/encoder.c
  2184. index 2525fec..0319126 100644
  2185. --- a/encoder/encoder.c
  2186. +++ b/encoder/encoder.c
  2187. @@ -2129,14 +2129,20 @@ static int x264_slice_write( x264_t *h )
  2188. /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
  2189. if( b_deblock )
  2190. {
  2191. - int mvy_limit = 4 >> h->sh.b_mbaff;
  2192. + int mvy_limit = 4 >> h->mb.b_interlaced;
  2193. uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
  2194. x264_macroblock_cache_load_deblock( h );
  2195. if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
  2196. memset( bs, 3, 2*4*4*sizeof(uint8_t) );
  2197. else
  2198. - h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
  2199. - bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
  2200. + {
  2201. + if( h->sh.b_mbaff )
  2202. + deblock_strength_mbaff( h, h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
  2203. + bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
  2204. + else
  2205. + h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
  2206. + bs, mvy_limit, h->sh.i_type == SLICE_TYPE_B );
  2207. + }
  2208. }
  2209.  
  2210. x264_ratecontrol_mb( h, mb_size );
  2211. --
  2212. 1.7.4
  2213.  
  2214.  
  2215. From 55bb59fb940208de76113875c0744694043a5f4d Mon Sep 17 00:00:00 2001
  2216. From: Simon Horlick <simonhorlick@gmail.com>
  2217. Date: Wed, 16 Mar 2011 21:27:07 +0000
  2218. Subject: [PATCH 15/25] Initial deblocking support
  2219.  
  2220. ---
  2221. common/deblock.c | 13 +++++++--
  2222. common/macroblock.c | 67 ++++++++++++++++++++++++++++++++++----------------
  2223. 2 files changed, 55 insertions(+), 25 deletions(-)
  2224.  
  2225. diff --git a/common/deblock.c b/common/deblock.c
  2226. index 55a0154..48788ae 100644
  2227. --- a/common/deblock.c
  2228. +++ b/common/deblock.c
  2229. @@ -345,13 +345,17 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2230. int b_interlaced = h->sh.b_mbaff;
  2231. int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
  2232. int stridey = h->fdec->i_stride[0];
  2233. - int stride2y = stridey << b_interlaced;
  2234. int strideuv = h->fdec->i_stride[1];
  2235. - int stride2uv = strideuv << b_interlaced;
  2236. +
  2237. + // Backup mb.b_interlaced because it will be changed in x264_macroblock_cache_load_neighbours_deblock.
  2238. + int interlaced_backup = h->mb.b_interlaced;
  2239.  
  2240. for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
  2241. {
  2242. x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
  2243. + h->mb.i_mb_x = mb_x;
  2244. + h->mb.i_mb_y = mb_y;
  2245. + h->mb.i_mb_xy = mb_x + h->mb.i_mb_stride*mb_y;
  2246. x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
  2247.  
  2248. int mb_xy = h->mb.i_mb_xy;
  2249. @@ -361,12 +365,14 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2250.  
  2251. pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
  2252. pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
  2253. - if( mb_y & b_interlaced )
  2254. + if( mb_y & h->mb.b_interlaced )
  2255. {
  2256. pixy -= 15*stridey;
  2257. pixuv -= 7*strideuv;
  2258. }
  2259.  
  2260. + int stride2y = stridey << h->mb.b_interlaced;
  2261. + int stride2uv = strideuv << h->mb.b_interlaced;
  2262. int qp = h->mb.qp[mb_xy];
  2263. int qpc = h->chroma_qp_table[qp];
  2264. int first_edge_only = h->mb.type[mb_xy] == P_SKIP || qp <= qp_thresh;
  2265. @@ -427,6 +433,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2266.  
  2267. #undef FILTER
  2268. }
  2269. + h->mb.b_interlaced = interlaced_backup;
  2270.  
  2271. // FIXME: Don't copy the whole frame around.
  2272. int y = mb_y*16;
  2273. diff --git a/common/macroblock.c b/common/macroblock.c
  2274. index c01320c..47bb2ff 100644
  2275. --- a/common/macroblock.c
  2276. +++ b/common/macroblock.c
  2277. @@ -1257,18 +1257,36 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
  2278. int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
  2279.  
  2280. h->mb.i_neighbour = 0;
  2281. - h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
  2282. +
  2283. + if( h->sh.b_mbaff )
  2284. + h->mb.b_interlaced = h->mb.field[h->mb.i_mb_xy];
  2285. +
  2286. + h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
  2287. + h->mb.i_mb_left_xy[1] =
  2288. + h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  2289. + if( h->sh.b_mbaff ) /* adjust the left/top indices set above when a neighbour's field flag differs from the current MB's */
  2290. + {
  2291. + if( mb_y&1 )
  2292. + {
  2293. + if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced ) /* mb_x == 0: i_mb_xy - 1 is not a left neighbour, do not read it */
  2294. + h->mb.i_mb_left_xy[0] -= h->mb.i_mb_stride;
  2295. + }
  2296. + else
  2297. + {
  2298. + if( h->mb.i_mb_top_xy >= 0 && h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_top_xy] ) /* i_mb_top_xy is negative on the first row: skip the field[] read */
  2299. + h->mb.i_mb_top_xy += h->mb.i_mb_stride;
  2300. + if( mb_x && h->mb.field[h->mb.i_mb_xy - 1] != h->mb.b_interlaced ) /* at mb_x == 0, mb_y == 0 this index would be -1 */
  2301. + h->mb.i_mb_left_xy[1] += h->mb.i_mb_stride;
  2302. + }
  2303. + }
  2304.  
  2305. if( mb_x > 0 )
  2306. {
  2307. - h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
  2308. if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_left_xy[0]] == h->mb.slice_table[h->mb.i_mb_xy] )
  2309. h->mb.i_neighbour |= MB_LEFT;
  2310. }
  2311. -
  2312. if( mb_y > h->mb.b_interlaced )
  2313. {
  2314. - h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
  2315. if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
  2316. h->mb.i_neighbour |= MB_TOP;
  2317. }
  2318. @@ -1276,7 +1294,7 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
  2319.  
  2320. void x264_macroblock_cache_load_deblock( x264_t *h )
  2321. {
  2322. - if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
  2323. + if( !h->sh.b_mbaff && IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
  2324. return;
  2325.  
  2326. /* If we have multiple slices and we're deblocking on slice edges, we
  2327. @@ -1291,7 +1309,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2328. h->mb.i_neighbour &= ~old_neighbour;
  2329. if( h->mb.i_neighbour )
  2330. {
  2331. - int top_y = mb_y - (1 << h->mb.b_interlaced);
  2332. + int top_y = h->mb.i_mb_top_xy / h->mb.i_mb_stride;
  2333. int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
  2334. int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
  2335. int s8x8 = h->mb.i_b8_stride;
  2336. @@ -1330,17 +1348,15 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2337. i8 = x264_scan8[0] - 1;
  2338. if( h->mb.i_neighbour & MB_LEFT )
  2339. {
  2340. - int ir = h->mb.i_b8_xy - 1;
  2341. - int iv = h->mb.i_b4_xy - 1;
  2342. h->mb.cache.ref[l][i8+0*8] =
  2343. - h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
  2344. + h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table[12+4]];
  2345. h->mb.cache.ref[l][i8+2*8] =
  2346. - h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
  2347. + h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table[12+5]];
  2348.  
  2349. - CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
  2350. - CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
  2351. - CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
  2352. - CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
  2353. + CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+0]] );
  2354. + CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table[12+1]] );
  2355. + CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+2]] );
  2356. + CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table[12+3]] );
  2357. }
  2358. }
  2359. }
  2360. @@ -1380,7 +1396,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2361. {
  2362. uint8_t (*nnz)[24] = h->mb.non_zero_count;
  2363. int top = h->mb.i_mb_top_xy;
  2364. - int left = h->mb.i_mb_left_xy[0];
  2365. + int *left = h->mb.i_mb_left_xy;
  2366.  
  2367. if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
  2368. {
  2369. @@ -1391,15 +1407,22 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2370. M16( &h->mb.cache.non_zero_count[i8+2] ) = nnz_top1 ? 0x0101 : 0;
  2371. }
  2372.  
  2373. - if( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
  2374. + if( h->mb.i_neighbour & MB_LEFT )
  2375. {
  2376. + // TODO: Merge code in deblock
  2377. int i8 = x264_scan8[0] - 1;
  2378. - int nnz_left0 = M16( &nnz[left][2] ) | M16( &nnz[left][6] );
  2379. - int nnz_left1 = M16( &nnz[left][10] ) | M16( &nnz[left][14] );
  2380. - h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
  2381. - h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
  2382. - h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
  2383. - h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
  2384. + if( h->mb.mb_transform_size[left[0]] )
  2385. + {
  2386. + int nnz_left0 = M16( &nnz[left[0]][2] ) | M16( &nnz[left[0]][6] );
  2387. + h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
  2388. + h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
  2389. + }
  2390. + if( h->mb.mb_transform_size[left[1]] )
  2391. + {
  2392. + int nnz_left1 = M16( &nnz[left[1]][10] ) | M16( &nnz[left[1]][14] );
  2393. + h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
  2394. + h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
  2395. + }
  2396. }
  2397.  
  2398. if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
  2399. --
  2400. 1.7.4
  2401.  
  2402.  
  2403. From c1420786e639c96f7fd0b11bac69952edeabbafc Mon Sep 17 00:00:00 2001
  2404. From: Simon Horlick <simonhorlick@gmail.com>
  2405. Date: Thu, 17 Mar 2011 18:15:06 +0000
  2406. Subject: [PATCH 16/25] Calculate deblock strength for mbaff
  2407.  
  2408. ---
  2409. common/common.h | 1 +
  2410. common/deblock.c | 1 +
  2411. common/macroblock.c | 5 +++++
  2412. 3 files changed, 7 insertions(+), 0 deletions(-)
  2413.  
  2414. diff --git a/common/common.h b/common/common.h
  2415. index 6d81496..7990cf0 100644
  2416. --- a/common/common.h
  2417. +++ b/common/common.h
  2418. @@ -844,6 +844,7 @@ struct x264_t
  2419. pixel *intra_border_backup[3][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
  2420. pixel *intra_diagonal_backup[5][3];
  2421. uint8_t (*deblock_strength[2])[2][4][4];
  2422. + uint8_t (*deblock_strength_mbaff[2])[2][8]; /* store [field][mb_x][dir][partition], there can be 8 different block strengths in mbaff left/top edges */
  2423.  
  2424. /* CPU functions dependents */
  2425. x264_predict_t predict_16x16[4+3];
  2426. diff --git a/common/deblock.c b/common/deblock.c
  2427. index 48788ae..c1bb10a 100644
  2428. --- a/common/deblock.c
  2429. +++ b/common/deblock.c
  2430. @@ -362,6 +362,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2431. int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
  2432. int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
  2433. uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
  2434. + uint8_t (*bs_mbaff)[8] = h->deblock_strength_mbaff[mb_y&1][mb_x];
  2435.  
  2436. pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
  2437. pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
  2438. diff --git a/common/macroblock.c b/common/macroblock.c
  2439. index 47bb2ff..3a4c873 100644
  2440. --- a/common/macroblock.c
  2441. +++ b/common/macroblock.c
  2442. @@ -340,7 +340,9 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  2443. for( int i = 0; i <= h->param.b_interlaced; i++ )
  2444. {
  2445. CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
  2446. + CHECKED_MALLOC( h->deblock_strength_mbaff[i], sizeof(**h->deblock_strength_mbaff) * h->mb.i_mb_width );
  2447. h->deblock_strength[1] = h->deblock_strength[i];
  2448. + h->deblock_strength_mbaff[1] = h->deblock_strength_mbaff[i];
  2449. }
  2450. }
  2451.  
  2452. @@ -372,7 +374,10 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
  2453. if( !b_lookahead )
  2454. {
  2455. for( int i = 0; i <= h->param.b_interlaced; i++ )
  2456. + {
  2457. x264_free( h->deblock_strength[i] );
  2458. + x264_free( h->deblock_strength_mbaff[i] );
  2459. + }
  2460. for( int i = 0; i <= 2*h->param.b_interlaced; i++ )
  2461. {
  2462. for( int j = 0; j < 2; j++ )
  2463. --
  2464. 1.7.4
  2465.  
  2466.  
  2467. From 6cda0dc785db073de2c496c93e412bdb54c37a2d Mon Sep 17 00:00:00 2001
  2468. From: Simon Horlick <simonhorlick@gmail.com>
  2469. Date: Wed, 16 Mar 2011 22:06:27 +0000
  2470. Subject: [PATCH 17/25] Left edge deblocking
  2471.  
  2472. ---
  2473. common/deblock.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++--
  2474. common/macroblock.c | 63 ++++++++++++++++++
  2475. 2 files changed, 236 insertions(+), 8 deletions(-)
  2476.  
  2477. diff --git a/common/deblock.c b/common/deblock.c
  2478. index c1bb10a..be96fc8 100644
  2479. --- a/common/deblock.c
  2480. +++ b/common/deblock.c
  2481. @@ -118,6 +118,128 @@ static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alp
  2482. }
  2483. }
  2484. }
  2485. +
  2486. +static inline void deblock_v_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
  2487. +{
  2488. + /* Filters 8 lines, `stride` apart, across the edge between pix[-1] and pix[0]; tc0 supplies 4 clip values, one per pair of lines (tc0[d>>1]). */
  2489. + for( int d = 0; d < 8; d++, pix += stride )
  2490. + {
  2491. + int p2 = pix[-3];
  2492. + int p1 = pix[-2];
  2493. + int p0 = pix[-1];
  2494. + int q0 = pix[ 0];
  2495. + int q1 = pix[ 1];
  2496. + int q2 = pix[ 2];
  2497. +
  2498. + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta ) /* the line is left untouched unless all three differences are under their thresholds */
  2499. + {
  2500. + int tc = tc0[d>>1];
  2501. + int delta;
  2502. + if( abs( p2 - p0 ) < beta )
  2503. + {
  2504. + if( tc0[d>>1] )
  2505. + pix[-2] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[d>>1], tc0[d>>1] ); /* p1' */
  2506. + tc++; /* the p0/q0 clip range grows by one when the p side passes the beta test */
  2507. + }
  2508. + if( abs( q2 - q0 ) < beta )
  2509. + {
  2510. + if( tc0[d>>1] )
  2511. + pix[ 1] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[d>>1], tc0[d>>1] ); /* q1' */
  2512. + tc++; /* and by one more when the q side passes it */
  2513. + }
  2514. +
  2515. + delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
  2516. + pix[-1] = x264_clip_pixel( p0 + delta ); /* p0' */
  2517. + pix[ 0] = x264_clip_pixel( q0 - delta ); /* q0' */
  2518. + }
  2519. + }
  2520. +}
  2521. +
  2522. +static inline void deblock_v_luma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
  2523. +{
  2524. + /* Filters 8 lines, `stride` apart, across the edge between pix[-1] and pix[0]; takes no tc0 clip table, only the alpha/beta thresholds. */
  2525. + for( int d = 0; d < 8; d++, pix += stride )
  2526. + {
  2527. + /* Read all samples first: every output below is computed from these unfiltered values. */
  2528. + int p2 = pix[-3];
  2529. + int p1 = pix[-2];
  2530. + int p0 = pix[-1];
  2531. + int q0 = pix[ 0];
  2532. + int q1 = pix[ 1];
  2533. + int q2 = pix[ 2];
  2534. +
  2535. + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta ) /* otherwise the line is left untouched */
  2536. + {
  2537. + if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ) /* small step across the edge: each side may modify up to three samples */
  2538. + {
  2539. + if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
  2540. + {
  2541. + const int p3 = pix[-4];
  2542. + pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
  2543. + pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
  2544. + pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
  2545. + }
  2546. + else /* p0' */
  2547. + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
  2548. + if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
  2549. + {
  2550. + const int q3 = pix[3];
  2551. + pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
  2552. + pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
  2553. + pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
  2554. + }
  2555. + else /* q0' */
  2556. + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
  2557. + }
  2558. + else /* p0', q0' */
  2559. + {
  2560. + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
  2561. + pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
  2562. + }
  2563. + }
  2564. + }
  2565. +}
  2566. +
  2567. +static inline void deblock_v_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
  2568. +{ /* Filters 4 lines, `stride` apart, across the edge between pix[-xstride] and pix[0]; only p0 and q0 are modified. */
  2569. + const int xstride = 2; /* samples are read 2 apart; presumably U and V are interleaved (the caller filters pixuv and pixuv + 1 separately) */
  2570. + for( int i = 0; i < 4; i++, pix += stride )
  2571. + {
  2572. + /* We don't worry about p2 or q2 */
  2573. + int tc = tc0[i]; /* one clip value per line */
  2574. + int p1 = pix[xstride*-2];
  2575. + int p0 = pix[xstride*-1];
  2576. + int q0 = pix[xstride* 0];
  2577. + int q1 = pix[xstride* 1];
  2578. +
  2579. + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta ) /* otherwise the line is left untouched */
  2580. + {
  2581. + int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
  2582. + pix[xstride*-1] = x264_clip_pixel( p0 + delta ); /* p0' */
  2583. + pix[xstride* 0] = x264_clip_pixel( q0 - delta ); /* q0' */
  2584. + }
  2585. + }
  2586. +}
  2587. +
  2588. +static inline void deblock_v_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
  2589. +{ /* Filters 4 lines, `stride` apart, across the edge between pix[-xstride] and pix[0]; takes no tc0 clip table, only the alpha/beta thresholds. */
  2590. + const int xstride = 2; /* samples are read 2 apart; presumably U and V are interleaved (the caller filters pixuv and pixuv + 1 separately) */
  2591. + for( int i = 0; i < 4; i++, pix += stride )
  2592. + {
  2593. + int p1 = pix[xstride*-2];
  2594. + int p0 = pix[xstride*-1];
  2595. + int q0 = pix[xstride* 0];
  2596. + int q1 = pix[xstride* 1];
  2597. +
  2598. + if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta ) /* otherwise the line is left untouched */
  2599. + {
  2600. + /* p0', q0' */
  2601. + pix[xstride*-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
  2602. + pix[xstride* 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
  2603. + }
  2604. + }
  2605. +}
  2606. +
  2607. static void deblock_v_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
  2608. {
  2609. deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
  2610. @@ -392,16 +514,59 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2611.  
  2612. if( h->mb.i_neighbour & MB_LEFT )
  2613. {
  2614. - int qpl = h->mb.qp[h->mb.i_mb_left_xy[0]];
  2615. - int qp_left = (qp + qpl + 1) >> 1;
  2616. - int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
  2617. - int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_left_xy[0]] );
  2618. - if( intra_cur || intra_left )
  2619. - FILTER( _intra, 0, 0, qp_left, qpc_left );
  2620. + if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
  2621. + {
  2622. + int luma_qp[2];
  2623. + int chroma_qp[2];
  2624. + int left_qp[2];
  2625. + int current_qp = h->mb.qp[mb_xy];
  2626. + left_qp[0] = h->mb.qp[h->mb.i_mb_left_xy[0]];
  2627. + luma_qp[0] = (current_qp + left_qp[0] + 1) >> 1;
  2628. + chroma_qp[0] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[0]] + 1) >> 1;
  2629. + if( bs_mbaff[0][0] == 4)
  2630. + {
  2631. + deblock_edge_intra( h, pixy, 2*stridey, bs_mbaff[0], luma_qp[0], 0, deblock_v_luma_intra_mbaff_c );
  2632. + deblock_edge_intra( h, pixuv, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
  2633. + deblock_edge_intra( h, pixuv + 1, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_intra_mbaff_c );
  2634. + }
  2635. + else
  2636. + {
  2637. + deblock_edge( h, pixy, 2*stridey, bs_mbaff[0], luma_qp[0], 0, deblock_v_luma_mbaff_c );
  2638. + deblock_edge( h, pixuv, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
  2639. + deblock_edge( h, pixuv + 1, 2*strideuv, bs_mbaff[0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
  2640. + }
  2641. +
  2642. + int offy = h->mb.b_interlaced ? 4 : 0;
  2643. + int offuv = h->mb.b_interlaced ? 3 : 0;
  2644. + left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
  2645. + luma_qp[1] = (current_qp + left_qp[1] + 1) >> 1;
  2646. + chroma_qp[1] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
  2647. + if( bs_mbaff[0][4] == 4)
  2648. + {
  2649. + deblock_edge_intra( h, pixy + (stridey<<offy), 2*stridey, bs_mbaff[0]+4, luma_qp[1], 0, deblock_v_luma_intra_mbaff_c );
  2650. + deblock_edge_intra( h, pixuv + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
  2651. + deblock_edge_intra( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_intra_mbaff_c );
  2652. + }
  2653. + else
  2654. + {
  2655. + deblock_edge( h, pixy + (stridey<<offy), 2*stridey, bs_mbaff[0]+4, luma_qp[1], 0, deblock_v_luma_mbaff_c );
  2656. + deblock_edge( h, pixuv + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
  2657. + deblock_edge( h, pixuv + 1 + (strideuv<<offuv), 2*strideuv, bs_mbaff[0]+4, chroma_qp[1], 1, deblock_v_chroma_mbaff_c );
  2658. + }
  2659. + }
  2660. else
  2661. - FILTER( , 0, 0, qp_left, qpc_left );
  2662. - }
  2663. + {
  2664. + int qpl = h->mb.qp[h->mb.i_mb_xy-1];
  2665. + int qp_left = (qp + qpl + 1) >> 1;
  2666. + int qpc_left = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpl] + 1) >> 1;
  2667. + int intra_left = IS_INTRA( h->mb.type[h->mb.i_mb_xy-1] );
  2668.  
  2669. + if( intra_cur || intra_left ) // bs=4
  2670. + FILTER( _intra, 0, 0, qp_left, qpc_left );
  2671. + else
  2672. + FILTER( , 0, 0, qp_left, qpc_left );
  2673. + }
  2674. + }
  2675. if( !first_edge_only )
  2676. {
  2677. if( !transform_8x8 ) FILTER( , 0, 1, qp, qpc );
  2678. diff --git a/common/macroblock.c b/common/macroblock.c
  2679. index 3a4c873..1d72fe8 100644
  2680. --- a/common/macroblock.c
  2681. +++ b/common/macroblock.c
  2682. @@ -1445,6 +1445,69 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2683. M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot;
  2684. }
  2685. }
  2686. +
  2687. + int mb_x = h->mb.i_mb_x;
  2688. + int mb_y = h->mb.i_mb_y;
  2689. + int mb_xy = h->mb.i_mb_xy;
  2690. +
  2691. + // left is wrong without this here
  2692. + x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
  2693. +
  2694. + uint8_t (*bs_mbaff)[8] = h->deblock_strength_mbaff[h->mb.i_mb_y&1][h->mb.i_mb_x];
  2695. +
  2696. + int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
  2697. +
  2698. + if( h->mb.i_neighbour & MB_LEFT )
  2699. + {
  2700. + if( h->sh.b_mbaff && h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
  2701. + {
  2702. + static const uint8_t offset[2][2][8] = {
  2703. + { { 0, 0, 0, 0, 1, 1, 1, 1 },
  2704. + { 2, 2, 2, 2, 3, 3, 3, 3 }, },
  2705. + { { 0, 1, 2, 3, 0, 1, 2, 3 },
  2706. + { 0, 1, 2, 3, 0, 1, 2, 3 }, }
  2707. + };
  2708. + uint8_t bS[8];
  2709. +
  2710. + if( intra_cur )
  2711. + bS[0] = bS[1] = bS[2] = bS[3] =
  2712. + bS[4] = bS[5] = bS[6] = bS[7] = 4;
  2713. + else
  2714. + {
  2715. + const uint8_t *off = offset[h->mb.b_interlaced][mb_y&1];
  2716. + uint8_t (*nnz)[24] = h->mb.non_zero_count;
  2717. +
  2718. + for( int i=0; i<8; i++ )
  2719. + {
  2720. + int left = h->mb.i_mb_left_xy[h->mb.b_interlaced ? i>>2 : i&1];
  2721. + int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
  2722. + int nnz_left = nnz[left][3 + 4*off[i]];
  2723. + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
  2724. + {
  2725. + int j = off[i]&~1;
  2726. + if( h->mb.mb_transform_size[left] )
  2727. + nnz_left = !!(M16( &nnz[left][2+4*j] ) | M16( &nnz[left][2+4*(1+j)] ));
  2728. + }
  2729. + if( IS_INTRA( h->mb.type[left] ) )
  2730. + bS[i] = 4;
  2731. + else if( nnz_left || nnz_this )
  2732. + bS[i] = 2;
  2733. + else // As left is different interlaced.
  2734. + bS[i] = 1;
  2735. + }
  2736. + }
  2737. +
  2738. + if( h->mb.b_interlaced )
  2739. + {
  2740. + for( int i=0; i<8; i++ ) bs_mbaff[0][i] = bS[i];
  2741. + }
  2742. + else
  2743. + {
  2744. + for( int i=0; i<4; i++ ) bs_mbaff[0][i] = bS[2*i];
  2745. + for( int i=0; i<4; i++ ) bs_mbaff[0][i+4] = bS[1+2*i];
  2746. + }
  2747. + }
  2748. + }
  2749. }
  2750.  
  2751. static void ALWAYS_INLINE twiddle_topleft_pixel( pixel *dst, pixel *src, int b_interlaced )
  2752. --
  2753. 1.7.4
  2754.  
  2755.  
  2756. From 374cba76623f48c2084b89c471e5fdf7951dfc76 Mon Sep 17 00:00:00 2001
  2757. From: Simon Horlick <simonhorlick@gmail.com>
  2758. Date: Wed, 16 Mar 2011 21:24:42 +0000
  2759. Subject: [PATCH 18/25] Top edge deblocking
  2760.  
  2761. ---
  2762. common/deblock.c | 41 ++++++++++++++++++++++++++-------
  2763. common/macroblock.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
  2764. 2 files changed, 95 insertions(+), 9 deletions(-)
  2765.  
  2766. diff --git a/common/deblock.c b/common/deblock.c
  2767. index be96fc8..de9d9fb 100644
  2768. --- a/common/deblock.c
  2769. +++ b/common/deblock.c
  2770. @@ -576,17 +576,40 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  2771.  
  2772. if( h->mb.i_neighbour & MB_TOP )
  2773. {
  2774. - int qpt = h->mb.qp[h->mb.i_mb_top_xy];
  2775. - int qp_top = (qp + qpt + 1) >> 1;
  2776. - int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
  2777. - int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
  2778. - if( ~b_interlaced & (intra_cur | intra_top) )
  2779. - FILTER( _intra, 1, 0, qp_top, qpc_top );
  2780. + if( b_interlaced && !(mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
  2781. + {
  2782. + /* Need to filter both fields (even for frame macroblocks) */
  2783. + /* Filter top two rows using the top and then bottom macroblocks of the above pair. */
  2784. + int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
  2785. +
  2786. + for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
  2787. + {
  2788. + int qpt = h->mb.qp[mbn_xy];
  2789. + int qp_top = (qp + qpt + 1) >> 1;
  2790. + int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
  2791. +
  2792. + deblock_edge( h, pixy + j*stridey, 2* stridey, bs_mbaff[1]+4*j, qp_top, 0, deblock_v_luma_c );
  2793. + deblock_edge( h, pixuv + j*strideuv, 2*strideuv, bs_mbaff[1]+4*j, qpc_top, 1, deblock_v_chroma_c );
  2794. + }
  2795. + }
  2796. else
  2797. {
  2798. - if( intra_top )
  2799. - M32( bs[1][0] ) = 0x03030303;
  2800. - FILTER( , 1, 0, qp_top, qpc_top );
  2801. + int qpt = h->mb.qp[h->mb.i_mb_top_xy];
  2802. + int qp_top = (qp + qpt + 1) >> 1;
  2803. + int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
  2804. + int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
  2805. +
  2806. + if( (!b_interlaced || (!h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_top_xy]))
  2807. + && (intra_cur || intra_top) )
  2808. + {
  2809. + FILTER( _intra, 1, 0, qp_top, qpc_top );
  2810. + }
  2811. + else
  2812. + {
  2813. + if( intra_top )
  2814. + M32( bs[1][0] ) = 0x03030303;
  2815. + FILTER( , 1, 0, qp_top, qpc_top );
  2816. + }
  2817. }
  2818. }
  2819.  
  2820. diff --git a/common/macroblock.c b/common/macroblock.c
  2821. index 1d72fe8..0da5958 100644
  2822. --- a/common/macroblock.c
  2823. +++ b/common/macroblock.c
  2824. @@ -1508,6 +1508,69 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  2825. }
  2826. }
  2827. }
  2828. +
  2829. + if( h->mb.i_neighbour & MB_TOP )
  2830. + {
  2831. + if( h->sh.b_mbaff && !(mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
  2832. + {
  2833. + /* Need to filter both fields (even for frame macroblocks) */
  2834. + /* Filter top two rows using the top and then bottom macroblocks of the above pair. */
  2835. + int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
  2836. +
  2837. + for(int j=0; j<2; j++, mbn_xy += h->mb.i_mb_stride)
  2838. + {
  2839. + int mbn_intra = IS_INTRA( h->mb.type[mbn_xy] );
  2840. + uint8_t (*nnz)[24] = h->mb.non_zero_count;
  2841. +
  2842. + uint32_t nnz_top[4];
  2843. + uint32_t nnz_cur[4];
  2844. + nnz_top[0] = nnz[mbn_xy][3*4+0];
  2845. + nnz_top[1] = nnz[mbn_xy][3*4+1];
  2846. + nnz_top[2] = nnz[mbn_xy][3*4+2];
  2847. + nnz_top[3] = nnz[mbn_xy][3*4+3];
  2848. + nnz_cur[0] = h->mb.cache.non_zero_count[x264_scan8[0]+0];
  2849. + nnz_cur[1] = h->mb.cache.non_zero_count[x264_scan8[0]+1];
  2850. + nnz_cur[2] = h->mb.cache.non_zero_count[x264_scan8[0]+2];
  2851. + nnz_cur[3] = h->mb.cache.non_zero_count[x264_scan8[0]+3];
  2852. +
  2853. + /* Munge NNZ for cavlc + 8x8dct */
  2854. + if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
  2855. + {
  2856. + int top = mbn_xy;
  2857. + if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
  2858. + {
  2859. + int nnz_top0 = M16( &nnz[top][8] ) | M16( &nnz[top][12] );
  2860. + int nnz_top1 = M16( &nnz[top][10] ) | M16( &nnz[top][14] );
  2861. + nnz_top[0] = nnz_top[1] = nnz_top0 ? 0x0101 : 0;
  2862. + nnz_top[2] = nnz_top[3] = nnz_top1 ? 0x0101 : 0;
  2863. + }
  2864. + if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
  2865. + {
  2866. + int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
  2867. + int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
  2868. + nnz_cur[0] = nnz_cur[1] = !!nnz0;
  2869. + nnz_cur[2] = nnz_cur[3] = !!nnz1;
  2870. + }
  2871. + }
  2872. +
  2873. + uint8_t bS[4];
  2874. + if( intra_cur || mbn_intra )
  2875. + bS[0] = bS[1] = bS[2] = bS[3] = 3;
  2876. + else
  2877. + {
  2878. + for( int i = 0; i < 4; i++ )
  2879. + {
  2880. + if( nnz_cur[i] || nnz_top[i] )
  2881. + bS[i] = 2;
  2882. + else
  2883. + bS[i] = 1;
  2884. + }
  2885. + }
  2886. + for( int i=0; i<4; i++ )
  2887. + bs_mbaff[1][i+4*j] = bS[i];
  2888. + }
  2889. + }
  2890. + }
  2891. }
  2892.  
  2893. static void ALWAYS_INLINE twiddle_topleft_pixel( pixel *dst, pixel *src, int b_interlaced )
  2894. --
  2895. 1.7.4
  2896.  
  2897.  
  2898. From 388145f81f75bfcd549335774457ad7d7fcd475f Mon Sep 17 00:00:00 2001
  2899. From: Simon Horlick <simonhorlick@gmail.com>
  2900. Date: Tue, 15 Mar 2011 01:39:49 +0000
  2901. Subject: [PATCH 19/25] Use both left macroblocks for ref_idx calculation
  2902.  
  2903. ---
  2904. common/macroblock.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------
  2905. 1 files changed, 51 insertions(+), 7 deletions(-)
  2906.  
  2907. diff --git a/common/macroblock.c b/common/macroblock.c
  2908. index 0da5958..5b42966 100644
  2909. --- a/common/macroblock.c
  2910. +++ b/common/macroblock.c
  2911. @@ -1229,14 +1229,58 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  2912. h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
  2913. if( h->param.b_cabac )
  2914. {
  2915. - uint8_t skipbp;
  2916. x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
  2917. - skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left[0]] : 0;
  2918. - h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
  2919. - h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
  2920. - skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
  2921. - h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4;
  2922. - h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8;
  2923. +
  2924. + if( h->mb.i_neighbour & MB_LEFT )
  2925. + {
  2926. + if( h->mb.skipbp[left[0]] == 0xf )
  2927. + h->mb.cache.skip[x264_scan8[0] - 1] = 1;
  2928. + else if( h->mb.partition[left[0]] == D_8x8 )
  2929. + {
  2930. + int off = 1+(left_index_table[18]&~1);
  2931. + h->mb.cache.skip[x264_scan8[0] - 1] = (h->mb.skipbp[left[0]] >> off) & 1;
  2932. + }
  2933. + else
  2934. + h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[left[0]] & 0x2;
  2935. +
  2936. + if( h->mb.skipbp[left[1]] == 0xf )
  2937. + h->mb.cache.skip[x264_scan8[8] - 1] = 1;
  2938. + else if( h->mb.partition[left[1]] == D_8x8 )
  2939. + {
  2940. + int off = 1+(left_index_table[20]&~1);
  2941. + h->mb.cache.skip[x264_scan8[8] - 1] = (h->mb.skipbp[left[1]] >> off) & 1;
  2942. + }
  2943. + else
  2944. + h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[left[1]] & 0x8;
  2945. + }
  2946. + else
  2947. + {
  2948. + h->mb.cache.skip[x264_scan8[0] - 1 + 0*8] = 0;
  2949. + h->mb.cache.skip[x264_scan8[0] - 1 + 1*8] = 0;
  2950. + h->mb.cache.skip[x264_scan8[0] - 1 + 2*8] = 0;
  2951. + h->mb.cache.skip[x264_scan8[0] - 1 + 3*8] = 0;
  2952. + }
  2953. +
  2954. + if( h->mb.i_neighbour & MB_TOP )
  2955. + {
  2956. + if( h->mb.skipbp[top] == 0xf )
  2957. + {
  2958. + h->mb.cache.skip[x264_scan8[0] - 8] = 1;
  2959. + h->mb.cache.skip[x264_scan8[4] - 8] = 1;
  2960. + }
  2961. + else if( h->mb.partition[top] == D_8x8 )
  2962. + {
  2963. + h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[top] & 0x4;
  2964. + h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[top] & 0x8;
  2965. + }
  2966. + else
  2967. + {
  2968. + h->mb.cache.skip[x264_scan8[0] - 8] = 0;
  2969. + h->mb.cache.skip[x264_scan8[4] - 8] = 0;
  2970. + }
  2971. + }
  2972. + else
  2973. + M32( &h->mb.cache.skip[x264_scan8[0] - 8] ) = 0;
  2974. }
  2975. }
  2976.  
  2977. --
  2978. 1.7.4
  2979.  
  2980.  
  2981. From e30f270b1830c3fc2b12014aa85d6dd959e3c4d3 Mon Sep 17 00:00:00 2001
  2982. From: Simon Horlick <simonhorlick@gmail.com>
  2983. Date: Tue, 15 Mar 2011 01:14:16 +0000
  2984. Subject: [PATCH 20/25] Fix min/max mv calculation
  2985.  
  2986. ---
  2987. common/macroblock.c | 5 +++++
  2988. encoder/analyse.c | 12 ++++++++----
  2989. 2 files changed, 13 insertions(+), 4 deletions(-)
  2990.  
  2991. diff --git a/common/macroblock.c b/common/macroblock.c
  2992. index 5b42966..00e9403 100644
  2993. --- a/common/macroblock.c
  2994. +++ b/common/macroblock.c
  2995. @@ -841,6 +841,11 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  2996.  
  2997. const int *left_index_table = h->mb.left_index_table;
  2998.  
  2999. + int my = h->mb.i_mb_y >> h->mb.b_interlaced;
  3000. + int mb_height = h->mb.i_mb_height >> h->mb.b_interlaced;
  3001. + h->mb.mv_min[1] = 4*( -16*my - 24 );
  3002. + h->mb.mv_max[1] = 4*( 16*( mb_height - my - 1 ) + 24 );
  3003. +
  3004. /* load cache */
  3005. if( h->mb.i_neighbour & MB_TOP )
  3006. {
  3007. diff --git a/encoder/analyse.c b/encoder/analyse.c
  3008. index 4f439d4..d004c66 100644
  3009. --- a/encoder/analyse.c
  3010. +++ b/encoder/analyse.c
  3011. @@ -443,13 +443,11 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  3012. h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
  3013. if( h->mb.i_mb_x == 0 )
  3014. {
  3015. - int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
  3016. - int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
  3017. int thread_mvy_range = i_fmv_range;
  3018.  
  3019. if( h->i_thread_frames > 1 )
  3020. {
  3021. - int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
  3022. + int pix_y = (h->mb.i_mb_y | h->sh.b_mbaff) * 16;
  3023. int thresh = pix_y + h->param.analyse.i_mv_range_thread;
  3024. for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
  3025. for( int j = 0; j < h->i_ref[i]; j++ )
  3026. @@ -465,7 +463,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  3027.  
  3028. x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
  3029. }
  3030. -
  3031. + int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
  3032. + int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
  3033. h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
  3034. h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
  3035. h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
  3036. @@ -473,6 +472,11 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
  3037. h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
  3038. h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
  3039. h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
  3040. +
  3041. + mb_y = h->mb.i_mb_y >> h->mb.b_interlaced;
  3042. + mb_height = h->sps->i_mb_height >> h->mb.b_interlaced;
  3043. + h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
  3044. + h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
  3045. }
  3046. #undef CLIP_FMV
  3047.  
  3048. --
  3049. 1.7.4
  3050.  
  3051.  
  3052. From 962a6ce9a008d7239c11920635e570903bed7a85 Mon Sep 17 00:00:00 2001
  3053. From: Simon Horlick <simonhorlick@gmail.com>
  3054. Date: Tue, 15 Mar 2011 01:15:06 +0000
  3055. Subject: [PATCH 21/25] Calculate bipred POCs correctly
  3056.  
  3057. ---
  3058. common/common.h | 4 +-
  3059. common/macroblock.c | 67 ++++++++++++++++++++++++++-------------------------
  3060. 2 files changed, 36 insertions(+), 35 deletions(-)
  3061.  
  3062. diff --git a/common/common.h b/common/common.h
  3063. index 7990cf0..c7670e7 100644
  3064. --- a/common/common.h
  3065. +++ b/common/common.h
  3066. @@ -756,9 +756,9 @@ struct x264_t
  3067. int i_chroma_lambda2_offset;
  3068.  
  3069. /* B_direct and weighted prediction */
  3070. - int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
  3071. + int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
  3072. int16_t (*dist_scale_factor)[4];
  3073. - int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
  3074. + int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
  3075. int8_t (*bipred_weight)[4];
  3076. /* maps fref1[0]'s ref indices into the current list0 */
  3077. #define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
  3078. diff --git a/common/macroblock.c b/common/macroblock.c
  3079. index 00e9403..f6406fe 100644
  3080. --- a/common/macroblock.c
  3081. +++ b/common/macroblock.c
  3082. @@ -1230,8 +1230,8 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
  3083. /* load skip */
  3084. if( h->sh.i_type == SLICE_TYPE_B )
  3085. {
  3086. - h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(mb_y&1)];
  3087. - h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
  3088. + h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced][h->mb.b_interlaced&(mb_y&1)];
  3089. + h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced][h->mb.b_interlaced&(mb_y&1)];
  3090. if( h->param.b_cabac )
  3091. {
  3092. x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
  3093. @@ -1903,42 +1903,43 @@ void x264_macroblock_cache_save( x264_t *h )
  3094.  
  3095. void x264_macroblock_bipred_init( x264_t *h )
  3096. {
  3097. - for( int field = 0; field <= h->sh.b_mbaff; field++ )
  3098. - for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<h->sh.b_mbaff); i_ref0++ )
  3099. - {
  3100. - x264_frame_t *l0 = h->fref[0][i_ref0>>h->sh.b_mbaff];
  3101. - int poc0 = l0->i_poc + l0->i_delta_poc[field^(i_ref0&1)];
  3102. - for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<h->sh.b_mbaff); i_ref1++ )
  3103. + for( int mbfield = 0; mbfield <= h->sh.b_mbaff; mbfield++ )
  3104. + for( int field = 0; field <= h->sh.b_mbaff; field++ )
  3105. + for( int i_ref0 = 0; i_ref0 < (h->i_ref[0]<<mbfield); i_ref0++ )
  3106. {
  3107. - int dist_scale_factor;
  3108. - x264_frame_t *l1 = h->fref[1][i_ref1>>h->sh.b_mbaff];
  3109. - int poc1 = l1->i_poc + l1->i_delta_poc[field^(i_ref1&1)];
  3110. - int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
  3111. - int td = x264_clip3( poc1 - poc0, -128, 127 );
  3112. - if( td == 0 /* || pic0 is a long-term ref */ )
  3113. - dist_scale_factor = 256;
  3114. - else
  3115. + x264_frame_t *l0 = h->fref[0][i_ref0>>mbfield];
  3116. + int poc0 = l0->i_poc + mbfield*l0->i_delta_poc[field^(i_ref0&1)];
  3117. + for( int i_ref1 = 0; i_ref1 < (h->i_ref[1]<<mbfield); i_ref1++ )
  3118. {
  3119. - int tb = x264_clip3( cur_poc - poc0, -128, 127 );
  3120. - int tx = (16384 + (abs(td) >> 1)) / td;
  3121. - dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
  3122. - }
  3123. + int dist_scale_factor;
  3124. + x264_frame_t *l1 = h->fref[1][i_ref1>>mbfield];
  3125. + int cur_poc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
  3126. + int poc1 = l1->i_poc + mbfield*l1->i_delta_poc[field^(i_ref1&1)];
  3127. + int td = x264_clip3( poc1 - poc0, -128, 127 );
  3128. + if( td == 0 /* || pic0 is a long-term ref */ )
  3129. + dist_scale_factor = 256;
  3130. + else
  3131. + {
  3132. + int tb = x264_clip3( cur_poc - poc0, -128, 127 );
  3133. + int tx = (16384 + (abs(td) >> 1)) / td;
  3134. + dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
  3135. + }
  3136.  
  3137. - h->mb.dist_scale_factor_buf[field][i_ref0][i_ref1] = dist_scale_factor;
  3138. + h->mb.dist_scale_factor_buf[mbfield][field][i_ref0][i_ref1] = dist_scale_factor;
  3139.  
  3140. - dist_scale_factor >>= 2;
  3141. - if( h->param.analyse.b_weighted_bipred
  3142. - && dist_scale_factor >= -64
  3143. - && dist_scale_factor <= 128 )
  3144. - {
  3145. - h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 64 - dist_scale_factor;
  3146. - // ssse3 implementation of biweight doesn't support the extrema.
  3147. - // if we ever generate them, we'll have to drop that optimization.
  3148. - assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
  3149. + dist_scale_factor >>= 2;
  3150. + if( h->param.analyse.b_weighted_bipred
  3151. + && dist_scale_factor >= -64
  3152. + && dist_scale_factor <= 128 )
  3153. + {
  3154. + h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 64 - dist_scale_factor;
  3155. + // ssse3 implementation of biweight doesn't support the extrema.
  3156. + // if we ever generate them, we'll have to drop that optimization.
  3157. + assert( dist_scale_factor >= -63 && dist_scale_factor <= 127 );
  3158. + }
  3159. + else
  3160. + h->mb.bipred_weight_buf[mbfield][field][i_ref0][i_ref1] = 32;
  3161. }
  3162. - else
  3163. - h->mb.bipred_weight_buf[field][i_ref0][i_ref1] = 32;
  3164. }
  3165. - }
  3166. }
  3167.  
  3168. --
  3169. 1.7.4
  3170.  
  3171.  
  3172. From a9f5b05e3dc0c7482e6661a1ee8d457f1e35ee75 Mon Sep 17 00:00:00 2001
  3173. From: Simon Horlick <simonhorlick@gmail.com>
  3174. Date: Tue, 15 Mar 2011 01:16:20 +0000
  3175. Subject: [PATCH 22/25] Direct temporal
  3176.  
  3177. ---
  3178. common/mvpred.c | 100 +++++++++++++++++++++++++++++++++++++++++--------------
  3179. 1 files changed, 75 insertions(+), 25 deletions(-)
  3180.  
  3181. diff --git a/common/mvpred.c b/common/mvpred.c
  3182. index 278e0ac..28eabca 100644
  3183. --- a/common/mvpred.c
  3184. +++ b/common/mvpred.c
  3185. @@ -182,50 +182,100 @@ void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
  3186.  
  3187. static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
  3188. {
  3189. - int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
  3190. - int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
  3191. - const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
  3192. - const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
  3193. -
  3194. - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
  3195. -
  3196. - h->mb.i_partition = partition_col;
  3197. -
  3198. - if( IS_INTRA( type_col ) )
  3199. + int mb_x = h->mb.i_mb_x;
  3200. + int mb_y = h->mb.i_mb_y;
  3201. + int mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3202. + int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
  3203. + int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
  3204. + int col_parity = abs(h->fref[1][0]->i_poc - h->fdec->i_poc)
  3205. + >= abs(h->fref[1][0]->i_poc + h->sh.i_delta_poc_bottom - h->fdec->i_poc);
  3206. + int preshift = h->mb.b_interlaced;
  3207. + int postshift = h->mb.b_interlaced;
  3208. + int offset = 1;
  3209. + int yshift = 1;
  3210. + h->mb.i_partition = partition_col[0];
  3211. + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
  3212. {
  3213. - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
  3214. - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
  3215. - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
  3216. - return 1;
  3217. + if( h->mb.b_interlaced )
  3218. + {
  3219. + mb_y = h->mb.i_mb_y&~1;
  3220. + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3221. + type_col[0] = h->fref[1][0]->mb_type[mb_xy];
  3222. + type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
  3223. + partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
  3224. + partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
  3225. + preshift = 0;
  3226. + yshift = 0;
  3227. +
  3228. + if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16)
  3229. + && (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16)
  3230. + && partition_col[0] != D_8x8 )
  3231. + h->mb.i_partition = D_16x8;
  3232. + else
  3233. + h->mb.i_partition = D_8x8;
  3234. + }
  3235. + else
  3236. + {
  3237. + mb_y = (h->mb.i_mb_y&~1) + col_parity;
  3238. + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3239. + type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
  3240. + partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
  3241. + preshift = 1;
  3242. + yshift = 2;
  3243. + h->mb.i_partition = partition_col[0];
  3244. + }
  3245. + offset = 0;
  3246. }
  3247. + int i_mb_4x4 = 16 * h->mb.i_mb_stride * mb_y + 4 * mb_x;
  3248. + int i_mb_8x8 = 4 * h->mb.i_mb_stride * mb_y + 2 * mb_x;
  3249. +
  3250. + x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
  3251.  
  3252. /* Don't do any checks other than the ones we have to, based
  3253. * on the size of the colocated partitions.
  3254. * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
  3255. - int max_i8 = (D_16x16 - partition_col) + 1;
  3256. - int step = (partition_col == D_16x8) + 1;
  3257. - int width = 4 >> ((D_16x16 - partition_col)&1);
  3258. - int height = 4 >> ((D_16x16 - partition_col)>>1);
  3259. -
  3260. + int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
  3261. + int step = (h->mb.i_partition == D_16x8) + 1;
  3262. + int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
  3263. + int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
  3264. for( int i8 = 0; i8 < max_i8; i8 += step )
  3265. {
  3266. int x8 = i8&1;
  3267. int y8 = i8>>1;
  3268. - int i_part_8x8 = i_mb_8x8 + x8 + y8 * h->mb.i_b8_stride;
  3269. +
  3270. + if( IS_INTRA( type_col[y8] ) )
  3271. + {
  3272. + x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, 0 );
  3273. + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
  3274. + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
  3275. + continue;
  3276. + }
  3277. +
  3278. + int yM = 3*y8;
  3279. + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
  3280. + {
  3281. + if( h->mb.b_interlaced )
  3282. + yM = y8*6;
  3283. + else
  3284. + yM = 2*(h->mb.i_mb_y&1) + y8;
  3285. + }
  3286. +
  3287. + int i_part_8x8 = i_mb_8x8 + x8 + (yM>>1) * h->mb.i_b8_stride;
  3288. int i_ref1_ref = h->fref[1][0]->ref[0][i_part_8x8];
  3289. - int i_ref = (map_col_to_list0(i_ref1_ref>>h->sh.b_mbaff) << h->sh.b_mbaff) + (i_ref1_ref&h->sh.b_mbaff);
  3290. + int i_ref = (map_col_to_list0(i_ref1_ref>>preshift) << postshift) + (offset&i_ref1_ref&h->mb.b_interlaced);
  3291.  
  3292. if( i_ref >= 0 )
  3293. {
  3294. int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
  3295. - int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
  3296. + int16_t *mv_col = h->fref[1][0]->mv[0][i_mb_4x4 + 3*x8 + yM * h->mb.i_b4_stride];
  3297. + int16_t mv_y = (mv_col[1]<<yshift)/2;
  3298. int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
  3299. - int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
  3300. - if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
  3301. + int l0y = ( dist_scale_factor * mv_y + 128 ) >> 8;
  3302. + if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_y > h->mb.mv_max_spel[1]) )
  3303. return 0;
  3304. x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
  3305. x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
  3306. - x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
  3307. + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_y) );
  3308. }
  3309. else
  3310. {
  3311. --
  3312. 1.7.4
  3313.  
  3314.  
  3315. From fe257a3f3ad1b5121c52999f1db6727aa50082c5 Mon Sep 17 00:00:00 2001
  3316. From: Simon Horlick <simonhorlick@gmail.com>
  3317. Date: Tue, 15 Mar 2011 01:17:01 +0000
  3318. Subject: [PATCH 23/25] Direct spatial
  3319.  
  3320. ---
  3321. common/mvpred.c | 78 +++++++++++++++++++++++++++++++++++++++++++-----------
  3322. 1 files changed, 62 insertions(+), 16 deletions(-)
  3323.  
  3324. diff --git a/common/mvpred.c b/common/mvpred.c
  3325. index 28eabca..f25fa03 100644
  3326. --- a/common/mvpred.c
  3327. +++ b/common/mvpred.c
  3328. @@ -295,15 +295,6 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  3329. {
  3330. int8_t ref[2];
  3331. ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
  3332. - const int8_t *l1ref0 = &h->fref[1][0]->ref[0][h->mb.i_b8_xy];
  3333. - const int8_t *l1ref1 = &h->fref[1][0]->ref[1][h->mb.i_b8_xy];
  3334. - const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref[1][0]->mv[0][h->mb.i_b4_xy],
  3335. - (const int16_t (*)[2]) &h->fref[1][0]->mv[1][h->mb.i_b4_xy] };
  3336. - const int type_col = h->fref[1][0]->mb_type[h->mb.i_mb_xy];
  3337. - const int partition_col = h->fref[1][0]->mb_partition[h->mb.i_mb_xy];
  3338. -
  3339. - h->mb.i_partition = partition_col;
  3340. -
  3341. for( int i_list = 0; i_list < 2; i_list++ )
  3342. {
  3343. int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
  3344. @@ -348,6 +339,49 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  3345. ref[i_list] = i_ref;
  3346. }
  3347.  
  3348. + int mb_x = h->mb.i_mb_x;
  3349. + int mb_y = h->mb.i_mb_y;
  3350. + int mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3351. + int type_col[2] = { h->fref[1][0]->mb_type[mb_xy], h->fref[1][0]->mb_type[mb_xy] };
  3352. + int partition_col[2] = { h->fref[1][0]->mb_partition[mb_xy], h->fref[1][0]->mb_partition[mb_xy] };
  3353. + int col_parity = abs(h->fref[1][0]->i_poc - h->fdec->i_poc)
  3354. + >= abs(h->fref[1][0]->i_poc + h->sh.i_delta_poc_bottom - h->fdec->i_poc);
  3355. + h->mb.i_partition = partition_col[0];
  3356. + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
  3357. + {
  3358. + if( h->mb.b_interlaced )
  3359. + {
  3360. + mb_y = h->mb.i_mb_y&~1;
  3361. + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3362. + type_col[0] = h->fref[1][0]->mb_type[mb_xy];
  3363. + type_col[1] = h->fref[1][0]->mb_type[mb_xy + h->mb.i_mb_stride];
  3364. + partition_col[0] = h->fref[1][0]->mb_partition[mb_xy];
  3365. + partition_col[1] = h->fref[1][0]->mb_partition[mb_xy + h->mb.i_mb_stride];
  3366. +
  3367. + if( (IS_INTRA(type_col[0]) || partition_col[0] == D_16x16)
  3368. + && (IS_INTRA(type_col[1]) || partition_col[1] == D_16x16)
  3369. + && partition_col[0] != D_8x8 )
  3370. + h->mb.i_partition = D_16x8;
  3371. + else
  3372. + h->mb.i_partition = D_8x8;
  3373. + }
  3374. + else
  3375. + {
  3376. + mb_y = (h->mb.i_mb_y&~1) + col_parity;
  3377. + mb_xy = mb_x + h->mb.i_mb_stride * mb_y;
  3378. + type_col[0] = type_col[1] = h->fref[1][0]->mb_type[mb_xy];
  3379. + partition_col[0] = partition_col[1] = h->fref[1][0]->mb_partition[mb_xy];
  3380. + h->mb.i_partition = partition_col[0];
  3381. + }
  3382. + }
  3383. + int i_mb_4x4 = 16 * h->mb.i_mb_stride * mb_y + 4 * mb_x;
  3384. + int i_mb_8x8 = 4 * h->mb.i_mb_stride * mb_y + 2 * mb_x;
  3385. +
  3386. + int8_t *l1ref0 = &h->fref[1][0]->ref[0][i_mb_8x8];
  3387. + int8_t *l1ref1 = &h->fref[1][0]->ref[1][i_mb_8x8];
  3388. + int16_t (*l1mv[2])[2] = { (int16_t (*)[2]) &h->fref[1][0]->mv[0][i_mb_4x4],
  3389. + (int16_t (*)[2]) &h->fref[1][0]->mv[1][i_mb_4x4] };
  3390. +
  3391. if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
  3392. {
  3393. x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
  3394. @@ -367,24 +401,33 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  3395. return 0;
  3396. }
  3397.  
  3398. - if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
  3399. + if( !M64( mv ) || (ref[0]&&ref[1]) )
  3400. return 1;
  3401.  
  3402. /* Don't do any checks other than the ones we have to, based
  3403. * on the size of the colocated partitions.
  3404. * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
  3405. - int max_i8 = (D_16x16 - partition_col) + 1;
  3406. - int step = (partition_col == D_16x8) + 1;
  3407. - int width = 4 >> ((D_16x16 - partition_col)&1);
  3408. - int height = 4 >> ((D_16x16 - partition_col)>>1);
  3409. + int max_i8 = (D_16x16 - h->mb.i_partition) + 1;
  3410. + int step = (h->mb.i_partition == D_16x8) + 1;
  3411. + int width = 4 >> ((D_16x16 - h->mb.i_partition)&1);
  3412. + int height = 4 >> ((D_16x16 - h->mb.i_partition)>>1);
  3413.  
  3414. /* col_zero_flag */
  3415. for( int i8 = 0; i8 < max_i8; i8 += step )
  3416. {
  3417. const int x8 = i8&1;
  3418. const int y8 = i8>>1;
  3419. - const int o8 = x8 + y8 * h->mb.i_b8_stride;
  3420. - const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
  3421. + int yM = 3*y8;
  3422. + if( h->param.b_interlaced && h->fref[1][0]->field[mb_xy] != h->mb.b_interlaced )
  3423. + {
  3424. + if( h->mb.b_interlaced )
  3425. + yM = y8*6;
  3426. + else
  3427. + yM = 2*(h->mb.i_mb_y&1) + y8;
  3428. + }
  3429. + int o8 = x8 + (yM>>1) * h->mb.i_b8_stride;
  3430. + int o4 = 3*x8 + yM * h->mb.i_b4_stride;
  3431. +
  3432. int idx;
  3433. if( l1ref0[o8] == 0 )
  3434. idx = 0;
  3435. @@ -393,6 +436,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
  3436. else
  3437. continue;
  3438.  
  3439. + if( IS_INTRA( type_col[y8] ) )
  3440. + continue;
  3441. +
  3442. if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
  3443. {
  3444. if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
  3445. --
  3446. 1.7.4
  3447.  
  3448.  
  3449. From 53c6284008aa1471a62de4f6da6d587698a817c2 Mon Sep 17 00:00:00 2001
  3450. From: Simon Horlick <simonhorlick@gmail.com>
  3451. Date: Thu, 17 Mar 2011 17:39:18 +0000
  3452. Subject: [PATCH 24/25] Fix non-determinism with AQ
  3453.  
  3454. ---
  3455. encoder/ratecontrol.c | 4 ++--
  3456. 1 files changed, 2 insertions(+), 2 deletions(-)
  3457.  
  3458. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  3459. index e1a673f..bcbcb02 100644
  3460. --- a/encoder/ratecontrol.c
  3461. +++ b/encoder/ratecontrol.c
  3462. @@ -219,10 +219,10 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
  3463. {
  3464. int w = i ? 8 : 16;
  3465. int stride = frame->i_stride[i];
  3466. - int offset = h->mb.b_interlaced
  3467. + int offset = h->sh.b_mbaff
  3468. ? 16 * mb_x + w * (mb_y&~1) * stride + (mb_y&1) * stride
  3469. : 16 * mb_x + w * mb_y * stride;
  3470. - stride <<= h->mb.b_interlaced;
  3471. + stride <<= h->sh.b_mbaff;
  3472. if( i )
  3473. {
  3474. ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
  3475. --
  3476. 1.7.4
  3477.  
  3478.  
  3479. From 7e2d83f1ef2dc762ffe6880ee54686088aeff660 Mon Sep 17 00:00:00 2001
  3480. From: Simon Horlick <simonhorlick@gmail.com>
  3481. Date: Mon, 14 Mar 2011 02:54:30 +0000
  3482. Subject: [PATCH 25/25] Adaptive mbaff with vsad decision
  3483.  
  3484. ---
  3485. encoder/encoder.c | 21 ++++++++++++++++++++-
  3486. 1 files changed, 20 insertions(+), 1 deletions(-)
  3487.  
  3488. diff --git a/encoder/encoder.c b/encoder/encoder.c
  3489. index 0319126..d885f31 100644
  3490. --- a/encoder/encoder.c
  3491. +++ b/encoder/encoder.c
  3492. @@ -1873,6 +1873,25 @@ static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
  3493. x264_macroblock_slice_init( h );
  3494. }
  3495.  
  3496. +static int field_vsad( x264_t *h, int mb_x, int mb_y )
  3497. +{
  3498. + int score_field = 0;
  3499. + int score_frame = 0;
  3500. + int stride = h->fenc->i_stride[0];
  3501. + uint8_t *fenc = h->fenc->plane[0] + 16 * (mb_x + mb_y * stride);
  3502. +
  3503. + for( int i = 1; i < 16; i++ )
  3504. + for( int j = 0; j < 16; j++ )
  3505. + score_frame += abs(fenc[i*stride+j] - fenc[(i-1)*stride+j]);
  3506. + for( int i = 2; i < 16; i+=2 )
  3507. + for( int j = 0; j < 16; j++ )
  3508. + score_field += abs(fenc[i*stride+j] - fenc[(i-2)*stride+j]);
  3509. + for( int i = 3; i < 16; i+=2 )
  3510. + for( int j = 0; j < 16; j++ )
  3511. + score_field += abs(fenc[i*stride+j] - fenc[(i-2)*stride+j]);
  3512. + return (score_field < score_frame);
  3513. +}
  3514. +
  3515. static int x264_slice_write( x264_t *h )
  3516. {
  3517. int i_skip;
  3518. @@ -1967,7 +1986,7 @@ static int x264_slice_write( x264_t *h )
  3519. if( h->mb.b_adaptive_mbaff )
  3520. {
  3521. if( !(i_mb_y&1) )
  3522. - h->mb.b_interlaced = 1;
  3523. + h->mb.b_interlaced = field_vsad( h, i_mb_x, i_mb_y );
  3524. x264_zigzag_init( h->param.cpu, &h->zigzagf, h->mb.b_interlaced );
  3525. }
  3526. h->mb.field[mb_xy] = h->mb.b_interlaced;
  3527. --
  3528. 1.7.4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement