Advertisement
Guest User

Dark Shikari

a guest
Aug 5th, 2009
668
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 65.43 KB | None | 0 0
  1. From f21e71a04ba65aff9b5a4bfa8a73fd86c463f4ee Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Mon, 3 Aug 2009 20:52:30 -0700
  4. Subject: [PATCH 1/2] Various 1-pass VBV tweaks
  5. Make predictors have an offset in addition to a multiplier.
  6. This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI.
  7. We tried linear regressions, but they were very unreliable as predictors.
  8. Also allow VBV to be slightly more aggressive in raising QPs, to avoid running out of bits in some situations.
  9. Up to 1 dB improvement on some clips.
  10.  
  11. ---
  12. encoder/ratecontrol.c | 32 +++++++++++++++++++++-----------
  13. 1 files changed, 21 insertions(+), 11 deletions(-)
  14.  
  15. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  16. index 2f88708..087e658 100644
  17. --- a/encoder/ratecontrol.c
  18. +++ b/encoder/ratecontrol.c
  19. @@ -58,6 +58,7 @@ typedef struct
  20. double coeff;
  21. double count;
  22. double decay;
  23. + double offset;
  24. } predictor_t;
  25.  
  26. struct x264_ratecontrol_t
  27. @@ -409,9 +410,11 @@ int x264_ratecontrol_new( x264_t *h )
  28. rc->pred[i].coeff= 2.0;
  29. rc->pred[i].count= 1.0;
  30. rc->pred[i].decay= 0.5;
  31. + rc->pred[i].offset= 0.0;
  32. rc->row_preds[i].coeff= .25;
  33. rc->row_preds[i].count= 1.0;
  34. rc->row_preds[i].decay= 0.5;
  35. + rc->row_preds[i].offset= 0.0;
  36. }
  37. *rc->pred_b_from_p = rc->pred[0];
  38.  
  39. @@ -953,7 +956,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  40. if( y < h->sps->i_mb_height-1 )
  41. {
  42. int i_estimated;
  43. - int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
  44. + int avg_qp = X264_MIN(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
  45. + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
  46. rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
  47. i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
  48. @@ -1153,10 +1156,6 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  49. {
  50. update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
  51. h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
  52. - /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */
  53. - /* Hackily cap the predictor coeff in case this happens. */
  54. - /* FIXME FIXME FIXME */
  55. - rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. );
  56. rc->bframe_bits = 0;
  57. }
  58. }
  59. @@ -1270,17 +1269,28 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
  60.  
  61. static double predict_size( predictor_t *p, double q, double var )
  62. {
  63. - return p->coeff*var / (q*p->count);
  64. + return (p->coeff*var + p->offset) / (q*p->count);
  65. }
  66.  
  67. static void update_predictor( predictor_t *p, double q, double var, double bits )
  68. {
  69. + const double range = 1.5;
  70. if( var < 10 )
  71. return;
  72. - p->count *= p->decay;
  73. - p->coeff *= p->decay;
  74. - p->count ++;
  75. - p->coeff += bits*q / var;
  76. + double old_coeff = p->coeff / p->count;
  77. + double new_coeff = bits*q / var;
  78. + double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
  79. + double new_offset = bits*q - new_coeff_clipped * var;
  80. + if( new_offset >= 0 )
  81. + new_coeff = new_coeff_clipped;
  82. + else
  83. + new_offset = 0;
  84. + p->count *= p->decay;
  85. + p->coeff *= p->decay;
  86. + p->offset *= p->decay;
  87. + p->count ++;
  88. + p->coeff += new_coeff;
  89. + p->offset += new_offset;
  90. }
  91.  
  92. // update VBV after encoding a frame
  93. @@ -1350,7 +1360,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
  94. double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
  95. double qf = 1.0;
  96. if( bits > rcc->buffer_fill/2 )
  97. - qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
  98. + qf = rcc->buffer_fill/(2*bits);
  99. q /= qf;
  100. bits *= qf;
  101. if( bits < rcc->buffer_rate/2 )
  102. --
  103. 1.6.1.2
  104.  
  105.  
  106. From 5ed78fa5b21d686682b8779cc114844b4b204f4d Mon Sep 17 00:00:00 2001
  107. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  108. Date: Tue, 4 Aug 2009 17:46:33 -0700
  109. Subject: [PATCH 2/2] Macroblock-tree ratecontrol
  110. On by default; can be turned off with --no-mbtree.
  111. Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
  112. Requires a very large separate statsfile (4 bytes per macroblock) in multi-pass mode.
  113. Doesn't work with b-pyramid yet.
  114. Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
  115. This makes the "medium" preset a bit slower. Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
  116. Add a new option, --lookahead, to control the distance MB tree looks ahead to perform propagation analysis.
  117. Default is 50; larger values will be slower and require more memory but give more accurate results.
  118. Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
  119. This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
  120. Quality improvement from MB-tree is about 2-70% depending on content.
  121. Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.
  122.  
  123. ---
  124. common/common.c | 22 ++-
  125. common/common.h | 25 +++-
  126. common/frame.c | 10 +-
  127. common/frame.h | 3 +
  128. encoder/analyse.c | 4 +-
  129. encoder/encoder.c | 39 +++++-
  130. encoder/ratecontrol.c | 136 +++++++++++++---
  131. encoder/ratecontrol.h | 1 +
  132. encoder/slicetype.c | 422 ++++++++++++++++++++++++++++++++++++++-----------
  133. x264.c | 28 +++-
  134. x264.h | 5 +-
  135. 11 files changed, 556 insertions(+), 139 deletions(-)
  136.  
  137. diff --git a/common/common.c b/common/common.c
  138. index 9260c64..8513217 100644
  139. --- a/common/common.c
  140. +++ b/common/common.c
  141. @@ -72,6 +72,7 @@ void x264_param_default( x264_param_t *param )
  142. param->i_bframe_adaptive = X264_B_ADAPT_FAST;
  143. param->i_bframe_bias = 0;
  144. param->b_bframe_pyramid = 0;
  145. + param->i_lookahead = 50;
  146.  
  147. param->b_deblocking_filter = 1;
  148. param->i_deblocking_filter_alphac0 = 0;
  149. @@ -104,6 +105,7 @@ void x264_param_default( x264_param_t *param )
  150. param->rc.f_qblur = 0.5;
  151. param->rc.f_complexity_blur = 20;
  152. param->rc.i_zones = 0;
  153. + param->rc.b_mb_tree = 1;
  154.  
  155. /* Log */
  156. param->pf_log = x264_log_default;
  157. @@ -117,6 +119,7 @@ void x264_param_default( x264_param_t *param )
  158. param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
  159. param->analyse.i_me_method = X264_ME_HEX;
  160. param->analyse.f_psy_rd = 1.0;
  161. + param->analyse.b_psy = 1;
  162. param->analyse.f_psy_trellis = 0;
  163. param->analyse.i_me_range = 16;
  164. param->analyse.i_subpel_refine = 7;
  165. @@ -337,6 +340,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  166. }
  167. OPT("bframes")
  168. p->i_bframe = atoi(value);
  169. + OPT("lookahead")
  170. + p->i_lookahead = atoi(value);
  171. OPT("b-adapt")
  172. {
  173. p->i_bframe_adaptive = atobool(value);
  174. @@ -493,6 +498,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  175. p->analyse.f_psy_trellis = 0;
  176. }
  177. }
  178. + OPT("psy")
  179. + p->analyse.b_psy = atobool(value);
  180. OPT("chroma-me")
  181. p->analyse.b_chroma_me = atobool(value);
  182. OPT("mixed-refs")
  183. @@ -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  184. }
  185. OPT("qcomp")
  186. p->rc.f_qcompress = atof(value);
  187. + OPT("mbtree")
  188. + p->rc.b_mb_tree = atobool(value);
  189. OPT("qblur")
  190. p->rc.f_qblur = atof(value);
  191. OPT2("cplxblur", "cplx-blur")
  192. @@ -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
  193. s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
  194. s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
  195. s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
  196. - s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  197. + s += sprintf( s, " psy=%d", p->analyse.b_psy );
  198. + if( p->analyse.b_psy )
  199. + s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  200. s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
  201. s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
  202. s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
  203. @@ -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
  204. s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
  205. p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
  206.  
  207. - s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
  208. + if( p->i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  209. + s += sprintf( s, " lookahead=%d", p->i_lookahead );
  210. +
  211. + s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
  212. ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
  213. - : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
  214. + : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
  215. if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
  216. {
  217. if( p->rc.i_rc_method == X264_RC_CRF )
  218. @@ -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  219. if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
  220. {
  221. s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
  222. - if( p->i_bframe )
  223. + if( p->i_bframe && !p->rc.b_mb_tree )
  224. s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
  225. s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
  226. if( p->rc.i_aq_mode )
  227. diff --git a/common/common.h b/common/common.h
  228. index 8a25a13..5f9284e 100644
  229. --- a/common/common.h
  230. +++ b/common/common.h
  231. @@ -51,6 +51,7 @@
  232. #define X264_SLICE_MAX 4
  233. #define X264_NAL_MAX (4 + X264_SLICE_MAX)
  234. #define X264_PCM_COST (386*8)
  235. +#define X264_LOOKAHEAD_MAX 250
  236.  
  237. // number of pixels (per thread) in progress at any given time.
  238. // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
  239. @@ -152,6 +153,24 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
  240. return amvd0 + (amvd1<<16);
  241. }
  242.  
  243. +static const uint8_t exp2_lut[64] = {
  244. + 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
  245. + 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
  246. + 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  247. + 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  248. +};
  249. +
  250. +static ALWAYS_INLINE int x264_exp2fix8( float x )
  251. +{
  252. + int i, f;
  253. + x += 8;
  254. + if( x <= 0 ) return 0;
  255. + if( x >= 16 ) return 0xffff;
  256. + i = x;
  257. + f = (x-i)*64;
  258. + return (exp2_lut[f]+256) << i >> 8;
  259. +}
  260. +
  261. /****************************************************************************
  262. *
  263. ****************************************************************************/
  264. @@ -327,11 +346,11 @@ struct x264_t
  265. struct
  266. {
  267. /* Frames to be encoded (whose types have been decided) */
  268. - x264_frame_t *current[X264_BFRAME_MAX*4+3];
  269. + x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
  270. /* Temporary buffer (frames types not yet decided) */
  271. - x264_frame_t *next[X264_BFRAME_MAX*4+3];
  272. + x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
  273. /* Unused frames */
  274. - x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
  275. + x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
  276. /* For adaptive B decision */
  277. x264_frame_t *last_nonb;
  278.  
  279. diff --git a/common/frame.c b/common/frame.c
  280. index 23e6824..98c1e2c 100644
  281. --- a/common/frame.c
  282. +++ b/common/frame.c
  283. @@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
  284. memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
  285. CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
  286. }
  287. + CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  288. + memset( frame->i_intra_cost, -1, i_mb_count * sizeof(int16_t) );
  289. + CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
  290. + for( j = 0; j <= h->param.i_bframe+1; j++ )
  291. + for( i = 0; i <= h->param.i_bframe+1; i++ )
  292. + {
  293. + CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
  294. + CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
  295. + }
  296. }
  297.  
  298. if( h->param.analyse.i_me_method >= X264_ME_ESA )
  299. @@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
  300. CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
  301. CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
  302. CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
  303. - CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  304. if( h->param.i_bframe )
  305. {
  306. CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
  307. diff --git a/common/frame.h b/common/frame.h
  308. index aad77f5..a3da4e4 100644
  309. --- a/common/frame.h
  310. +++ b/common/frame.h
  311. @@ -63,6 +63,8 @@ typedef struct
  312. int8_t *mb_type;
  313. int16_t (*mv[2])[2];
  314. int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
  315. + uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  316. + uint8_t (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  317. int *lowres_mv_costs[2][X264_BFRAME_MAX+1];
  318. int8_t *ref[2];
  319. int i_ref[2];
  320. @@ -83,6 +85,7 @@ typedef struct
  321. float *f_qp_offset;
  322. int b_intra_calculated;
  323. uint16_t *i_intra_cost;
  324. + uint32_t *i_propagate_cost;
  325. uint16_t *i_inv_qscale_factor;
  326.  
  327. /* threading */
  328. diff --git a/encoder/analyse.c b/encoder/analyse.c
  329. index 4a36fcd..38b9976 100644
  330. --- a/encoder/analyse.c
  331. +++ b/encoder/analyse.c
  332. @@ -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  333. h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
  334. }
  335. h->mb.i_psy_rd_lambda = a->i_lambda;
  336. - /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
  337. - h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
  338. + /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
  339. + h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
  340.  
  341. h->mb.i_me_method = h->param.analyse.i_me_method;
  342. h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
  343. diff --git a/encoder/encoder.c b/encoder/encoder.c
  344. index 0f1ccc8..bce1f1a 100644
  345. --- a/encoder/encoder.c
  346. +++ b/encoder/encoder.c
  347. @@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
  348. h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
  349. h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
  350. h->param.rc.i_aq_mode = 0;
  351. + h->param.rc.b_mb_tree = 0;
  352. }
  353. h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
  354. h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
  355. @@ -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h )
  356. if( !h->param.i_bframe )
  357. h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
  358. h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
  359. + h->param.i_lookahead = X264_MIN( h->param.i_lookahead, X264_LOOKAHEAD_MAX );
  360. + h->param.i_lookahead = X264_MIN( h->param.i_lookahead, h->param.i_keyint_max );
  361. + if( h->param.rc.b_stat_read )
  362. + h->param.i_lookahead = 0;
  363. + else if( !h->param.i_lookahead )
  364. + h->param.rc.b_mb_tree = 0;
  365. + if( h->param.rc.f_qcompress == 1 )
  366. + h->param.rc.b_mb_tree = 0;
  367. +
  368. h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
  369. && h->param.i_bframe
  370. && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
  371. @@ -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h )
  372. if( !h->param.b_cabac )
  373. h->param.analyse.i_trellis = 0;
  374. h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
  375. + if( !h->param.analyse.b_psy )
  376. + {
  377. + h->param.analyse.f_psy_rd = 0;
  378. + h->param.analyse.f_psy_trellis = 0;
  379. + }
  380. if( !h->param.analyse.i_trellis )
  381. h->param.analyse.f_psy_trellis = 0;
  382. h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
  383. @@ -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h )
  384. h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
  385. if( h->param.rc.f_aq_strength == 0 )
  386. h->param.rc.i_aq_mode = 0;
  387. + /* MB-tree requires AQ to be on, even if the strength is zero. */
  388. + if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
  389. + {
  390. + h->param.rc.i_aq_mode = 1;
  391. + h->param.rc.f_aq_strength = 0;
  392. + if( h->param.b_bframe_pyramid )
  393. + {
  394. + x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
  395. + h->param.b_bframe_pyramid = 0;
  396. + }
  397. + }
  398. h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  399. if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
  400. h->param.analyse.i_subpel_refine = 9;
  401. @@ -723,6 +749,9 @@ x264_t *x264_encoder_open ( x264_param_t *param )
  402. h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
  403. else
  404. h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
  405. + if( h->param.rc.b_mb_tree )
  406. + h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.i_lookahead );
  407. +
  408. h->frames.i_max_ref0 = h->param.i_frame_reference;
  409. h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
  410. h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering;
  411. @@ -730,7 +759,8 @@ x264_t *x264_encoder_open ( x264_param_t *param )
  412. && ( h->param.rc.i_rc_method == X264_RC_ABR
  413. || h->param.rc.i_rc_method == X264_RC_CRF
  414. || h->param.i_bframe_adaptive
  415. - || h->param.i_scenecut_threshold );
  416. + || h->param.i_scenecut_threshold
  417. + || h->param.rc.b_mb_tree );
  418. h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
  419. h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  420.  
  421. @@ -1443,7 +1473,12 @@ int x264_encoder_encode( x264_t *h,
  422. if( h->frames.b_have_lowres )
  423. x264_frame_init_lowres( h, fenc );
  424.  
  425. - if( h->param.rc.i_aq_mode )
  426. + if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
  427. + {
  428. + if( x264_macroblock_tree_read( h, fenc ) )
  429. + return -1;
  430. + }
  431. + else if( h->param.rc.i_aq_mode )
  432. x264_adaptive_quant_frame( h, fenc );
  433.  
  434. if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
  435. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  436. index 087e658..2a85a52 100644
  437. --- a/encoder/ratecontrol.c
  438. +++ b/encoder/ratecontrol.c
  439. @@ -71,6 +71,7 @@ struct x264_ratecontrol_t
  440. double fps;
  441. double bitrate;
  442. double rate_tolerance;
  443. + double qcompress;
  444. int nmb; /* number of macroblocks in a frame */
  445. int qp_constant[5];
  446.  
  447. @@ -106,6 +107,10 @@ struct x264_ratecontrol_t
  448. /* 2pass stuff */
  449. FILE *p_stat_file_out;
  450. char *psz_stat_file_tmpname;
  451. + FILE *p_mbtree_stat_file_out;
  452. + char *psz_mbtree_stat_file_tmpname;
  453. + char *psz_mbtree_stat_file_name;
  454. + FILE *p_mbtree_stat_file_in;
  455.  
  456. int num_entries; /* number of ratecontrol_entry_ts */
  457. ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
  458. @@ -210,30 +215,12 @@ static const float log2_lut[128] = {
  459. 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
  460. };
  461.  
  462. -static const uint8_t exp2_lut[64] = {
  463. - 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
  464. - 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
  465. - 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  466. - 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  467. -};
  468. -
  469. static ALWAYS_INLINE float x264_log2( uint32_t x )
  470. {
  471. int lz = x264_clz( x );
  472. return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
  473. }
  474.  
  475. -static ALWAYS_INLINE int x264_exp2fix8( float x )
  476. -{
  477. - int i, f;
  478. - x += 8;
  479. - if( x <= 0 ) return 0;
  480. - if( x >= 16 ) return 0xffff;
  481. - i = x;
  482. - f = (x-i)*64;
  483. - return (exp2_lut[f]+256) << i >> 8;
  484. -}
  485. -
  486. void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  487. {
  488. /* constants chosen to result in approximately the same overall bitrate as without AQ.
  489. @@ -241,6 +228,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  490. int mb_x, mb_y;
  491. float strength;
  492. float avg_adj = 0.f;
  493. + /* Need to init it anyways for MB tree. */
  494. + if( h->param.rc.f_aq_strength == 0 )
  495. + {
  496. + int mb_xy;
  497. + memset( frame->f_qp_offset, 0, sizeof(float) * h->mb.i_mb_count );
  498. + if( h->frames.b_have_lowres )
  499. + for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
  500. + frame->i_inv_qscale_factor[mb_xy] = 256;
  501. + return;
  502. + }
  503. +
  504. if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
  505. {
  506. for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  507. @@ -257,6 +255,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  508. }
  509. else
  510. strength = h->param.rc.f_aq_strength * 1.0397f;
  511. +
  512. for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  513. for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
  514. {
  515. @@ -291,6 +290,34 @@ void x264_adaptive_quant( x264_t *h )
  516. h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  517. }
  518.  
  519. +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
  520. +{
  521. + x264_ratecontrol_t *rc = h->rc;
  522. + uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type;
  523. +
  524. + if( i_type_actual != SLICE_TYPE_B )
  525. + {
  526. + uint8_t i_type;
  527. + if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
  528. + goto fail;
  529. +
  530. + if( i_type != i_type_actual )
  531. + {
  532. + x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
  533. + return -1;
  534. + }
  535. +
  536. + if( fread( frame->f_qp_offset, sizeof(float), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
  537. + goto fail;
  538. + }
  539. + else
  540. + x264_adaptive_quant_frame( h, frame );
  541. + return 0;
  542. +fail:
  543. + x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
  544. + return -1;
  545. +}
  546. +
  547. int x264_ratecontrol_new( x264_t *h )
  548. {
  549. x264_ratecontrol_t *rc;
  550. @@ -310,6 +337,14 @@ int x264_ratecontrol_new( x264_t *h )
  551. else
  552. rc->fps = 25.0;
  553.  
  554. + if( h->param.rc.b_mb_tree )
  555. + {
  556. + h->param.rc.f_pb_factor = 1;
  557. + rc->qcompress = 1;
  558. + }
  559. + else
  560. + rc->qcompress = h->param.rc.f_qcompress;
  561. +
  562. rc->bitrate = h->param.rc.i_bitrate * 1000.;
  563. rc->rate_tolerance = h->param.rc.f_rate_tolerance;
  564. rc->nmb = h->mb.i_mb_count;
  565. @@ -379,17 +414,18 @@ int x264_ratecontrol_new( x264_t *h )
  566. rc->accum_p_norm = .01;
  567. rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
  568. /* estimated ratio that produces a reasonable QP for the first I-frame */
  569. - rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
  570. + rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
  571. rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
  572. rc->last_non_b_pict_type = SLICE_TYPE_I;
  573. }
  574.  
  575. if( h->param.rc.i_rc_method == X264_RC_CRF )
  576. {
  577. - /* arbitrary rescaling to make CRF somewhat similar to QP */
  578. + /* Arbitrary rescaling to make CRF somewhat similar to QP.
  579. + * Try to compensate for MB-tree's effects as well. */
  580. double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  581. - rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
  582. - / qp2qscale( h->param.rc.f_rf_constant );
  583. + rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
  584. + / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) );
  585. }
  586.  
  587. rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
  588. @@ -437,6 +473,19 @@ int x264_ratecontrol_new( x264_t *h )
  589. x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
  590. return -1;
  591. }
  592. + if( h->param.rc.b_mb_tree )
  593. + {
  594. + char *mbtree_stats_in = x264_malloc( strlen(h->param.rc.psz_stat_in) + 8 );
  595. + strcpy( mbtree_stats_in, h->param.rc.psz_stat_in );
  596. + strcat( mbtree_stats_in, ".mbtree" );
  597. + rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
  598. + x264_free( mbtree_stats_in );
  599. + if( !rc->p_mbtree_stat_file_in )
  600. + {
  601. + x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  602. + return -1;
  603. + }
  604. + }
  605.  
  606. /* check whether 1st pass options were compatible with current options */
  607. if( !strncmp( stats_buf, "#options:", 9 ) )
  608. @@ -600,6 +649,22 @@ int x264_ratecontrol_new( x264_t *h )
  609. p = x264_param2string( &h->param, 1 );
  610. fprintf( rc->p_stat_file_out, "#options: %s\n", p );
  611. x264_free( p );
  612. + if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
  613. + {
  614. + rc->psz_mbtree_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 13 );
  615. + strcpy( rc->psz_mbtree_stat_file_tmpname, h->param.rc.psz_stat_out );
  616. + strcat( rc->psz_mbtree_stat_file_tmpname, ".mbtree.temp" );
  617. + rc->psz_mbtree_stat_file_name = x264_malloc( strlen(h->param.rc.psz_stat_out) + 8 );
  618. + strcpy( rc->psz_mbtree_stat_file_name, h->param.rc.psz_stat_out );
  619. + strcat( rc->psz_mbtree_stat_file_name, ".mbtree" );
  620. +
  621. + rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
  622. + if( rc->p_mbtree_stat_file_out == NULL )
  623. + {
  624. + x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  625. + return -1;
  626. + }
  627. + }
  628. }
  629.  
  630. for( i=0; i<h->param.i_threads; i++ )
  631. @@ -739,8 +804,8 @@ void x264_ratecontrol_summary( x264_t *h )
  632. {
  633. double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  634. x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
  635. - qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
  636. - * rc->cplxr_sum / rc->wanted_bits_window ) );
  637. + qscale2qp( pow( base_cplx, 1 - rc->qcompress )
  638. + * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) );
  639. }
  640. }
  641.  
  642. @@ -760,6 +825,18 @@ void x264_ratecontrol_delete( x264_t *h )
  643. }
  644. x264_free( rc->psz_stat_file_tmpname );
  645. }
  646. + if( rc->p_mbtree_stat_file_out )
  647. + {
  648. + fclose( rc->p_mbtree_stat_file_out );
  649. + if( h->i_frame >= rc->num_entries )
  650. + if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
  651. + {
  652. + x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
  653. + rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
  654. + }
  655. + x264_free( rc->psz_mbtree_stat_file_tmpname );
  656. + x264_free( rc->psz_mbtree_stat_file_name );
  657. + }
  658. x264_free( rc->pred );
  659. x264_free( rc->pred_b_from_p );
  660. x264_free( rc->entry );
  661. @@ -1125,6 +1202,15 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  662. h->stat.frame.i_mb_count_p,
  663. h->stat.frame.i_mb_count_skip,
  664. c_direct);
  665. +
  666. + /* TODO: deal with endianness.
  667. + * Don't re-write the data in multi-pass mode. */
  668. + if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
  669. + {
  670. + uint8_t i_type = h->sh.i_type;
  671. + fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out );
  672. + fwrite( h->fenc->f_qp_offset, sizeof(float), h->mb.i_mb_count, rc->p_mbtree_stat_file_out );
  673. + }
  674. }
  675.  
  676. if( rc->b_abr )
  677. @@ -1177,7 +1263,7 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
  678. double q;
  679. x264_zone_t *zone = get_zone( h, frame_num );
  680.  
  681. - q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
  682. + q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
  683.  
  684. // avoid NaN's in the rc_eq
  685. if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
  686. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  687. index 3310d3c..a0b62b2 100644
  688. --- a/encoder/ratecontrol.h
  689. +++ b/encoder/ratecontrol.h
  690. @@ -29,6 +29,7 @@ void x264_ratecontrol_delete( x264_t * );
  691.  
  692. void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
  693. void x264_adaptive_quant( x264_t * );
  694. +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
  695. void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
  696. void x264_ratecontrol_start( x264_t *, int i_force_qp );
  697. int x264_ratecontrol_slice_type( x264_t *, int i_frame );
  698. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  699. index 2c16429..7b15781 100644
  700. --- a/encoder/slicetype.c
  701. +++ b/encoder/slicetype.c
  702. @@ -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  703. x264_me_t m[2];
  704. int i_bcost = COST_MAX;
  705. int l, i;
  706. + int list_used = 0;
  707.  
  708. h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
  709. h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
  710. @@ -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  711. h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
  712. i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
  713. m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
  714. - if( i_bcost > i_cost ) \
  715. - i_bcost = i_cost; \
  716. + COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
  717. }
  718.  
  719. m[0].i_pixel = PIXEL_8x8;
  720. @@ -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  721. int i_cost;
  722. h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
  723. i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
  724. - if( i_bcost > i_cost )
  725. - i_bcost = i_cost;
  726. + COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
  727. }
  728. }
  729.  
  730. @@ -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  731. *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
  732. m[l].cost = *fenc_costs[l];
  733. }
  734. - i_bcost = X264_MIN( i_bcost, m[l].cost );
  735. + COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
  736. }
  737.  
  738. if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
  739. TRY_BIDIR( m[0].mv, m[1].mv, 5 );
  740.  
  741. + frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
  742. +
  743. lowres_intra_mb:
  744. /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
  745. /* FIXME: Should we still forbid them now that we cache intra scores? */
  746. - if( !b_bidir )
  747. + if( !b_bidir || h->param.rc.b_mb_tree )
  748. {
  749. int i_icost, b_intra;
  750. if( !fenc->b_intra_calculated )
  751. @@ -237,18 +238,23 @@ lowres_intra_mb:
  752. }
  753. else
  754. i_icost = fenc->i_intra_cost[i_mb_xy];
  755. - b_intra = i_icost < i_bcost;
  756. - if( b_intra )
  757. - i_bcost = i_icost;
  758. - if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  759. - && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  760. - || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  761. + if( !b_bidir )
  762. {
  763. - fenc->i_intra_mbs[b-p0] += b_intra;
  764. - fenc->i_cost_est[0][0] += i_icost;
  765. + b_intra = i_icost < i_bcost;
  766. + if( b_intra )
  767. + i_bcost = i_icost;
  768. + if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  769. + && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  770. + || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  771. + {
  772. + fenc->i_intra_mbs[b-p0] += b_intra;
  773. + fenc->i_cost_est[0][0] += i_icost;
  774. + }
  775. }
  776. }
  777.  
  778. + frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
  779. +
  780. return i_bcost;
  781. }
  782. #undef TRY_BIDIR
  783. @@ -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  784. x264_frame_t **frames, int p0, int p1, int b,
  785. int b_intra_penalty )
  786. {
  787. +
  788. int i_score = 0;
  789. /* Don't use the AQ'd scores for slicetype decision. */
  790. int i_score_aq = 0;
  791. @@ -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  792.  
  793. /* the edge mbs seem to reduce the predictive quality of the
  794. * whole frame's score, but are needed for a spatial distribution. */
  795. - if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  796. + if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
  797. + h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  798. {
  799. for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  800. {
  801. @@ -355,7 +363,170 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  802. return i_score;
  803. }
  804.  
  805. -#define MAX_LENGTH (X264_BFRAME_MAX*4)
  806. +/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
  807. + * re-running lookahead: reweight the cached lowres_costs of frames[b] by f_qp_offset, refill its row SATD sums for this (p0,p1) pair, and return the summed score. The 'a' argument is not used. */
  808. +static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
  809. + int p0, int p1, int b )
  810. +{
  811. + int i_score = 0;
  812. + int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
  813. + x264_emms(); /* NOTE(review): presumably resets MMX state before the float math below -- confirm */
  814. + for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- ) /* loop counters are the encoder's own h->mb.i_mb_y / i_mb_x fields */
  815. + {
  816. + row_satd[ h->mb.i_mb_y ] = 0;
  817. + for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
  818. + {
  819. + int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  820. + int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy];
  821. + float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
  822. + i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8; /* +128 then >>8 rounds away 8 fractional bits (assumes x264_exp2fix8 returns 8-bit fixed point -- confirm) */
  823. + row_satd[ h->mb.i_mb_y ] += i_mb_cost; /* every MB, borders included, feeds its row sum */
  824. + if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
  825. + h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
  826. + h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  827. + {
  828. + i_score += i_mb_cost; /* border MBs only reach the score when the frame is at most 2 MBs wide or tall */
  829. + }
  830. + }
  831. + }
  832. + return i_score;
  833. +}
  834. +/* Distribute frame b's per-MB propagate amount onto the macroblocks its motion vectors point at in frames[p0] (list 0) and frames[p1] (list 1), accumulating into their i_propagate_cost. Uses h->mb.i_mb_x / i_mb_y as loop counters. */
  835. +static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
  836. +{
  837. + int refs[2] = {p0,p1};
  838. + int dist_scale_factor = p1 == p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); /* only divide when the two references differ; 128 is the neutral midpoint otherwise */
  839. + int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
  840. +
  841. + for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
  842. + {
  843. + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  844. + {
  845. + int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  846. + int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
  847. + int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
  848. + int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
  849. + /* The approximate amount of data that this block contains. */
  850. + int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
  851. +
  852. + /* Don't propagate for an intra block (or one with no intra cost to scale against). */
  853. + if( intra_cost > 0 && inter_cost < intra_cost )
  854. + {
  855. + int mv[2][2], list;
  856. + /* Keep the (intra-inter)/intra share; done inside the guard so the divisor is never zero. Divide by 64 for per-pixel summing. */
  857. + propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
  858. +
  859. + mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
  860. + mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
  861. + if( b != p1 ) /* otherwise mv[1] stays unset; it is only read when lists_used has bit 1 set */
  862. + {
  863. + mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
  864. + mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
  865. + }
  866. +
  867. + /* Follow the MVs to the previous frame(s). */
  868. + for( list = 0; list < 2; list++ )
  869. + if( (lists_used >> list)&1 )
  870. + {
  871. + int x = mv[list][0];
  872. + int y = mv[list][1];
  873. + int listamount = propagate_amount;
  874. + int mbx = (x>>5)+h->mb.i_mb_x; /* upper MV bits select the target MB; the low 5 bits feed the weights below */
  875. + int mby = ((y>>5)+h->mb.i_mb_y);
  876. + int idx0 = mbx + mby*h->mb.i_mb_stride;
  877. + int idx1 = idx0 + 1;
  878. + int idx2 = idx0 + h->mb.i_mb_stride;
  879. + int idx3 = idx0 + h->mb.i_mb_stride + 1;
  880. + int idx0weight = (32-(y&31))*(32-(x&31)); /* the four bilinear weights sum to 32*32 */
  881. + int idx1weight = (32-(y&31))*(x&31);
  882. + int idx2weight = (y&31)*(32-(x&31));
  883. + int idx3weight = (y&31)*(x&31);
  884. +
  885. + /* Apply bipred weighting. */
  886. + if( lists_used == 3 )
  887. + listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
  888. +
  889. + /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
  890. + * be counted. */
  891. + if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
  892. + {
  893. + frames[refs[list]]->i_propagate_cost[idx0] += (listamount*idx0weight+8)>>4;
  894. + frames[refs[list]]->i_propagate_cost[idx1] += (listamount*idx1weight+8)>>4;
  895. + frames[refs[list]]->i_propagate_cost[idx2] += (listamount*idx2weight+8)>>4;
  896. + frames[refs[list]]->i_propagate_cost[idx3] += (listamount*idx3weight+8)>>4;
  897. + }
  898. + else /* Check offsets individually */
  899. + {
  900. + if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
  901. + frames[refs[list]]->i_propagate_cost[idx0] += (listamount*idx0weight+8)>>4;
  902. + if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
  903. + frames[refs[list]]->i_propagate_cost[idx1] += (listamount*idx1weight+8)>>4;
  904. + if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
  905. + frames[refs[list]]->i_propagate_cost[idx2] += (listamount*idx2weight+8)>>4;
  906. + if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
  907. + frames[refs[list]]->i_propagate_cost[idx3] += (listamount*idx3weight+8)>>4;
  908. + }
  909. + }
  910. + }
  911. + }
  912. + }
  913. +}
  914. +/* Walk frames[] backwards from the last non-B frame down to index !b_intra, propagating each frame's cost into its references, then subtract 5*(1-qcompress)*log2((intra+propagated)/intra) from f_qp_offset of the frame where the walk ended. */
  915. +static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
  916. +{
  917. + int i, idx = !b_intra; /* idx: lowest frame index the backward walk may use (0 when b_intra is set, else 1) */
  918. + int last_nonb, cur_nonb = 1;
  919. + if( b_intra )
  920. + x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); /* p0 = p1 = b = 0: frames[0] costed against itself */
  921. +
  922. + i = num_frames-1;
  923. + while( i > 0 && frames[i]->i_type == X264_TYPE_B ) /* skip trailing B-frames, never going below index 0 */
  924. + i--;
  925. + last_nonb = i;
  926. +
  927. + if( last_nonb < 0 ) /* only true when num_frames <= 0 */
  928. + return;
  929. +
  930. + memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  931. + while( i-- > idx )
  932. + {
  933. + cur_nonb = i;
  934. + while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) /* find the previous non-B frame */
  935. + cur_nonb--;
  936. + if( cur_nonb < idx )
  937. + break;
  938. + x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* last_nonb costed as b == p1 with cur_nonb as p0 */
  939. + memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); /* cleared before anything propagates into it */
  940. + x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
  941. + while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* then every B-frame between the two non-B frames */
  942. + {
  943. + x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
  944. + memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  945. + x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
  946. + i--;
  947. + }
  948. + last_nonb = cur_nonb;
  949. + }
  950. + x264_emms(); /* NOTE(review): presumably resets MMX state before the float math below -- confirm */
  951. +
  952. + for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) /* loop counters are the encoder's own h->mb fields */
  953. + {
  954. + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  955. + {
  956. + int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  957. + float intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8; /* integer product rounded by +128, >>8, then stored as float */
  958. +
  959. + if( intra_cost ) /* skip blocks with zero intra cost: the ratio below divides by it */
  960. + {
  961. + float propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
  962. + float ratio = (intra_cost + propagate_cost) / (intra_cost);
  963. + /* Allow the constant to be adjusted via qcompress, since the two
  964. + * concepts are very similar. */
  965. + frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2f(ratio);
  966. + }
  967. + }
  968. + }
  969. +}
  970.  
  971. static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
  972. {
  973. @@ -393,14 +564,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
  974. /* Uses strings due to the fact that the speed of the control functions is
  975. negligable compared to the cost of running slicetype_frame_cost, and because
  976. it makes debugging easier. */
  977. -static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
  978. +static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
  979. {
  980. - char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
  981. + char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
  982. int num_paths = X264_MIN(max_bframes+1, length);
  983. int suffix_size, loc, path;
  984. int best_cost = COST_MAX;
  985. int best_path_index = 0;
  986. - length = X264_MIN(length,MAX_LENGTH);
  987. + length = X264_MIN(length,X264_LOOKAHEAD_MAX);
  988.  
  989. /* Iterate over all currently possible paths and add suffixes to each one */
  990. for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
  991. @@ -426,15 +597,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  992. memcpy( best_paths[length], paths[best_path_index], length );
  993. }
  994.  
  995. -static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
  996. -{
  997. - char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
  998. - int n;
  999. - for( n = 2; n < length-1; n++ )
  1000. - x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
  1001. - return strspn( best_paths[length-2], "B" );
  1002. -}
  1003. -
  1004. static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
  1005. {
  1006. x264_frame_t *frame = frames[p1];
  1007. @@ -477,13 +639,13 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
  1008. return res;
  1009. }
  1010.  
  1011. -static void x264_slicetype_analyse( x264_t *h )
  1012. +static void x264_slicetype_analyse( x264_t *h, int keyframe )
  1013. {
  1014. x264_mb_analysis_t a;
  1015. - x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
  1016. + x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
  1017. int num_frames;
  1018. int keyint_limit;
  1019. - int j;
  1020. + int i,j;
  1021. int i_mb_count = NUM_MBS;
  1022. int cost1p0, cost2p0, cost1b1, cost2p1;
  1023. int idr_frame_type;
  1024. @@ -497,96 +659,150 @@ static void x264_slicetype_analyse( x264_t *h )
  1025. frames[j+1] = h->frames.next[j];
  1026. keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
  1027. num_frames = X264_MIN( j, keyint_limit );
  1028. - if( num_frames == 0 )
  1029. +
  1030. + if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
  1031. return;
  1032.  
  1033. x264_lowres_context_init( h, &a );
  1034. idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
  1035.  
  1036. - if( num_frames == 1 )
  1037. + if( num_frames == 1 && !h->param.rc.b_mb_tree )
  1038. {
  1039. -no_b_frames:
  1040. frames[1]->i_type = X264_TYPE_P;
  1041. if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1042. frames[1]->i_type = idr_frame_type;
  1043. return;
  1044. }
  1045.  
  1046. - if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  1047. + /* This is important psy-wise: if we have a non-scenecut keyframe,
  1048. + * there will be significant visual artifacts if the frames just before
  1049. + * go down in quality due to being referenced less, despite it being
  1050. + * more RD-optimal. */
  1051. + if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
  1052. + num_frames = j;
  1053. +
  1054. + char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
  1055. + int n;
  1056. + int num_bframes = 0;
  1057. + int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1058. + int num_analysed_frames = num_frames;
  1059. + int reset_start;
  1060. + if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1061. {
  1062. - int num_bframes;
  1063. - int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1064. - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1065. + frames[1]->i_type = idr_frame_type;
  1066. + return;
  1067. + }
  1068. +
  1069. + if( h->param.i_bframe )
  1070. + {
  1071. + if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  1072. {
  1073. - frames[1]->i_type = idr_frame_type;
  1074. - return;
  1075. + /* Perform the frametype analysis. */
  1076. + for( n = 2; n < num_frames-1; n++ )
  1077. + x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
  1078. + num_bframes = strspn( best_paths[num_frames-2], "B" );
  1079. + /* Load the results of the analysis into the frame types. */
  1080. + for( j = 1; j < num_frames; j++ )
  1081. + frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
  1082. + frames[num_frames]->i_type = X264_TYPE_P;
  1083. }
  1084. - num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
  1085. - assert(num_bframes < num_frames);
  1086. -
  1087. - for( j = 1; j < num_bframes+1; j++ )
  1088. + else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  1089. {
  1090. - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1091. + for( i = 0; i < num_frames-(2-!i); )
  1092. {
  1093. - frames[j]->i_type = X264_TYPE_P;
  1094. - return;
  1095. - }
  1096. - frames[j]->i_type = X264_TYPE_B;
  1097. - }
  1098. - frames[num_bframes+1]->i_type = X264_TYPE_P;
  1099. - }
  1100. - else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  1101. - {
  1102. - cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
  1103. - if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
  1104. - goto no_b_frames;
  1105. + cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
  1106. + if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
  1107. + {
  1108. + frames[i+1]->i_type = X264_TYPE_P;
  1109. + frames[i+2]->i_type = X264_TYPE_P;
  1110. + i += 2;
  1111. + continue;
  1112. + }
  1113.  
  1114. - cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
  1115. - cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
  1116. - cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
  1117. + cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
  1118. + cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
  1119. + cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
  1120.  
  1121. - if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
  1122. - goto no_b_frames;
  1123. + if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
  1124. + {
  1125. + frames[i+1]->i_type = X264_TYPE_P;
  1126. + frames[i+2]->i_type = X264_TYPE_P;
  1127. + i += 2;
  1128. + continue;
  1129. + }
  1130.  
  1131. - // arbitrary and untuned
  1132. - #define INTER_THRESH 300
  1133. - #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
  1134. - frames[1]->i_type = X264_TYPE_B;
  1135. + // arbitrary and untuned
  1136. + #define INTER_THRESH 300
  1137. + #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
  1138. + frames[i+1]->i_type = X264_TYPE_B;
  1139. + frames[i+2]->i_type = X264_TYPE_P;
  1140.  
  1141. - for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
  1142. + for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
  1143. + {
  1144. + int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
  1145. + int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
  1146. +
  1147. + if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
  1148. + {
  1149. + frames[j]->i_type = X264_TYPE_P;
  1150. + break;
  1151. + }
  1152. + else
  1153. + frames[j]->i_type = X264_TYPE_B;
  1154. + }
  1155. + i = j;
  1156. + }
  1157. + frames[i+!i]->i_type = X264_TYPE_P;
  1158. + num_bframes = 0;
  1159. + while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
  1160. + num_bframes++;
  1161. + }
  1162. + else
  1163. {
  1164. - int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
  1165. - int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
  1166. + num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1167. + for( j = 1; j < num_frames; j++ )
  1168. + frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
  1169. + frames[num_frames]->i_type = X264_TYPE_P;
  1170. + }
  1171.  
  1172. - if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
  1173. + /* Check scenecut on the first minigop. */
  1174. + for( j = 1; j < num_bframes+1; j++ )
  1175. + if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1176. {
  1177. frames[j]->i_type = X264_TYPE_P;
  1178. + num_analysed_frames = j;
  1179. break;
  1180. }
  1181. - else
  1182. - frames[j]->i_type = X264_TYPE_B;
  1183. - }
  1184. +
  1185. + reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
  1186. }
  1187. else
  1188. {
  1189. - int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1190. - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1191. - {
  1192. - frames[1]->i_type = idr_frame_type;
  1193. - return;
  1194. - }
  1195. + for( j = 1; j < num_frames; j++ )
  1196. + frames[j]->i_type = X264_TYPE_P;
  1197. + reset_start = !keyframe + 1;
  1198. + }
  1199.  
  1200. - for( j = 1; j < max_bframes+1; j++ )
  1201. - {
  1202. - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1203. + /* Perform the actual macroblock tree analysis.
  1204. + * Don't go farther than the lookahead parameter; this helps in short GOPs. */
  1205. + if( h->param.rc.b_mb_tree )
  1206. + x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.i_lookahead), keyframe );
  1207. +
  1208. + /* Enforce keyframe limit. */
  1209. + if( h->param.i_bframe )
  1210. + for( j = 0; j <= num_bframes; j++ )
  1211. + if( j+1 > keyint_limit )
  1212. {
  1213. - frames[j]->i_type = X264_TYPE_P;
  1214. - return;
  1215. + if( j )
  1216. + frames[j]->i_type = X264_TYPE_P;
  1217. + frames[j+1]->i_type = idr_frame_type;
  1218. + reset_start = j+2;
  1219. + break;
  1220. }
  1221. - frames[j]->i_type = X264_TYPE_B;
  1222. - }
  1223. - frames[max_bframes+1]->i_type = X264_TYPE_P;
  1224. - }
  1225. +
  1226. + /* Restore frametypes for all frames that haven't actually been decided yet. */
  1227. + for( j = reset_start; j <= num_frames; j++ )
  1228. + frames[j]->i_type = X264_TYPE_AUTO;
  1229. }
  1230.  
  1231. void x264_slicetype_decide( x264_t *h )
  1232. @@ -606,8 +822,9 @@ void x264_slicetype_decide( x264_t *h )
  1233. x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
  1234. }
  1235. else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
  1236. - || h->param.i_scenecut_threshold )
  1237. - x264_slicetype_analyse( h );
  1238. + || h->param.i_scenecut_threshold
  1239. + || h->param.rc.b_mb_tree )
  1240. + x264_slicetype_analyse( h, 0 );
  1241.  
  1242. for( bframes = 0;; bframes++ )
  1243. {
  1244. @@ -645,7 +862,9 @@ void x264_slicetype_decide( x264_t *h )
  1245. frm->i_type = X264_TYPE_P;
  1246. }
  1247.  
  1248. - if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
  1249. + if( frm->i_type == X264_TYPE_AUTO )
  1250. + frm->i_type = X264_TYPE_B;
  1251. +
  1252. else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
  1253. }
  1254. }
  1255. @@ -653,7 +872,7 @@ void x264_slicetype_decide( x264_t *h )
  1256. int x264_rc_analyse_slice( x264_t *h )
  1257. {
  1258. x264_mb_analysis_t a;
  1259. - x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
  1260. + x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
  1261. int p0=0, p1, b;
  1262. int cost;
  1263.  
  1264. @@ -662,6 +881,12 @@ int x264_rc_analyse_slice( x264_t *h )
  1265. if( IS_X264_TYPE_I(h->fenc->i_type) )
  1266. {
  1267. p1 = b = 0;
  1268. + /* For MB-tree, we have to perform propagation analysis on I-frames too. */
  1269. + if( h->param.rc.b_mb_tree )
  1270. + {
  1271. + h->frames.last_nonb = h->fenc;
  1272. + x264_slicetype_analyse( h, 1 );
  1273. + }
  1274. }
  1275. else if( X264_TYPE_P == h->fenc->i_type )
  1276. {
  1277. @@ -680,11 +905,16 @@ int x264_rc_analyse_slice( x264_t *h )
  1278. frames[p0] = h->fref0[0];
  1279. frames[b] = h->fenc;
  1280.  
  1281. - cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  1282. + if( h->param.rc.b_mb_tree )
  1283. + cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
  1284. + else
  1285. + {
  1286. + cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  1287.  
  1288. - /* In AQ, use the weighted score instead. */
  1289. - if( h->param.rc.i_aq_mode )
  1290. - cost = frames[b]->i_cost_est[b-p0][p1-b];
  1291. + /* In AQ, use the weighted score instead. */
  1292. + if( h->param.rc.i_aq_mode )
  1293. + cost = frames[b]->i_cost_est[b-p0][p1-b];
  1294. + }
  1295.  
  1296. h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
  1297. h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
  1298. diff --git a/x264.c b/x264.c
  1299. index c3b4f29..5452dba 100644
  1300. --- a/x264.c
  1301. +++ b/x264.c
  1302. @@ -168,9 +168,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1303. H0( " - baseline,main,high\n" );
  1304. H0( " --preset Use a preset to select encoding settings [medium]\n" );
  1305. H0( " Overridden by user settings\n");
  1306. - H1( " - ultrafast,veryfast,fast,medium\n"
  1307. - " - slow,slower,placebo\n" );
  1308. - else H0( " - ultrafast,veryfast,fast,medium,slow,slower\n" );
  1309. + H0( " - ultrafast,veryfast,faster,fast\n"
  1310. + " - medium,slow,slower,placebo\n" );
  1311. H0( " --tune Tune the settings for a particular type of source\n" );
  1312. H0( " Overridden by user settings\n");
  1313. H1( " - film,animation,grain,psnr,ssim\n"
  1314. @@ -184,6 +183,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1315. H1( " -i, --min-keyint <integer> Minimum GOP size [%d]\n", defaults->i_keyint_min );
  1316. H1( " --no-scenecut Disable adaptive I-frame decision\n" );
  1317. H1( " --scenecut <integer> How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
  1318. + H0( " --lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->i_lookahead );
  1319. H0( " -b, --bframes <integer> Number of B-frames between I and P [%d]\n", defaults->i_bframe );
  1320. H1( " --b-adapt Adaptive B-frame decision method [%d]\n"
  1321. " Higher values may lower threading efficiency.\n"
  1322. @@ -228,6 +228,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1323. " - 2: Last pass, does not overwrite stats file\n"
  1324. " - 3: Nth pass, overwrites stats file\n" );
  1325. H0( " --stats <string> Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
  1326. + H0( " --no-mbtree Disable mb-tree ratecontrol.\n");
  1327. H0( " --qcomp <float> QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
  1328. H1( " --cplxblur <float> Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
  1329. H1( " --qblur <float> Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
  1330. @@ -277,6 +278,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1331. " #1: RD (requires subme>=6)\n"
  1332. " #2: Trellis (requires trellis, experimental)\n",
  1333. defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis );
  1334. + H1( " --no-psy Disable all visual optimizations that worsen\n"
  1335. + " both PSNR and SSIM.\n" );
  1336. H0( " --no-mixed-refs Don't decide references on a per partition basis\n" );
  1337. H1( " --no-chroma-me Ignore chroma in motion estimation\n" );
  1338. H0( " --no-8x8dct Disable adaptive spatial transform size\n" );
  1339. @@ -383,6 +386,7 @@ static struct option long_options[] =
  1340. { "slow-firstpass", no_argument, NULL, OPT_SLOWFIRSTPASS },
  1341. { "bitrate", required_argument, NULL, 'B' },
  1342. { "bframes", required_argument, NULL, 'b' },
  1343. + { "lookahead", required_argument, NULL, 0 },
  1344. { "b-adapt", required_argument, NULL, 0 },
  1345. { "no-b-adapt", no_argument, NULL, 0 },
  1346. { "b-bias", required_argument, NULL, 0 },
  1347. @@ -422,6 +426,7 @@ static struct option long_options[] =
  1348. { "mvrange-thread", required_argument, NULL, 0 },
  1349. { "subme", required_argument, NULL, 'm' },
  1350. { "psy-rd", required_argument, NULL, 0 },
  1351. + { "no-psy", no_argument, NULL, 0 },
  1352. { "mixed-refs", no_argument, NULL, 0 },
  1353. { "no-mixed-refs", no_argument, NULL, 0 },
  1354. { "no-chroma-me", no_argument, NULL, 0 },
  1355. @@ -446,6 +451,8 @@ static struct option long_options[] =
  1356. { "pass", required_argument, NULL, 'p' },
  1357. { "stats", required_argument, NULL, 0 },
  1358. { "qcomp", required_argument, NULL, 0 },
  1359. + { "mbtree", no_argument, NULL, 0 },
  1360. + { "no-mbtree", no_argument, NULL, 0 },
  1361. { "qblur", required_argument, NULL, 0 },
  1362. { "cplxblur", required_argument, NULL, 0 },
  1363. { "zones", required_argument, NULL, 0 },
  1364. @@ -542,6 +549,8 @@ static int Parse( int argc, char **argv,
  1365. param->rc.i_aq_mode = 0;
  1366. param->analyse.b_mixed_references = 0;
  1367. param->analyse.i_trellis = 0;
  1368. + param->i_bframe_adaptive = X264_B_ADAPT_NONE;
  1369. + param->rc.b_mb_tree = 0;
  1370. }
  1371. else if( !strcasecmp( optarg, "veryfast" ) )
  1372. {
  1373. @@ -551,12 +560,20 @@ static int Parse( int argc, char **argv,
  1374. param->i_frame_reference = 1;
  1375. param->analyse.b_mixed_references = 0;
  1376. param->analyse.i_trellis = 0;
  1377. + param->rc.b_mb_tree = 0;
  1378. }
  1379. - else if( !strcasecmp( optarg, "fast" ) )
  1380. + else if( !strcasecmp( optarg, "faster" ) )
  1381. {
  1382. param->analyse.b_mixed_references = 0;
  1383. param->i_frame_reference = 2;
  1384. param->analyse.i_subpel_refine = 4;
  1385. + param->i_lookahead = 30;
  1386. + }
  1387. + else if( !strcasecmp( optarg, "fast" ) )
  1388. + {
  1389. + param->i_frame_reference = 2;
  1390. + param->analyse.i_subpel_refine = 6;
  1391. + param->i_lookahead = 40;
  1392. }
  1393. else if( !strcasecmp( optarg, "medium" ) )
  1394. {
  1395. @@ -644,11 +661,13 @@ static int Parse( int argc, char **argv,
  1396. {
  1397. param->analyse.f_psy_rd = 0;
  1398. param->rc.i_aq_mode = X264_AQ_NONE;
  1399. + param->analyse.b_psy = 0;
  1400. }
  1401. else if( !strcasecmp( optarg, "ssim" ) )
  1402. {
  1403. param->analyse.f_psy_rd = 0;
  1404. param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
  1405. + param->analyse.b_psy = 0;
  1406. }
  1407. else if( !strcasecmp( optarg, "fastdecode" ) )
  1408. {
  1409. @@ -662,7 +681,6 @@ static int Parse( int argc, char **argv,
  1410. param->i_deblocking_filter_alphac0 = -1;
  1411. param->i_deblocking_filter_beta = -1;
  1412. param->analyse.f_psy_trellis = 0.2;
  1413. - param->rc.f_ip_factor = 2.1;
  1414. param->rc.f_aq_strength = 1.3;
  1415. if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
  1416. param->analyse.inter |= X264_ANALYSE_PSUB8x8;
  1417. diff --git a/x264.h b/x264.h
  1418. index 2dfcc8d..9ea74af 100644
  1419. --- a/x264.h
  1420. +++ b/x264.h
  1421. @@ -35,7 +35,7 @@
  1422.  
  1423. #include <stdarg.h>
  1424.  
  1425. -#define X264_BUILD 68
  1426. +#define X264_BUILD 69
  1427.  
  1428. /* x264_t:
  1429. * opaque handler for encoder */
  1430. @@ -188,6 +188,7 @@ typedef struct x264_param_t
  1431. int i_keyint_max; /* Force an IDR keyframe at this interval */
  1432. int i_keyint_min; /* Scenecuts closer together than this are coded as I, not IDR. */
  1433. int i_scenecut_threshold; /* how aggressively to insert extra I frames */
  1434. + int i_lookahead;
  1435. int i_bframe; /* how many b-frame between 2 references pictures */
  1436. int i_bframe_adaptive;
  1437. int i_bframe_bias;
  1438. @@ -242,6 +243,7 @@ typedef struct x264_param_t
  1439. int i_noise_reduction; /* adaptive pseudo-deadzone */
  1440. float f_psy_rd; /* Psy RD strength */
  1441. float f_psy_trellis; /* Psy trellis strength */
  1442. + int b_psy; /* Toggle all psy optimizations */
  1443.  
  1444. /* the deadzone size that will be used in luma quantization */
  1445. int i_luma_deadzone[2]; /* {inter, intra} */
  1446. @@ -271,6 +273,7 @@ typedef struct x264_param_t
  1447.  
  1448. int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
  1449. float f_aq_strength;
  1450. + int b_mb_tree; /* Macroblock-tree ratecontrol. */
  1451.  
  1452. /* 2pass */
  1453. int b_stat_write; /* Enable stat writing in psz_stat_out */
  1454. --
  1455. 1.6.1.2
  1456.  
  1457.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement