Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Dark Shikari

By: a guest on Aug 5th, 2009  |  syntax: None  |  size: 75.67 KB  |  views: 370  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
This paste has a previous version, view the difference. Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. From f21e71a04ba65aff9b5a4bfa8a73fd86c463f4ee Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Mon, 3 Aug 2009 20:52:30 -0700
  4. Subject: [PATCH 1/2] Various 1-pass VBV tweaks
  5.  Make predictors have an offset in addition to a multiplier.
  6.  This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI.
  7.  We tried linear regressions, but they were very unreliable as predictors.
  8.  Also allow VBV to be slightly more aggressive in raising QPs, so that it does not run out of bits in some situations.
  9.  Up to 1 dB improvement on some clips.
  10.  
  11. ---
  12.  encoder/ratecontrol.c |   32 +++++++++++++++++++++-----------
  13.  1 files changed, 21 insertions(+), 11 deletions(-)
  14.  
  15. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  16. index 2f88708..087e658 100644
  17. --- a/encoder/ratecontrol.c
  18. +++ b/encoder/ratecontrol.c
  19.  -58,6 +58,7 @@ typedef struct
  20.      double coeff;
  21.      double count;
  22.      double decay;
  23. +    double offset;
  24.  } predictor_t;
  25.  
  26.  struct x264_ratecontrol_t
  27.  -409,9 +410,11 @@ int x264_ratecontrol_new( x264_t *h )
  28.          rc->pred[i].coeff= 2.0;
  29.          rc->pred[i].count= 1.0;
  30.          rc->pred[i].decay= 0.5;
  31. +        rc->pred[i].offset= 0.0;
  32.          rc->row_preds[i].coeff= .25;
  33.          rc->row_preds[i].count= 1.0;
  34.          rc->row_preds[i].decay= 0.5;
  35. +        rc->row_preds[i].offset= 0.0;
  36.      }
  37.      *rc->pred_b_from_p = rc->pred[0];
  38.  
  39.  -953,7 +956,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  40.          if( y < h->sps->i_mb_height-1 )
  41.          {
  42.              int i_estimated;
  43. -            int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
  44. +            int avg_qp = X264_MIN(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
  45.                         + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
  46.              rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
  47.              i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
  48.  -1153,10 +1156,6 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  49.              {
  50.                  update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
  51.                                    h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
  52. -                /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */
  53. -                /* Hackily cap the predictor coeff in case this happens. */
  54. -                /* FIXME FIXME FIXME */
  55. -                rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. );
  56.                  rc->bframe_bits = 0;
  57.              }
  58.          }
  59.  -1270,17 +1269,28 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
  60.  
  61.  static double predict_size( predictor_t *p, double q, double var )
  62.  {
  63. -     return p->coeff*var / (q*p->count);
  64. +     return (p->coeff*var + p->offset) / (q*p->count);
  65.  }
  66.  
  67.  static void update_predictor( predictor_t *p, double q, double var, double bits )
  68.  {
  69. +    const double range = 1.5;
  70.      if( var < 10 )
  71.          return;
  72. -    p->count *= p->decay;
  73. -    p->coeff *= p->decay;
  74. -    p->count ++;
  75. -    p->coeff += bits*q / var;
  76. +    double old_coeff = p->coeff / p->count;
  77. +    double new_coeff = bits*q / var;
  78. +    double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
  79. +    double new_offset = bits*q - new_coeff_clipped * var;
  80. +    if( new_offset >= 0 )
  81. +        new_coeff = new_coeff_clipped;
  82. +    else
  83. +        new_offset = 0;
  84. +    p->count  *= p->decay;
  85. +    p->coeff  *= p->decay;
  86. +    p->offset *= p->decay;
  87. +    p->count  ++;
  88. +    p->coeff  += new_coeff;
  89. +    p->offset += new_offset;
  90.  }
  91.  
  92.  // update VBV after encoding a frame
  93.  -1350,7 +1360,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
  94.          double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
  95.          double qf = 1.0;
  96.          if( bits > rcc->buffer_fill/2 )
  97. -            qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
  98. +            qf = rcc->buffer_fill/(2*bits);
  99.          q /= qf;
  100.          bits *= qf;
  101.          if( bits < rcc->buffer_rate/2 )
  102. --
  103. 1.6.1.2
  104.  
  105.  
  106. From e7182499c7bc23d3376090f66d7617b2080f2b46 Mon Sep 17 00:00:00 2001
  107. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  108. Date: Tue, 4 Aug 2009 17:46:33 -0700
  109. Subject: [PATCH 2/2] Macroblock-tree ratecontrol
  110.  On by default; can be turned off with --no-mbtree.
  111.  Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
  112.  Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode.
  113.  Doesn't work with b-pyramid yet.
  114.  Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
  115.  This makes the "medium" preset a bit slower.  Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
  116.  All presets "fast" and above will have MB-tree on.
  117.  Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis.
  118.  Default is 40; larger values will be slower and require more memory but give more accurate results.
  119.  This value will be used in the future to control ratecontrol lookahead (VBV).
  120.  Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
  121.  This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
  122.  Quality improvement from MB-tree is about 2-70% depending on content.
  123.  Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.
  124.  
  125. ---
  126.  common/common.c       |   22 ++-
  127.  common/common.h       |   50 ++++++-
  128.  common/frame.c        |   10 +-
  129.  common/frame.h        |    3 +
  130.  common/osdep.h        |    9 +-
  131.  encoder/analyse.c     |    4 +-
  132.  encoder/encoder.c     |   56 ++++++-
  133.  encoder/ratecontrol.c |  201 +++++++++++++++++-------
  134.  encoder/ratecontrol.h |    3 +-
  135.  encoder/slicetype.c   |  424 ++++++++++++++++++++++++++++++++++++++-----------
  136.  x264.c                |   31 +++-
  137.  x264.h                |    5 +-
  138.  12 files changed, 639 insertions(+), 179 deletions(-)
  139.  
  140. diff --git a/common/common.c b/common/common.c
  141. index 9260c64..371ed1e 100644
  142. --- a/common/common.c
  143. +++ b/common/common.c
  144.  -95,6 +95,7 @@ void    x264_param_default( x264_param_t *param )
  145.      param->rc.f_pb_factor = 1.3;
  146.      param->rc.i_aq_mode = X264_AQ_VARIANCE;
  147.      param->rc.f_aq_strength = 1.0;
  148. +    param->rc.i_lookahead = 40;
  149.  
  150.      param->rc.b_stat_write = 0;
  151.      param->rc.psz_stat_out = "x264_2pass.log";
  152.  -104,6 +105,7 @@ void    x264_param_default( x264_param_t *param )
  153.      param->rc.f_qblur = 0.5;
  154.      param->rc.f_complexity_blur = 20;
  155.      param->rc.i_zones = 0;
  156. +    param->rc.b_mb_tree = 1;
  157.  
  158.      /* Log */
  159.      param->pf_log = x264_log_default;
  160.  -117,6 +119,7 @@ void    x264_param_default( x264_param_t *param )
  161.      param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
  162.      param->analyse.i_me_method = X264_ME_HEX;
  163.      param->analyse.f_psy_rd = 1.0;
  164. +    param->analyse.b_psy = 1;
  165.      param->analyse.f_psy_trellis = 0;
  166.      param->analyse.i_me_range = 16;
  167.      param->analyse.i_subpel_refine = 7;
  168.  -493,6 +496,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  169.              p->analyse.f_psy_trellis = 0;
  170.          }
  171.      }
  172. +    OPT("psy")
  173. +        p->analyse.b_psy = atobool(value);
  174.      OPT("chroma-me")
  175.          p->analyse.b_chroma_me = atobool(value);
  176.      OPT("mixed-refs")
  177.  -524,6 +529,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  178.          p->rc.f_rf_constant = atof(value);
  179.          p->rc.i_rc_method = X264_RC_CRF;
  180.      }
  181. +    OPT("rc-lookahead")
  182. +        p->rc.i_lookahead = atoi(value);
  183.      OPT2("qpmin", "qp-min")
  184.          p->rc.i_qp_min = atoi(value);
  185.      OPT2("qpmax", "qp-max")
  186.  -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  187.      }
  188.      OPT("qcomp")
  189.          p->rc.f_qcompress = atof(value);
  190. +    OPT("mbtree")
  191. +        p->rc.b_mb_tree = atobool(value);
  192.      OPT("qblur")
  193.          p->rc.f_qblur = atof(value);
  194.      OPT2("cplxblur", "cplx-blur")
  195.  -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
  196.      s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
  197.      s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
  198.      s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
  199. -    s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  200. +    s += sprintf( s, " psy=%d", p->analyse.b_psy );
  201. +    if( p->analyse.b_psy )
  202. +        s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  203.      s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
  204.      s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
  205.      s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
  206.  -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
  207.      s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
  208.                    p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
  209.  
  210. -    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
  211. +    if( p->rc.b_mb_tree )
  212. +        s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
  213. +
  214. +    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
  215.                                 ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
  216. -                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
  217. +                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
  218.      if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
  219.      {
  220.          if( p->rc.i_rc_method == X264_RC_CRF )
  221.  -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  222.      if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
  223.      {
  224.          s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
  225. -        if( p->i_bframe )
  226. +        if( p->i_bframe && !p->rc.b_mb_tree )
  227.              s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
  228.          s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
  229.          if( p->rc.i_aq_mode )
  230. diff --git a/common/common.h b/common/common.h
  231. index 8a25a13..30163ab 100644
  232. --- a/common/common.h
  233. +++ b/common/common.h
  234.  -51,6 +51,7 @@
  235.  #define X264_SLICE_MAX 4
  236.  #define X264_NAL_MAX (4 + X264_SLICE_MAX)
  237.  #define X264_PCM_COST (386*8)
  238. +#define X264_LOOKAHEAD_MAX 250
  239.  
  240.  // number of pixels (per thread) in progress at any given time.
  241.  // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
  242.  -152,6 +153,49 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
  243.      return amvd0 + (amvd1<<16);
  244.  }
  245.  
  246. +static const uint8_t exp2_lut[64] = {
  247. +      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
  248. +     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
  249. +    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  250. +    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  251. +};
  252. +
  253. +static ALWAYS_INLINE int x264_exp2fix8( float x )
  254. +{
  255. +    int i, f;
  256. +    x += 8;
  257. +    if( x <= 0 ) return 0;
  258. +    if( x >= 16 ) return 0xffff;
  259. +    i = x;
  260. +    f = (x-i)*64;
  261. +    return (exp2_lut[f]+256) << i >> 8;
  262. +}
  263. +
  264. +static const float log2_lut[128] = {
  265. +    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
  266. +    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
  267. +    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
  268. +    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
  269. +    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
  270. +    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
  271. +    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
  272. +    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
  273. +    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
  274. +    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
  275. +    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
  276. +    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
  277. +    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
  278. +    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
  279. +    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
  280. +    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
  281. +};
  282. +
  283. +static ALWAYS_INLINE float x264_log2( uint32_t x )
  284. +{
  285. +    int lz = x264_clz( x );
  286. +    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
  287. +}
  288. +
  289.  /****************************************************************************
  290.   *
  291.   ****************************************************************************/
  292.  -327,11 +371,11 @@ struct x264_t
  293.      struct
  294.      {
  295.          /* Frames to be encoded (whose types have been decided) */
  296. -        x264_frame_t *current[X264_BFRAME_MAX*4+3];
  297. +        x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
  298.          /* Temporary buffer (frames types not yet decided) */
  299. -        x264_frame_t *next[X264_BFRAME_MAX*4+3];
  300. +        x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
  301.          /* Unused frames */
  302. -        x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
  303. +        x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
  304.          /* For adaptive B decision */
  305.          x264_frame_t *last_nonb;
  306.  
  307. diff --git a/common/frame.c b/common/frame.c
  308. index 23e6824..2097d52 100644
  309. --- a/common/frame.c
  310. +++ b/common/frame.c
  311.  -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
  312.                  memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
  313.                  CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
  314.              }
  315. +        CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  316. +        memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
  317. +        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
  318. +        for( j = 0; j <= h->param.i_bframe+1; j++ )
  319. +            for( i = 0; i <= h->param.i_bframe+1; i++ )
  320. +            {
  321. +                CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
  322. +                CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
  323. +            }
  324.      }
  325.  
  326.      if( h->param.analyse.i_me_method >= X264_ME_ESA )
  327.  -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
  328.      CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
  329.      CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
  330.      CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
  331. -    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  332.      if( h->param.i_bframe )
  333.      {
  334.          CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
  335. diff --git a/common/frame.h b/common/frame.h
  336. index aad77f5..a3da4e4 100644
  337. --- a/common/frame.h
  338. +++ b/common/frame.h
  339.  -63,6 +63,8 @@ typedef struct
  340.      int8_t  *mb_type;
  341.      int16_t (*mv[2])[2];
  342.      int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
  343. +    uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  344. +    uint8_t  (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  345.      int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
  346.      int8_t  *ref[2];
  347.      int     i_ref[2];
  348.  -83,6 +85,7 @@ typedef struct
  349.      float   *f_qp_offset;
  350.      int     b_intra_calculated;
  351.      uint16_t *i_intra_cost;
  352. +    uint32_t *i_propagate_cost;
  353.      uint16_t *i_inv_qscale_factor;
  354.  
  355.      /* threading */
  356. diff --git a/common/osdep.h b/common/osdep.h
  357. index 915ec05..2095198 100644
  358. --- a/common/osdep.h
  359. +++ b/common/osdep.h
  360.  -147,7 +147,9 @@
  361.  #ifdef WORDS_BIGENDIAN
  362.  #define endian_fix(x) (x)
  363.  #define endian_fix32(x) (x)
  364. -#elif defined(__GNUC__) && defined(HAVE_MMX)
  365. +#define endian_fix16(x) (x)
  366. +#else
  367. +#if defined(__GNUC__) && defined(HAVE_MMX)
  368.  static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
  369.  {
  370.      asm("bswap %0":"+r"(x));
  371.  -171,6 +173,11 @@ static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
  372.          return endian_fix32(x);
  373.  }
  374.  #endif
  375. +static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x )
  376. +{
  377. +    return (x<<8)|(x>>8);
  378. +}
  379. +#endif
  380.  
  381.  #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3)
  382.  #define x264_clz(x) __builtin_clz(x)
  383. diff --git a/encoder/analyse.c b/encoder/analyse.c
  384. index 4a36fcd..38b9976 100644
  385. --- a/encoder/analyse.c
  386. +++ b/encoder/analyse.c
  387.  -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  388.          h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
  389.      }
  390.      h->mb.i_psy_rd_lambda = a->i_lambda;
  391. -    /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
  392. -    h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
  393. +    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
  394. +    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
  395.  
  396.      h->mb.i_me_method = h->param.analyse.i_me_method;
  397.      h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
  398. diff --git a/encoder/encoder.c b/encoder/encoder.c
  399. index 0f1ccc8..74ff97d 100644
  400. --- a/encoder/encoder.c
  401. +++ b/encoder/encoder.c
  402.  -42,7 +42,7 @@
  403.  
  404.  #define bs_write_ue bs_write_ue_big
  405.  
  406. -static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  407. +static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  408.                                      x264_nal_t **pp_nal, int *pi_nal,
  409.                                      x264_picture_t *pic_out );
  410.  
  411.  -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
  412.          h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
  413.          h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
  414.          h->param.rc.i_aq_mode = 0;
  415. +        h->param.rc.b_mb_tree = 0;
  416.      }
  417.      h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
  418.      h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
  419.  -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h )
  420.      if( !h->param.i_bframe )
  421.          h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
  422.      h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
  423. +    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_LOOKAHEAD_MAX );
  424. +    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
  425. +    if( h->param.rc.b_stat_read )
  426. +        h->param.rc.i_lookahead = 0;
  427. +    else if( !h->param.rc.i_lookahead )
  428. +        h->param.rc.b_mb_tree = 0;
  429. +    if( h->param.rc.f_qcompress == 1 )
  430. +        h->param.rc.b_mb_tree = 0;
  431. +
  432.      h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
  433.                                  && h->param.i_bframe
  434.                                  && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
  435.  -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h )
  436.      if( !h->param.b_cabac )
  437.          h->param.analyse.i_trellis = 0;
  438.      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
  439. +    if( !h->param.analyse.b_psy )
  440. +    {
  441. +        h->param.analyse.f_psy_rd = 0;
  442. +        h->param.analyse.f_psy_trellis = 0;
  443. +    }
  444.      if( !h->param.analyse.i_trellis )
  445.          h->param.analyse.f_psy_trellis = 0;
  446.      h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
  447.  -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h )
  448.      h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
  449.      if( h->param.rc.f_aq_strength == 0 )
  450.          h->param.rc.i_aq_mode = 0;
  451. +    /* MB-tree requires AQ to be on, even if the strength is zero. */
  452. +    if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
  453. +    {
  454. +        h->param.rc.i_aq_mode = 1;
  455. +        h->param.rc.f_aq_strength = 0;
  456. +    }
  457. +    if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid )
  458. +    {
  459. +        x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
  460. +        h->param.b_bframe_pyramid = 0;
  461. +    }
  462.      h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  463.      if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
  464.          h->param.analyse.i_subpel_refine = 9;
  465.  -723,6 +749,9 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  466.          h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
  467.      else
  468.          h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
  469. +    if( h->param.rc.b_mb_tree )
  470. +        h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
  471. +
  472.      h->frames.i_max_ref0 = h->param.i_frame_reference;
  473.      h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
  474.      h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering;
  475.  -730,7 +759,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  476.          && ( h->param.rc.i_rc_method == X264_RC_ABR
  477.            || h->param.rc.i_rc_method == X264_RC_CRF
  478.            || h->param.i_bframe_adaptive
  479. -          || h->param.i_scenecut_threshold );
  480. +          || h->param.i_scenecut_threshold
  481. +          || h->param.rc.b_mb_tree );
  482.      h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
  483.      h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  484.  
  485.  -1443,7 +1473,12 @@ int     x264_encoder_encode( x264_t *h,
  486.          if( h->frames.b_have_lowres )
  487.              x264_frame_init_lowres( h, fenc );
  488.  
  489. -        if( h->param.rc.i_aq_mode )
  490. +        if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
  491. +        {
  492. +            if( x264_macroblock_tree_read( h, fenc ) )
  493. +                return -1;
  494. +        }
  495. +        else if( h->param.rc.i_aq_mode )
  496.              x264_adaptive_quant_frame( h, fenc );
  497.  
  498.          if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
  499.  -1461,7 +1496,8 @@ int     x264_encoder_encode( x264_t *h,
  500.          /* 2: Select frame types */
  501.          if( h->frames.next[0] == NULL )
  502.          {
  503. -            x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
  504. +            if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
  505. +                return -1;
  506.              return 0;
  507.          }
  508.  
  509.  -1621,11 +1657,12 @@ int     x264_encoder_encode( x264_t *h,
  510.      else
  511.          x264_slices_write( h );
  512.  
  513. -    x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
  514. +    if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
  515. +        return -1;
  516.      return 0;
  517.  }
  518.  
  519. -static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  520. +static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  521.                                      x264_nal_t **pp_nal, int *pi_nal,
  522.                                      x264_picture_t *pic_out )
  523.  {
  524.  -1640,7 +1677,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  525.      if( !h->out.i_nal )
  526.      {
  527.          pic_out->i_type = X264_TYPE_AUTO;
  528. -        return;
  529. +        return 0;
  530.      }
  531.  
  532.      x264_frame_push_unused( thread_current, h->fenc );
  533.  -1670,7 +1707,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  534.  
  535.      /* update rc */
  536.      x264_emms();
  537. -    x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
  538. +    if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 )
  539. +        return -1;
  540.  
  541.      /* restore CPU state (before using float again) */
  542.      x264_emms();
  543.  -1784,6 +1822,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  544.  
  545.      if( h->param.psz_dump_yuv )
  546.          x264_frame_dump( h );
  547. +
  548. +    return 0;
  549.  }
  550.  
  551.  static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
  552. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  553. index 087e658..f669007 100644
  554. --- a/encoder/ratecontrol.c
  555. +++ b/encoder/ratecontrol.c
  556.  -71,6 +71,7 @@ struct x264_ratecontrol_t
  557.      double fps;
  558.      double bitrate;
  559.      double rate_tolerance;
  560. +    double qcompress;
  561.      int nmb;                    /* number of macroblocks in a frame */
  562.      int qp_constant[5];
  563.  
  564.  -106,6 +107,10 @@ struct x264_ratecontrol_t
  565.      /* 2pass stuff */
  566.      FILE *p_stat_file_out;
  567.      char *psz_stat_file_tmpname;
  568. +    FILE *p_mbtree_stat_file_out;
  569. +    char *psz_mbtree_stat_file_tmpname;
  570. +    char *psz_mbtree_stat_file_name;
  571. +    FILE *p_mbtree_stat_file_in;
  572.  
  573.      int num_entries;            /* number of ratecontrol_entry_ts */
  574.      ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
  575.  -118,6 +123,7 @@ struct x264_ratecontrol_t
  576.      double lmin[5];             /* min qscale by frame type */
  577.      double lmax[5];
  578.      double lstep;               /* max change (multiply) in qscale per frame */
  579. +    uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */
  580.  
  581.      /* MBRC stuff */
  582.      double frame_size_estimated;
  583.  -191,49 +197,6 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame
  584.      return var;
  585.  }
  586.  
  587. -static const float log2_lut[128] = {
  588. -    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
  589. -    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
  590. -    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
  591. -    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
  592. -    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
  593. -    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
  594. -    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
  595. -    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
  596. -    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
  597. -    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
  598. -    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
  599. -    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
  600. -    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
  601. -    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
  602. -    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
  603. -    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
  604. -};
  605. -
  606. -static const uint8_t exp2_lut[64] = {
  607. -      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
  608. -     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
  609. -    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  610. -    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  611. -};
  612. -
  613. -static ALWAYS_INLINE float x264_log2( uint32_t x )
  614. -{
  615. -    int lz = x264_clz( x );
  616. -    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
  617. -}
  618. -
  619. -static ALWAYS_INLINE int x264_exp2fix8( float x )
  620. -{
  621. -    int i, f;
  622. -    x += 8;
  623. -    if( x <= 0 ) return 0;
  624. -    if( x >= 16 ) return 0xffff;
  625. -    i = x;
  626. -    f = (x-i)*64;
  627. -    return (exp2_lut[f]+256) << i >> 8;
  628. -}
  629. -
  630.  void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  631.  {
  632.      /* constants chosen to result in approximately the same overall bitrate as without AQ.
  633.  -241,6 +204,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  634.      int mb_x, mb_y;
  635.      float strength;
  636.      float avg_adj = 0.f;
  637. +    /* Need to init it anyways for MB tree. */
  638. +    if( h->param.rc.f_aq_strength == 0 )
  639. +    {
  640. +        int mb_xy;
  641. +        memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
  642. +        if( h->frames.b_have_lowres )
  643. +            for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
  644. +                frame->i_inv_qscale_factor[mb_xy] = 256;
  645. +        return;
  646. +    }
  647. +
  648.      if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
  649.      {
  650.          for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  651.  -257,6 +231,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  652.      }
  653.      else
  654.          strength = h->param.rc.f_aq_strength * 1.0397f;
  655. +
  656.      for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  657.          for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
  658.          {
  659.  -291,6 +266,47 @@ void x264_adaptive_quant( x264_t *h )
  660.      h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  661.  }
  662.  
  663. +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
  664. +{
  665. +    x264_ratecontrol_t *rc = h->rc;
  666. +    uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type; /* slice type stored for this frame in rc->entry */
  667. +    int i;
  668. +    /* Fills frame->f_qp_offset for one frame from the MB-tree stats file; returns 0 on success, -1 (after logging) on error. */
  669. +    if( i_type_actual != SLICE_TYPE_B )
  670. +    {
  671. +        uint8_t i_type;
  672. +        /* A record starts with a one-byte slice type, which must agree with the type stored in rc->entry. */
  673. +        if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
  674. +            goto fail;
  675. +
  676. +        if( i_type != i_type_actual )
  677. +        {
  678. +            x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
  679. +            return -1;
  680. +        }
  681. +        /* The type byte is followed by h->mb.i_mb_count 16-bit values, read into the rc->qp_buffer scratch array. */
  682. +        if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
  683. +            goto fail;
  684. +        /* Each value is reinterpreted as signed and scaled by 1/256; endian_fix16 presumably undoes the file byte order - confirm. */
  685. +        for( i = 0; i < h->mb.i_mb_count; i++ )
  686. +            frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[i] )) * (1/256.0);
  687. +    }
  688. +    else /* B slices: nothing is read from the file; x264_adaptive_quant_frame() is run on the frame instead */
  689. +        x264_adaptive_quant_frame( h, frame );
  690. +    return 0;
  691. +fail: /* reached when either fread() above returns fewer items than requested */
  692. +    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
  693. +    return -1;
  694. +}
  695. +/* Returns a new x264_malloc buffer holding input followed by suffix (caller releases it with x264_free), or NULL on allocation failure. */
  696. +static char *x264_strcat_filename( char *input, char *suffix )
  697. +{
  698. +    char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 ); /* +1 for the terminating NUL */
  699. +    if( output ) /* x264_malloc can return NULL: never copy through it, hand NULL back so the caller can bail out */
  700. +        strcat( strcpy( output, input ), suffix );
  701. +    return output;
  702. +}
  703. +
  704.  int x264_ratecontrol_new( x264_t *h )
  705.  {
  706.      x264_ratecontrol_t *rc;
  707.  -310,6 +326,14 @@ int x264_ratecontrol_new( x264_t *h )
  708.      else
  709.          rc->fps = 25.0;
  710.  
  711. +    if( h->param.rc.b_mb_tree )
  712. +    {
  713. +        h->param.rc.f_pb_factor = 1;
  714. +        rc->qcompress = 1;
  715. +    }
  716. +    else
  717. +        rc->qcompress = h->param.rc.f_qcompress;
  718. +
  719.      rc->bitrate = h->param.rc.i_bitrate * 1000.;
  720.      rc->rate_tolerance = h->param.rc.f_rate_tolerance;
  721.      rc->nmb = h->mb.i_mb_count;
  722.  -379,17 +403,18 @@ int x264_ratecontrol_new( x264_t *h )
  723.          rc->accum_p_norm = .01;
  724.          rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
  725.          /* estimated ratio that produces a reasonable QP for the first I-frame */
  726. -        rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
  727. +        rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
  728.          rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
  729.          rc->last_non_b_pict_type = SLICE_TYPE_I;
  730.      }
  731.  
  732.      if( h->param.rc.i_rc_method == X264_RC_CRF )
  733.      {
  734. -        /* arbitrary rescaling to make CRF somewhat similar to QP */
  735. +        /* Arbitrary rescaling to make CRF somewhat similar to QP.
  736. +         * Try to compensate for MB-tree's effects as well. */
  737.          double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  738. -        rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
  739. -                                 / qp2qscale( h->param.rc.f_rf_constant );
  740. +        rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
  741. +                                 / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) );
  742.      }
  743.  
  744.      rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
  745.  -437,6 +462,17 @@ int x264_ratecontrol_new( x264_t *h )
  746.              x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
  747.              return -1;
  748.          }
  749. +        if( h->param.rc.b_mb_tree )
  750. +        {
  751. +            char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
  752. +            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
  753. +            x264_free( mbtree_stats_in );
  754. +            if( !rc->p_mbtree_stat_file_in )
  755. +            {
  756. +                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  757. +                return -1;
  758. +            }
  759. +        }
  760.  
  761.          /* check whether 1st pass options were compatible with current options */
  762.          if( !strncmp( stats_buf, "#options:", 9 ) )
  763.  -483,6 +519,9 @@ int x264_ratecontrol_new( x264_t *h )
  764.                  x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
  765.                  return -1;
  766.              }
  767. +
  768. +            if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc-lookahead=" ) ) && sscanf( p, "rc-lookahead=%d", &i ) )
  769. +                h->param.rc.i_lookahead = i;
  770.          }
  771.  
  772.          /* find number of pics */
  773.  -585,10 +624,7 @@ int x264_ratecontrol_new( x264_t *h )
  774.      if( h->param.rc.b_stat_write )
  775.      {
  776.          char *p;
  777. -
  778. -        rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
  779. -        strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
  780. -        strcat( rc->psz_stat_file_tmpname, ".temp" );
  781. +        rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
  782.  
  783.          rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
  784.          if( rc->p_stat_file_out == NULL )
  785.  -600,6 +636,25 @@ int x264_ratecontrol_new( x264_t *h )
  786.          p = x264_param2string( &h->param, 1 );
  787.          fprintf( rc->p_stat_file_out, "#options: %s\n", p );
  788.          x264_free( p );
  789. +        if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
  790. +        {
  791. +            rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
  792. +            rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
  793. +
  794. +            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
  795. +            if( rc->p_mbtree_stat_file_out == NULL )
  796. +            {
  797. +                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  798. +                return -1;
  799. +            }
  800. +        }
  801. +    }
  802. +
  803. +    if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
  804. +    {
  805. +        rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t));
  806. +        if( !rc->qp_buffer )
  807. +            return -1;
  808.      }
  809.  
  810.      for( i=0; i<h->param.i_threads; i++ )
  811.  -739,8 +794,8 @@ void x264_ratecontrol_summary( x264_t *h )
  812.      {
  813.          double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  814.          x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
  815. -                  qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
  816. -                             * rc->cplxr_sum / rc->wanted_bits_window ) );
  817. +                  qscale2qp( pow( base_cplx, 1 - rc->qcompress )
  818. +                             * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) );
  819.      }
  820.  }
  821.  
  822.  -760,9 +815,22 @@ void x264_ratecontrol_delete( x264_t *h )
  823.              }
  824.          x264_free( rc->psz_stat_file_tmpname );
  825.      }
  826. +    if( rc->p_mbtree_stat_file_out )
  827. +    {
  828. +        fclose( rc->p_mbtree_stat_file_out );
  829. +        if( h->i_frame >= rc->num_entries )
  830. +            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
  831. +            {
  832. +                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
  833. +                          rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
  834. +            }
  835. +        x264_free( rc->psz_mbtree_stat_file_tmpname );
  836. +        x264_free( rc->psz_mbtree_stat_file_name );
  837. +    }
  838.      x264_free( rc->pred );
  839.      x264_free( rc->pred_b_from_p );
  840.      x264_free( rc->entry );
  841. +    x264_free( rc->qp_buffer );
  842.      if( rc->zones )
  843.      {
  844.          x264_free( rc->zones[0].param );
  845.  -1086,7 +1154,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
  846.  }
  847.  
  848.  /* After encoding one frame, save stats and update ratecontrol state */
  849. -void x264_ratecontrol_end( x264_t *h, int bits )
  850. +int x264_ratecontrol_end( x264_t *h, int bits )
  851.  {
  852.      x264_ratecontrol_t *rc = h->rc;
  853.      const int *mbs = h->stat.frame.i_mb_count;
  854.  -1114,7 +1182,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  855.                          ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
  856.                            dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
  857.                          : '-';
  858. -        fprintf( rc->p_stat_file_out,
  859. +        if( fprintf( rc->p_stat_file_out,
  860.                   "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
  861.                   h->fenc->i_frame, h->i_frame,
  862.                   c_type, rc->qpa_rc,
  863.  -1124,7 +1192,22 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  864.                   h->stat.frame.i_mb_count_i,
  865.                   h->stat.frame.i_mb_count_p,
  866.                   h->stat.frame.i_mb_count_skip,
  867. -                 c_direct);
  868. +                 c_direct) < 0 )
  869. +             goto fail;
  870. +
  871. +        /* Don't re-write the data in multi-pass mode. */
  872. +        if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
  873. +        {
  874. +            uint8_t i_type = h->sh.i_type;
  875. +            int i;
  876. +            /* Values are stored as big-endian FIX8.8 */
  877. +            for( i = 0; i < h->mb.i_mb_count; i++ )
  878. +                rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
  879. +            if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
  880. +                goto fail;
  881. +            if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
  882. +                goto fail;
  883. +        }
  884.      }
  885.  
  886.      if( rc->b_abr )
  887.  -1162,6 +1245,10 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  888.      }
  889.  
  890.      update_vbv( h, bits );
  891. +    return 0;
  892. +fail:
  893. +    x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n");
  894. +    return -1;
  895.  }
  896.  
  897.  /****************************************************************************
  898.  -1177,7 +1264,7 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
  899.      double q;
  900.      x264_zone_t *zone = get_zone( h, frame_num );
  901.  
  902. -    q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
  903. +    q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
  904.  
  905.      // avoid NaN's in the rc_eq
  906.      if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
  907. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  908. index 3310d3c..ed8abab 100644
  909. --- a/encoder/ratecontrol.h
  910. +++ b/encoder/ratecontrol.h
  911.  -29,12 +29,13 @@ void x264_ratecontrol_delete( x264_t * );
  912.  
  913.  void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
  914.  void x264_adaptive_quant( x264_t * );
  915. +int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
  916.  void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
  917.  void x264_ratecontrol_start( x264_t *, int i_force_qp );
  918.  int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
  919.  void x264_ratecontrol_mb( x264_t *, int bits );
  920.  int  x264_ratecontrol_qp( x264_t * );
  921. -void x264_ratecontrol_end( x264_t *, int bits );
  922. +int  x264_ratecontrol_end( x264_t *, int bits );
  923.  void x264_ratecontrol_summary( x264_t * );
  924.  void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
  925.  int  x264_ratecontrol_get_estimated_size( x264_t const *);
  926. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  927. index 2c16429..be3eaf7 100644
  928. --- a/encoder/slicetype.c
  929. +++ b/encoder/slicetype.c
  930.  -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  931.      x264_me_t m[2];
  932.      int i_bcost = COST_MAX;
  933.      int l, i;
  934. +    int list_used = 0;
  935.  
  936.      h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
  937.      h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
  938.  -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  939.          h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
  940.          i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
  941.                             m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
  942. -        if( i_bcost > i_cost ) \
  943. -            i_bcost = i_cost; \
  944. +        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
  945.      }
  946.  
  947.      m[0].i_pixel = PIXEL_8x8;
  948.  -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  949.              int i_cost;
  950.              h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
  951.              i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
  952. -            if( i_bcost > i_cost )
  953. -                i_bcost = i_cost;
  954. +            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
  955.          }
  956.      }
  957.  
  958.  -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  959.              *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
  960.              m[l].cost = *fenc_costs[l];
  961.          }
  962. -        i_bcost = X264_MIN( i_bcost, m[l].cost );
  963. +        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
  964.      }
  965.  
  966.      if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
  967.          TRY_BIDIR( m[0].mv, m[1].mv, 5 );
  968.  
  969. +    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
  970. +
  971.  lowres_intra_mb:
  972.      /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
  973.      /* FIXME: Should we still forbid them now that we cache intra scores? */
  974. -    if( !b_bidir )
  975. +    if( !b_bidir || h->param.rc.b_mb_tree )
  976.      {
  977.          int i_icost, b_intra;
  978.          if( !fenc->b_intra_calculated )
  979.  -237,18 +238,23 @@ lowres_intra_mb:
  980.          }
  981.          else
  982.              i_icost = fenc->i_intra_cost[i_mb_xy];
  983. -        b_intra = i_icost < i_bcost;
  984. -        if( b_intra )
  985. -            i_bcost = i_icost;
  986. -        if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  987. -            && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  988. -            || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  989. +        if( !b_bidir )
  990.          {
  991. -            fenc->i_intra_mbs[b-p0] += b_intra;
  992. -            fenc->i_cost_est[0][0] += i_icost;
  993. +            b_intra = i_icost < i_bcost;
  994. +            if( b_intra )
  995. +                i_bcost = i_icost;
  996. +            if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  997. +                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  998. +                || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  999. +            {
  1000. +                fenc->i_intra_mbs[b-p0] += b_intra;
  1001. +                fenc->i_cost_est[0][0] += i_icost;
  1002. +            }
  1003.          }
  1004.      }
  1005.  
  1006. +    frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
  1007. +
  1008.      return i_bcost;
  1009.  }
  1010.  #undef TRY_BIDIR
  1011.  -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  1012.                                 x264_frame_t **frames, int p0, int p1, int b,
  1013.                                 int b_intra_penalty )
  1014.  {
  1015. +
  1016.      int i_score = 0;
  1017.      /* Don't use the AQ'd scores for slicetype decision. */
  1018.      int i_score_aq = 0;
  1019.  -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  1020.  
  1021.          /* the edge mbs seem to reduce the predictive quality of the
  1022.           * whole frame's score, but are needed for a spatial distribution. */
  1023. -        if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  1024. +        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
  1025. +            h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  1026.          {
  1027.              for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  1028.              {
  1029.  -355,7 +363,172 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  1030.      return i_score;
  1031.  }
  1032.  
  1033. -#define MAX_LENGTH (X264_BFRAME_MAX*4)
  1034. +/* If MB-tree changes the quantizers, we need to recalculate the frame cost without re-running lookahead: rebuilds
  1035. + * frame b's row SATDs (refs p0/p1) from the cached lowres_costs and current f_qp_offset; returns the frame score. */
  1036. +static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
  1037. +                                                  int p0, int p1, int b ) /* note: a is not used in this body */
  1038. +{
  1039. +    int i_score = 0;
  1040. +    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b]; /* per-row totals for this (p0,b,p1) combination, overwritten below */
  1041. +    x264_emms();
  1042. +    for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  1043. +    {
  1044. +        row_satd[ h->mb.i_mb_y ] = 0;
  1045. +        for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
  1046. +        {
  1047. +            int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  1048. +            int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy]; /* cost cached earlier for this MB; not recomputed here */
  1049. +            float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
  1050. +            i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8; /* presumably cost * 2^(-qp_adj/6) in 8-bit fixed point, rounded - confirm x264_exp2fix8 */
  1051. +            row_satd[ h->mb.i_mb_y ] += i_mb_cost; /* every MB counts toward its row total */
  1052. +            if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
  1053. +                 h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
  1054. +                 h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  1055. +            {
  1056. +                i_score += i_mb_cost; /* only interior MBs feed the returned score, unless the frame is at most 2 MBs wide or high */
  1057. +            }
  1058. +        }
  1059. +    }
  1060. +    return i_score;
  1061. +}
  1062. +
  1063. +static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
  1064. +{
  1065. +    x264_frame_t *refs[2] = {frames[p0],frames[p1]};
  1066. +    int dist_scale_factor = p1 != p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
  1067. +    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
  1068. +
  1069. +    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
  1070. +    {
  1071. +        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  1072. +        {
  1073. +            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  1074. +            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
  1075. +            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
  1076. +            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
  1077. +            /* The approximate amount of data that this block contains. */
  1078. +            int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
  1079. +
  1080. +            /* Divide by 64 for per-pixel summing. */
  1081. +            propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
  1082. +
  1083. +            /* Don't propagate for an intra block. */
  1084. +            if( inter_cost < intra_cost )
  1085. +            {
  1086. +                int mv[2][2], list;
  1087. +                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
  1088. +                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
  1089. +                if( b != p1 )
  1090. +                {
  1091. +                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
  1092. +                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
  1093. +                }
  1094. +
  1095. +                /* Follow the MVs to the previous frame(s). */
  1096. +                for( list = 0; list < 2; list++ )
  1097. +                    if( (lists_used >> list)&1 )
  1098. +                    {
  1099. +                        int x = mv[list][0];
  1100. +                        int y = mv[list][1];
  1101. +                        int listamount = propagate_amount;
  1102. +                        int mbx = (x>>5)+h->mb.i_mb_x;
  1103. +                        int mby = ((y>>5)+h->mb.i_mb_y);
  1104. +                        int idx0 = mbx + mby*h->mb.i_mb_stride;
  1105. +                        int idx1 = idx0 + 1;
  1106. +                        int idx2 = idx0 + h->mb.i_mb_stride;
  1107. +                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
  1108. +                        int idx0weight = (32-(y&31))*(32-(x&31));
  1109. +                        int idx1weight = (32-(y&31))*(x&31);
  1110. +                        int idx2weight = (y&31)*(32-(x&31));
  1111. +                        int idx3weight = (y&31)*(x&31);
  1112. +
  1113. +                        /* Apply bipred weighting. */
  1114. +                        if( lists_used == 3 )
  1115. +                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
  1116. +
  1117. +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
  1118. +
  1119. +                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
  1120. +                         * be counted. */
  1121. +                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
  1122. +                        {
  1123. +                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
  1124. +                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
  1125. +                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
  1126. +                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
  1127. +                        }
  1128. +                        else /* Check offsets individually */
  1129. +                        {
  1130. +                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
  1131. +                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
  1132. +                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
  1133. +                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
  1134. +                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
  1135. +                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
  1136. +                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
  1137. +                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
  1138. +                        }
  1139. +                    }
  1140. +            }
  1141. +        }
  1142. +    }
  1143. +}
  1144. +/* Walks frames[] backwards from the last non-B frame, propagating costs, then adjusts f_qp_offset of the earliest non-B frame reached. */
  1145. +static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
  1146. +{
  1147. +    int i, idx = !b_intra; /* idx: lowest frame index the backward walk will accept as cur_nonb (0 if b_intra, else 1) */
  1148. +    int last_nonb, cur_nonb = 1;
  1149. +    if( b_intra )
  1150. +       x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); /* p0 == p1 == b == 0: presumably an intra-only cost of frames[0] - confirm */
  1151. +
  1152. +    i = num_frames-1;
  1153. +    while( i > 0 && frames[i]->i_type == X264_TYPE_B ) /* skip trailing B-frames to find the starting non-B frame */
  1154. +        i--;
  1155. +    last_nonb = i;
  1156. +
  1157. +    if( last_nonb < 0 ) /* only possible when num_frames <= 0, since the loop above never takes i below 0 */
  1158. +        return;
  1159. +
  1160. +    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  1161. +    while( i-- > idx )
  1162. +    {
  1163. +        cur_nonb = i;
  1164. +        while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) /* find the previous non-B frame */
  1165. +            cur_nonb--;
  1166. +        if( cur_nonb < idx )
  1167. +            break;
  1168. +        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* last_nonb costed against cur_nonb as its reference */
  1169. +        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  1170. +        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
  1171. +        while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* NOTE(review): B-frames add into last_nonb only after it was propagated above - confirm order */
  1172. +        {
  1173. +            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
  1174. +            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); /* each B-frame starts from a zero propagate cost */
  1175. +            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
  1176. +            i--;
  1177. +        }
  1178. +        last_nonb = cur_nonb; /* i now equals cur_nonb; the outer i-- steps past it */
  1179. +    }
  1180. +    x264_emms();
  1181. +
  1182. +    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
  1183. +    {
  1184. +        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  1185. +        {
  1186. +            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  1187. +            int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
  1188. +
  1189. +            if( intra_cost ) /* MBs with a zero scaled intra cost keep their current offset */
  1190. +            {
  1191. +                int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
  1192. +                float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost); /* log2 of (intra+propagated)/intra */
  1193. +                /* Allow the constant to be adjusted via qcompress, since the two
  1194. +                 * concepts are very similar. */
  1195. +                frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
  1196. +            }
  1197. +        }
  1198. +    }
  1199. +}
  1200.  
  1201.  static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
  1202.  {
  1203.  -393,14 +566,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
  1204.  /* Uses strings due to the fact that the speed of the control functions is
  1205.     negligable compared to the cost of running slicetype_frame_cost, and because
  1206.     it makes debugging easier. */
  1207. -static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
  1208. +static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
  1209.  {
  1210. -    char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
  1211. +    char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
  1212.      int num_paths = X264_MIN(max_bframes+1, length);
  1213.      int suffix_size, loc, path;
  1214.      int best_cost = COST_MAX;
  1215.      int best_path_index = 0;
  1216. -    length = X264_MIN(length,MAX_LENGTH);
  1217. +    length = X264_MIN(length,X264_LOOKAHEAD_MAX);
  1218.  
  1219.      /* Iterate over all currently possible paths and add suffixes to each one */
  1220.      for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
  1221.  -426,15 +599,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  1222.      memcpy( best_paths[length], paths[best_path_index], length );
  1223.  }
  1224.  
  1225. -static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
  1226. -{
  1227. -    char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
  1228. -    int n;
  1229. -    for( n = 2; n < length-1; n++ )
  1230. -        x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
  1231. -    return strspn( best_paths[length-2], "B" );
  1232. -}
  1233. -
  1234.  static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
  1235.  {
  1236.      x264_frame_t *frame = frames[p1];
  1237.  -477,13 +641,13 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
  1238.      return res;
  1239.  }
  1240.  
  1241. -static void x264_slicetype_analyse( x264_t *h )
  1242. +static void x264_slicetype_analyse( x264_t *h, int keyframe )
  1243.  {
  1244.      x264_mb_analysis_t a;
  1245. -    x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
  1246. +    x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
  1247.      int num_frames;
  1248.      int keyint_limit;
  1249. -    int j;
  1250. +    int i,j;
  1251.      int i_mb_count = NUM_MBS;
  1252.      int cost1p0, cost2p0, cost1b1, cost2p1;
  1253.      int idr_frame_type;
  1254.  -497,96 +661,150 @@ static void x264_slicetype_analyse( x264_t *h )
  1255.          frames[j+1] = h->frames.next[j];
  1256.      keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
  1257.      num_frames = X264_MIN( j, keyint_limit );
  1258. -    if( num_frames == 0 )
  1259. +
  1260. +    if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
  1261.          return;
  1262.  
  1263.      x264_lowres_context_init( h, &a );
  1264.      idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
  1265.  
  1266. -    if( num_frames == 1 )
  1267. +    if( num_frames == 1 && !h->param.rc.b_mb_tree )
  1268.      {
  1269. -no_b_frames:
  1270.          frames[1]->i_type = X264_TYPE_P;
  1271.          if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1272.              frames[1]->i_type = idr_frame_type;
  1273.          return;
  1274.      }
  1275.  
  1276. -    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  1277. +    /* This is important psy-wise: if we have a non-scenecut keyframe,
  1278. +     * there will be significant visual artifacts if the frames just before
  1279. +     * it drop in quality because they are referenced less, even though
  1280. +     * lowering their quality is more RD-optimal. */
  1281. +    if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
  1282. +        num_frames = j;
  1283. +
  1284. +    char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
  1285. +    int n;
  1286. +    int num_bframes = 0;
  1287. +    int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1288. +    int num_analysed_frames = num_frames;
  1289. +    int reset_start;
  1290. +    if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1291.      {
  1292. -        int num_bframes;
  1293. -        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1294. -        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1295. +        frames[1]->i_type = idr_frame_type;
  1296. +        return;
  1297. +    }
  1298. +
  1299. +    if( h->param.i_bframe )
  1300. +    {
  1301. +        if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  1302.          {
  1303. -            frames[1]->i_type = idr_frame_type;
  1304. -            return;
  1305. +            /* Perform the frametype analysis. */
  1306. +            for( n = 2; n < num_frames-1; n++ )
  1307. +                x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
  1308. +            num_bframes = strspn( best_paths[num_frames-2], "B" );
  1309. +            /* Load the results of the analysis into the frame types. */
  1310. +            for( j = 1; j < num_frames; j++ )
  1311. +                frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
  1312. +            frames[num_frames]->i_type = X264_TYPE_P;
  1313.          }
  1314. -        num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
  1315. -        assert(num_bframes < num_frames);
  1316. -
  1317. -        for( j = 1; j < num_bframes+1; j++ )
  1318. +        else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  1319.          {
  1320. -            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1321. +            for( i = 0; i < num_frames-(2-!i); )
  1322.              {
  1323. -                frames[j]->i_type = X264_TYPE_P;
  1324. -                return;
  1325. -            }
  1326. -            frames[j]->i_type = X264_TYPE_B;
  1327. -        }
  1328. -        frames[num_bframes+1]->i_type = X264_TYPE_P;
  1329. -    }
  1330. -    else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  1331. -    {
  1332. -        cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
  1333. -        if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
  1334. -            goto no_b_frames;
  1335. +                cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
  1336. +                if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
  1337. +                {
  1338. +                    frames[i+1]->i_type = X264_TYPE_P;
  1339. +                    frames[i+2]->i_type = X264_TYPE_P;
  1340. +                    i += 2;
  1341. +                    continue;
  1342. +                }
  1343.  
  1344. -        cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
  1345. -        cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
  1346. -        cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
  1347. +                cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
  1348. +                cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
  1349. +                cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
  1350.  
  1351. -        if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
  1352. -            goto no_b_frames;
  1353. +                if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
  1354. +                {
  1355. +                    frames[i+1]->i_type = X264_TYPE_P;
  1356. +                    frames[i+2]->i_type = X264_TYPE_P;
  1357. +                    i += 2;
  1358. +                    continue;
  1359. +                }
  1360.  
  1361. -        // arbitrary and untuned
  1362. -        #define INTER_THRESH 300
  1363. -        #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
  1364. -        frames[1]->i_type = X264_TYPE_B;
  1365. +                // arbitrary and untuned
  1366. +                #define INTER_THRESH 300
  1367. +                #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
  1368. +                frames[i+1]->i_type = X264_TYPE_B;
  1369. +                frames[i+2]->i_type = X264_TYPE_P;
  1370.  
  1371. -        for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
  1372. +                for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
  1373. +                {
  1374. +                    int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
  1375. +                    int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
  1376. +
  1377. +                    if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
  1378. +                    {
  1379. +                        frames[j]->i_type = X264_TYPE_P;
  1380. +                        break;
  1381. +                    }
  1382. +                    else
  1383. +                        frames[j]->i_type = X264_TYPE_B;
  1384. +                }
  1385. +                i = j;
  1386. +            }
  1387. +            frames[i+!i]->i_type = X264_TYPE_P;
  1388. +            num_bframes = 0;
  1389. +            while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
  1390. +                num_bframes++;
  1391. +        }
  1392. +        else
  1393.          {
  1394. -            int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
  1395. -            int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
  1396. +            num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1397. +            for( j = 1; j < num_frames; j++ )
  1398. +                frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
  1399. +            frames[num_frames]->i_type = X264_TYPE_P;
  1400. +        }
  1401.  
  1402. -            if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
  1403. +        /* Check scenecut on the first minigop. */
  1404. +        for( j = 1; j < num_bframes+1; j++ )
  1405. +            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1406.              {
  1407.                  frames[j]->i_type = X264_TYPE_P;
  1408. +                num_analysed_frames = j;
  1409.                  break;
  1410.              }
  1411. -            else
  1412. -                frames[j]->i_type = X264_TYPE_B;
  1413. -        }
  1414. +
  1415. +        reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
  1416.      }
  1417.      else
  1418.      {
  1419. -        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  1420. -        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  1421. -        {
  1422. -            frames[1]->i_type = idr_frame_type;
  1423. -            return;
  1424. -        }
  1425. +        for( j = 1; j < num_frames; j++ )
  1426. +            frames[j]->i_type = X264_TYPE_P;
  1427. +        reset_start = !keyframe + 1;
  1428. +    }
  1429.  
  1430. -        for( j = 1; j < max_bframes+1; j++ )
  1431. -        {
  1432. -            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  1433. +    /* Perform the actual macroblock tree analysis.
  1434. +     * Don't go farther than the lookahead parameter; this helps in short GOPs. */
  1435. +    if( h->param.rc.b_mb_tree )
  1436. +        x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
  1437. +
  1438. +    /* Enforce keyframe limit. */
  1439. +    if( h->param.i_bframe )
  1440. +        for( j = 0; j <= num_bframes; j++ )
  1441. +            if( j+1 > keyint_limit )
  1442.              {
  1443. -                frames[j]->i_type = X264_TYPE_P;
  1444. -                return;
  1445. +                if( j )
  1446. +                    frames[j]->i_type = X264_TYPE_P;
  1447. +                frames[j+1]->i_type = idr_frame_type;
  1448. +                reset_start = j+2;
  1449. +                break;
  1450.              }
  1451. -            frames[j]->i_type = X264_TYPE_B;
  1452. -        }
  1453. -        frames[max_bframes+1]->i_type = X264_TYPE_P;
  1454. -    }
  1455. +
  1456. +    /* Restore frametypes for all frames that haven't actually been decided yet. */
  1457. +    for( j = reset_start; j <= num_frames; j++ )
  1458. +        frames[j]->i_type = X264_TYPE_AUTO;
  1459.  }
  1460.  
  1461.  void x264_slicetype_decide( x264_t *h )
  1462.  -606,8 +824,9 @@ void x264_slicetype_decide( x264_t *h )
  1463.                  x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
  1464.      }
  1465.      else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
  1466. -             || h->param.i_scenecut_threshold )
  1467. -        x264_slicetype_analyse( h );
  1468. +             || h->param.i_scenecut_threshold
  1469. +             || h->param.rc.b_mb_tree )
  1470. +        x264_slicetype_analyse( h, 0 );
  1471.  
  1472.      for( bframes = 0;; bframes++ )
  1473.      {
  1474.  -645,7 +864,9 @@ void x264_slicetype_decide( x264_t *h )
  1475.                  frm->i_type = X264_TYPE_P;
  1476.          }
  1477.  
  1478. -        if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
  1479. +        if( frm->i_type == X264_TYPE_AUTO )
  1480. +            frm->i_type = X264_TYPE_B;
  1481. +
  1482.          else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
  1483.      }
  1484.  }
  1485.  -653,7 +874,7 @@ void x264_slicetype_decide( x264_t *h )
  1486.  int x264_rc_analyse_slice( x264_t *h )
  1487.  {
  1488.      x264_mb_analysis_t a;
  1489. -    x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
  1490. +    x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
  1491.      int p0=0, p1, b;
  1492.      int cost;
  1493.  
  1494.  -662,6 +883,12 @@ int x264_rc_analyse_slice( x264_t *h )
  1495.      if( IS_X264_TYPE_I(h->fenc->i_type) )
  1496.      {
  1497.          p1 = b = 0;
  1498. +        /* For MB-tree, we have to perform propagation analysis on I-frames too. */
  1499. +        if( h->param.rc.b_mb_tree )
  1500. +        {
  1501. +            h->frames.last_nonb = h->fenc;
  1502. +            x264_slicetype_analyse( h, 1 );
  1503. +        }
  1504.      }
  1505.      else if( X264_TYPE_P == h->fenc->i_type )
  1506.      {
  1507.  -680,11 +907,16 @@ int x264_rc_analyse_slice( x264_t *h )
  1508.      frames[p0] = h->fref0[0];
  1509.      frames[b] = h->fenc;
  1510.  
  1511. -    cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  1512. +    if( h->param.rc.b_mb_tree )
  1513. +        cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
  1514. +    else
  1515. +    {
  1516. +        cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  1517.  
  1518. -    /* In AQ, use the weighted score instead. */
  1519. -    if( h->param.rc.i_aq_mode )
  1520. -        cost = frames[b]->i_cost_est[b-p0][p1-b];
  1521. +        /* In AQ, use the weighted score instead. */
  1522. +        if( h->param.rc.i_aq_mode )
  1523. +            cost = frames[b]->i_cost_est[b-p0][p1-b];
  1524. +    }
  1525.  
  1526.      h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
  1527.      h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
  1528. diff --git a/x264.c b/x264.c
  1529. index c3b4f29..c75bfde 100644
  1530. --- a/x264.c
  1531. +++ b/x264.c
  1532.  -168,9 +168,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1533.      H0( "                                  - baseline,main,high\n" );
  1534.      H0( "      --preset                Use a preset to select encoding settings [medium]\n" );
  1535.      H0( "                                  Overridden by user settings\n");
  1536. -    H1( "                                  - ultrafast,veryfast,fast,medium\n"
  1537. -        "                                  - slow,slower,placebo\n" );
  1538. -    else H0( "                                  - ultrafast,veryfast,fast,medium,slow,slower\n" );
  1539. +    H0( "                                  - ultrafast,veryfast,faster,fast\n"
  1540. +        "                                  - medium,slow,slower,placebo\n" );
  1541.      H0( "      --tune                  Tune the settings for a particular type of source\n" );
  1542.      H0( "                                  Overridden by user settings\n");
  1543.      H1( "                                  - film,animation,grain,psnr,ssim\n"
  1544.  -204,6 +203,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1545.      H0( "  -q, --qp <integer>          Set QP (0-51, 0=lossless)\n" );
  1546.      H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
  1547.      H0( "      --crf <float>           Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
  1548. +    H0( "      --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
  1549.      H0( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
  1550.      H0( "      --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
  1551.      H1( "      --vbv-init <float>      Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
  1552.  -228,6 +228,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1553.          "                                  - 2: Last pass, does not overwrite stats file\n"
  1554.          "                                  - 3: Nth pass, overwrites stats file\n" );
  1555.      H0( "      --stats <string>        Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
  1556. +    H0( "      --no-mbtree                Disable mb-tree ratecontrol.\n");
  1557.      H0( "      --qcomp <float>         QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
  1558.      H1( "      --cplxblur <float>      Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
  1559.      H1( "      --qblur <float>         Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
  1560.  -277,6 +278,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1561.          "                                  #1: RD (requires subme>=6)\n"
  1562.          "                                  #2: Trellis (requires trellis, experimental)\n",
  1563.                                         defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis );
  1564. +    H1( "      --no-psy                Disable all visual optimizations that worsen\n"
  1565. +        "                              both PSNR and SSIM.\n" );
  1566.      H0( "      --no-mixed-refs         Don't decide references on a per partition basis\n" );
  1567.      H1( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
  1568.      H0( "      --no-8x8dct             Disable adaptive spatial transform size\n" );
  1569.  -403,6 +406,7 @@ static struct option long_options[] =
  1570.      { "qpmax",       required_argument, NULL, 0 },
  1571.      { "qpstep",      required_argument, NULL, 0 },
  1572.      { "crf",         required_argument, NULL, 0 },
  1573. +    { "rc-lookahead",required_argument, NULL, 0 },
  1574.      { "ref",         required_argument, NULL, 'r' },
  1575.      { "asm",         required_argument, NULL, 0 },
  1576.      { "no-asm",            no_argument, NULL, 0 },
  1577.  -422,6 +426,7 @@ static struct option long_options[] =
  1578.      { "mvrange-thread", required_argument, NULL, 0 },
  1579.      { "subme",       required_argument, NULL, 'm' },
  1580.      { "psy-rd",      required_argument, NULL, 0 },
  1581. +    { "no-psy",            no_argument, NULL, 0 },
  1582.      { "mixed-refs",        no_argument, NULL, 0 },
  1583.      { "no-mixed-refs",     no_argument, NULL, 0 },
  1584.      { "no-chroma-me",      no_argument, NULL, 0 },
  1585.  -446,6 +451,8 @@ static struct option long_options[] =
  1586.      { "pass",        required_argument, NULL, 'p' },
  1587.      { "stats",       required_argument, NULL, 0 },
  1588.      { "qcomp",       required_argument, NULL, 0 },
  1589. +    { "mbtree",            no_argument, NULL, 0 },
  1590. +    { "no-mbtree",         no_argument, NULL, 0 },
  1591.      { "qblur",       required_argument, NULL, 0 },
  1592.      { "cplxblur",    required_argument, NULL, 0 },
  1593.      { "zones",       required_argument, NULL, 0 },
  1594.  -542,6 +549,8 @@ static int  Parse( int argc, char **argv,
  1595.                  param->rc.i_aq_mode = 0;
  1596.                  param->analyse.b_mixed_references = 0;
  1597.                  param->analyse.i_trellis = 0;
  1598. +                param->i_bframe_adaptive = X264_B_ADAPT_NONE;
  1599. +                param->rc.b_mb_tree = 0;
  1600.              }
  1601.              else if( !strcasecmp( optarg, "veryfast" ) )
  1602.              {
  1603.  -551,12 +560,20 @@ static int  Parse( int argc, char **argv,
  1604.                  param->i_frame_reference = 1;
  1605.                  param->analyse.b_mixed_references = 0;
  1606.                  param->analyse.i_trellis = 0;
  1607. +                param->rc.b_mb_tree = 0;
  1608.              }
  1609. -            else if( !strcasecmp( optarg, "fast" ) )
  1610. +            else if( !strcasecmp( optarg, "faster" ) )
  1611.              {
  1612.                  param->analyse.b_mixed_references = 0;
  1613.                  param->i_frame_reference = 2;
  1614.                  param->analyse.i_subpel_refine = 4;
  1615. +                param->rc.b_mb_tree = 0;
  1616. +            }
  1617. +            else if( !strcasecmp( optarg, "fast" ) )
  1618. +            {
  1619. +                param->i_frame_reference = 2;
  1620. +                param->analyse.i_subpel_refine = 6;
  1621. +                param->rc.i_lookahead = 30;
  1622.              }
  1623.              else if( !strcasecmp( optarg, "medium" ) )
  1624.              {
  1625.  -569,6 +586,7 @@ static int  Parse( int argc, char **argv,
  1626.                  param->i_frame_reference = 5;
  1627.                  param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
  1628.                  param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
  1629. +                param->rc.i_lookahead = 50;
  1630.              }
  1631.              else if( !strcasecmp( optarg, "slower" ) )
  1632.              {
  1633.  -579,6 +597,7 @@ static int  Parse( int argc, char **argv,
  1634.                  param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
  1635.                  param->analyse.inter |= X264_ANALYSE_PSUB8x8;
  1636.                  param->analyse.i_trellis = 2;
  1637. +                param->rc.i_lookahead = 60;
  1638.              }
  1639.              else if( !strcasecmp( optarg, "placebo" ) )
  1640.              {
  1641.  -592,6 +611,7 @@ static int  Parse( int argc, char **argv,
  1642.                  param->analyse.b_fast_pskip = 0;
  1643.                  param->analyse.i_trellis = 2;
  1644.                  param->i_bframe = 16;
  1645. +                param->rc.i_lookahead = 60;
  1646.              }
  1647.              else
  1648.              {
  1649.  -644,11 +664,13 @@ static int  Parse( int argc, char **argv,
  1650.              {
  1651.                  param->analyse.f_psy_rd = 0;
  1652.                  param->rc.i_aq_mode = X264_AQ_NONE;
  1653. +                param->analyse.b_psy = 0;
  1654.              }
  1655.              else if( !strcasecmp( optarg, "ssim" ) )
  1656.              {
  1657.                  param->analyse.f_psy_rd = 0;
  1658.                  param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
  1659. +                param->analyse.b_psy = 0;
  1660.              }
  1661.              else if( !strcasecmp( optarg, "fastdecode" ) )
  1662.              {
  1663.  -662,7 +684,6 @@ static int  Parse( int argc, char **argv,
  1664.                  param->i_deblocking_filter_alphac0 = -1;
  1665.                  param->i_deblocking_filter_beta = -1;
  1666.                  param->analyse.f_psy_trellis = 0.2;
  1667. -                param->rc.f_ip_factor = 2.1;
  1668.                  param->rc.f_aq_strength = 1.3;
  1669.                  if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
  1670.                      param->analyse.inter |= X264_ANALYSE_PSUB8x8;
  1671. diff --git a/x264.h b/x264.h
  1672. index 2dfcc8d..4982b2e 100644
  1673. --- a/x264.h
  1674. +++ b/x264.h
  1675.  -35,7 +35,7 @@
  1676.  
  1677.  #include <stdarg.h>
  1678.  
  1679. -#define X264_BUILD 68
  1680. +#define X264_BUILD 69
  1681.  
  1682.  /* x264_t:
  1683.   *      opaque handler for encoder */
  1684.  -242,6 +242,7 @@ typedef struct x264_param_t
  1685.          int          i_noise_reduction; /* adaptive pseudo-deadzone */
  1686.          float        f_psy_rd; /* Psy RD strength */
  1687.          float        f_psy_trellis; /* Psy trellis strength */
  1688. +        int          b_psy; /* Toggle all psy optimizations */
  1689.  
  1690.          /* the deadzone size that will be used in luma quantization */
  1691.          int          i_luma_deadzone[2]; /* {inter, intra} */
  1692.  -271,6 +272,8 @@ typedef struct x264_param_t
  1693.  
  1694.          int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
  1695.          float       f_aq_strength;
  1696. +        int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
  1697. +        int         i_lookahead;
  1698.  
  1699.          /* 2pass */
  1700.          int         b_stat_write;   /* Enable stat writing in psz_stat_out */
  1701. --
  1702. 1.6.1.2