Guest
Public paste!

Dark Shikari

By: a guest | Aug 2nd, 2009 | Syntax: None | Size: 47.76 KB | Hits: 60 | Expires: Never
This paste has a previous version, view the difference. Copy text to clipboard
  1. diff --git a/common/common.c b/common/common.c
  2. index 9260c64..37fd332 100644
  3. --- a/common/common.c
  4. +++ b/common/common.c
  5. @@ -72,6 +72,7 @@ void    x264_param_default( x264_param_t *param )
  6.      param->i_bframe_adaptive = X264_B_ADAPT_FAST;
  7.      param->i_bframe_bias = 0;
  8.      param->b_bframe_pyramid = 0;
  9. +    param->i_lookahead = 50;
  10.  
  11.      param->b_deblocking_filter = 1;
  12.      param->i_deblocking_filter_alphac0 = 0;
  13. @@ -104,6 +105,7 @@ void    x264_param_default( x264_param_t *param )
  14.      param->rc.f_qblur = 0.5;
  15.      param->rc.f_complexity_blur = 20;
  16.      param->rc.i_zones = 0;
  17. +    param->rc.b_mb_tree = 0;
  18.  
  19.      /* Log */
  20.      param->pf_log = x264_log_default;
  21. @@ -337,6 +339,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  22.      }
  23.      OPT("bframes")
  24.          p->i_bframe = atoi(value);
  25. +    OPT("lookahead")
  26. +        p->i_lookahead = atoi(value);
  27.      OPT("b-adapt")
  28.      {
  29.          p->i_bframe_adaptive = atobool(value);
  30. @@ -559,6 +563,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  31.      }
  32.      OPT("qcomp")
  33.          p->rc.f_qcompress = atof(value);
  34. +    OPT("mbtree")
  35. +        p->rc.b_mb_tree = atobool(value);
  36.      OPT("qblur")
  37.          p->rc.f_qblur = atof(value);
  38.      OPT2("cplxblur", "cplx-blur")
  39. @@ -868,9 +874,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
  40.      s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
  41.                    p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
  42.  
  43. -    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
  44. +    if( p->i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  45. +        s += sprintf( s, " lookahead=%d", p->i_lookahead );
  46. +
  47. +    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
  48.                                 ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
  49. -                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
  50. +                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
  51.      if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
  52.      {
  53.          if( p->rc.i_rc_method == X264_RC_CRF )
  54. @@ -878,8 +887,10 @@ char *x264_param2string( x264_param_t *p, int b_res )
  55.          else
  56.              s += sprintf( s, " bitrate=%d ratetol=%.1f",
  57.                            p->rc.i_bitrate, p->rc.f_rate_tolerance );
  58. -        s += sprintf( s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
  59. -                      p->rc.f_qcompress, p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
  60. +        if( !p->rc.b_mb_tree )
  61. +            s += sprintf( s, " qcomp=%.2f", p->rc.f_qcompress );
  62. +        s += sprintf( s, " qpmin=%d qpmax=%d qpstep=%d",
  63. +                      p->rc.i_qp_min, p->rc.i_qp_max, p->rc.i_qp_step );
  64.          if( p->rc.b_stat_read )
  65.              s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
  66.                            p->rc.f_complexity_blur, p->rc.f_qblur );
  67. @@ -892,7 +903,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  68.      if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
  69.      {
  70.          s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
  71. -        if( p->i_bframe )
  72. +        if( p->i_bframe && !p->rc.b_mb_tree )
  73.              s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
  74.          s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
  75.          if( p->rc.i_aq_mode )
  76. diff --git a/common/common.h b/common/common.h
  77. index 8a25a13..5f9284e 100644
  78. --- a/common/common.h
  79. +++ b/common/common.h
  80. @@ -51,6 +51,7 @@
  81.  #define X264_SLICE_MAX 4
  82.  #define X264_NAL_MAX (4 + X264_SLICE_MAX)
  83.  #define X264_PCM_COST (386*8)
  84. +#define X264_LOOKAHEAD_MAX 250
  85.  
  86.  // number of pixels (per thread) in progress at any given time.
  87.  // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
  88. @@ -152,6 +153,24 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
  89.      return amvd0 + (amvd1<<16);
  90.  }
  91.  
  92. +static const uint8_t exp2_lut[64] = {
  93. +      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
  94. +     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
  95. +    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  96. +    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  97. +};
  98. +
  99. +static ALWAYS_INLINE int x264_exp2fix8( float x )
  100. +{
  101. +    int i, f;
  102. +    x += 8;
  103. +    if( x <= 0 ) return 0;
  104. +    if( x >= 16 ) return 0xffff;
  105. +    i = x;
  106. +    f = (x-i)*64;
  107. +    return (exp2_lut[f]+256) << i >> 8;
  108. +}
  109. +
  110.  /****************************************************************************
  111.   *
  112.   ****************************************************************************/
  113. @@ -327,11 +346,11 @@ struct x264_t
  114.      struct
  115.      {
  116.          /* Frames to be encoded (whose types have been decided) */
  117. -        x264_frame_t *current[X264_BFRAME_MAX*4+3];
  118. +        x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
  119.          /* Temporary buffer (frames types not yet decided) */
  120. -        x264_frame_t *next[X264_BFRAME_MAX*4+3];
  121. +        x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
  122.          /* Unused frames */
  123. -        x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
  124. +        x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
  125.          /* For adaptive B decision */
  126.          x264_frame_t *last_nonb;
  127.  
  128. diff --git a/common/frame.c b/common/frame.c
  129. index 23e6824..4cba313 100644
  130. --- a/common/frame.c
  131. +++ b/common/frame.c
  132. @@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
  133.                  memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
  134.                  CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
  135.              }
  136. +        CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  137. +        memset( frame->i_intra_cost, -1, i_mb_count * sizeof(int16_t) );
  138. +        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
  139. +        for( j = 0; j <= h->param.i_bframe+1; j++ )
  140. +            for( i = 0; i <= h->param.i_bframe+1; i++ )
  141. +            {
  142. +                CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
  143. +                CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
  144. +            }
  145.      }
  146.  
  147.      if( h->param.analyse.i_me_method >= X264_ME_ESA )
  148. @@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
  149.      CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
  150.      CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
  151.      CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
  152. -    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
  153.      if( h->param.i_bframe )
  154.      {
  155.          CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
  156. @@ -141,6 +149,7 @@ x264_frame_t *x264_frame_new( x264_t *h )
  157.              CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
  158.      }
  159.  
  160. +
  161.      x264_pthread_mutex_init( &frame->mutex, NULL );
  162.      x264_pthread_cond_init( &frame->cv, NULL );
  163.  
  164. diff --git a/common/frame.h b/common/frame.h
  165. index aad77f5..a3da4e4 100644
  166. --- a/common/frame.h
  167. +++ b/common/frame.h
  168. @@ -63,6 +63,8 @@ typedef struct
  169.      int8_t  *mb_type;
  170.      int16_t (*mv[2])[2];
  171.      int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
  172. +    uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  173. +    uint8_t  (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
  174.      int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
  175.      int8_t  *ref[2];
  176.      int     i_ref[2];
  177. @@ -83,6 +85,7 @@ typedef struct
  178.      float   *f_qp_offset;
  179.      int     b_intra_calculated;
  180.      uint16_t *i_intra_cost;
  181. +    uint32_t *i_propagate_cost;
  182.      uint16_t *i_inv_qscale_factor;
  183.  
  184.      /* threading */
  185. diff --git a/encoder/encoder.c b/encoder/encoder.c
  186. index 0f1ccc8..add6b75 100644
  187. --- a/encoder/encoder.c
  188. +++ b/encoder/encoder.c
  189. @@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
  190.          h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
  191.          h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
  192.          h->param.rc.i_aq_mode = 0;
  193. +        h->param.rc.b_mb_tree = 0;
  194.      }
  195.      h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
  196.      h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
  197. @@ -473,6 +474,14 @@ static int x264_validate_parameters( x264_t *h )
  198.      if( !h->param.i_bframe )
  199.          h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
  200.      h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
  201. +    h->param.i_lookahead = X264_MIN( h->param.i_lookahead, X264_LOOKAHEAD_MAX );
  202. +    if( h->param.rc.b_mb_tree && h->param.i_bframe && h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  203. +    {
  204. +        x264_log( h, X264_LOG_WARNING, "mb_tree + b-adapt 1 is not supported\n" );
  205. +        h->param.i_lookahead = 0;
  206. +        h->param.rc.b_mb_tree = 0;
  207. +    }
  208. +
  209.      h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
  210.                                  && h->param.i_bframe
  211.                                  && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
  212. @@ -537,6 +546,17 @@ static int x264_validate_parameters( x264_t *h )
  213.      h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
  214.      if( h->param.rc.f_aq_strength == 0 )
  215.          h->param.rc.i_aq_mode = 0;
  216. +    /* MB-tree requires AQ to be on, even if the strength is zero. */
  217. +    if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
  218. +    {
  219. +        h->param.rc.i_aq_mode = 1;
  220. +        h->param.rc.f_aq_strength = 0;
  221. +        if( h->param.b_bframe_pyramid )
  222. +        {
  223. +            x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
  224. +            h->param.b_bframe_pyramid = 0;
  225. +        }
  226. +    }
  227.      h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  228.      if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
  229.          h->param.analyse.i_subpel_refine = 9;
  230. @@ -723,6 +743,9 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  231.          h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
  232.      else
  233.          h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
  234. +    if( h->param.rc.b_mb_tree )
  235. +        h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.i_lookahead );
  236. +
  237.      h->frames.i_max_ref0 = h->param.i_frame_reference;
  238.      h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
  239.      h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering;
  240. @@ -730,7 +753,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
  241.          && ( h->param.rc.i_rc_method == X264_RC_ABR
  242.            || h->param.rc.i_rc_method == X264_RC_CRF
  243.            || h->param.i_bframe_adaptive
  244. -          || h->param.i_scenecut_threshold );
  245. +          || h->param.i_scenecut_threshold
  246. +          || h->param.rc.b_mb_tree );
  247.      h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
  248.      h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  249.  
  250. @@ -1443,7 +1467,12 @@ int     x264_encoder_encode( x264_t *h,
  251.          if( h->frames.b_have_lowres )
  252.              x264_frame_init_lowres( h, fenc );
  253.  
  254. -        if( h->param.rc.i_aq_mode )
  255. +        if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
  256. +        {
  257. +            if( x264_macroblock_tree_read( h, fenc ) )
  258. +                return -1;
  259. +        }
  260. +        else if( h->param.rc.i_aq_mode )
  261.              x264_adaptive_quant_frame( h, fenc );
  262.  
  263.          if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
  264. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  265. index 2f88708..a34e581 100644
  266. --- a/encoder/ratecontrol.c
  267. +++ b/encoder/ratecontrol.c
  268. @@ -105,6 +105,10 @@ struct x264_ratecontrol_t
  269.      /* 2pass stuff */
  270.      FILE *p_stat_file_out;
  271.      char *psz_stat_file_tmpname;
  272. +    FILE *p_mbtree_stat_file_out;
  273. +    char *psz_mbtree_stat_file_tmpname;
  274. +    char *psz_mbtree_stat_file_name;
  275. +    FILE *p_mbtree_stat_file_in;
  276.  
  277.      int num_entries;            /* number of ratecontrol_entry_ts */
  278.      ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
  279. @@ -209,30 +213,12 @@ static const float log2_lut[128] = {
  280.      0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
  281.  };
  282.  
  283. -static const uint8_t exp2_lut[64] = {
  284. -      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
  285. -     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
  286. -    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
  287. -    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
  288. -};
  289. -
  290.  static ALWAYS_INLINE float x264_log2( uint32_t x )
  291.  {
  292.      int lz = x264_clz( x );
  293.      return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
  294.  }
  295.  
  296. -static ALWAYS_INLINE int x264_exp2fix8( float x )
  297. -{
  298. -    int i, f;
  299. -    x += 8;
  300. -    if( x <= 0 ) return 0;
  301. -    if( x >= 16 ) return 0xffff;
  302. -    i = x;
  303. -    f = (x-i)*64;
  304. -    return (exp2_lut[f]+256) << i >> 8;
  305. -}
  306. -
  307.  void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  308.  {
  309.      /* constants chosen to result in approximately the same overall bitrate as without AQ.
  310. @@ -240,6 +226,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  311.      int mb_x, mb_y;
  312.      float strength;
  313.      float avg_adj = 0.f;
  314. +    /* MB-tree still needs f_qp_offset and i_inv_qscale_factor initialized when AQ strength is zero. */
  315. +    if( h->param.rc.f_aq_strength == 0 )
  316. +    {
  317. +        int mb_xy;
  318. +        memset( frame->f_qp_offset, 0, sizeof(float) * h->mb.i_mb_count );
  319. +        if( h->frames.b_have_lowres )
  320. +            for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
  321. +                frame->i_inv_qscale_factor[mb_xy] = 256;
  322. +        return;
  323. +    }
  324. +
  325.      if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
  326.      {
  327.          for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  328. @@ -256,6 +253,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
  329.      }
  330.      else
  331.          strength = h->param.rc.f_aq_strength * 1.0397f;
  332. +
  333.      for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
  334.          for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
  335.          {
  336. @@ -290,6 +288,34 @@ void x264_adaptive_quant( x264_t *h )
  337.      h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  338.  }
  339.  
  340. +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
  341. +{
  342. +    x264_ratecontrol_t *rc = h->rc;
  343. +    uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type;
  344. +    
  345. +    if( i_type_actual != SLICE_TYPE_B )
  346. +    {
  347. +        uint8_t i_type;
  348. +        if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
  349. +            goto fail;
  350. +
  351. +        if( i_type != i_type_actual )
  352. +        {
  353. +            x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
  354. +            return -1;
  355. +        }
  356. +
  357. +        if( fread( frame->f_qp_offset, sizeof(float), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
  358. +            goto fail;
  359. +    }
  360. +    else
  361. +        x264_adaptive_quant_frame( h, frame );
  362. +    return 0;
  363. +fail:
  364. +    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
  365. +    return -1;
  366. +}
  367. +
  368.  int x264_ratecontrol_new( x264_t *h )
  369.  {
  370.      x264_ratecontrol_t *rc;
  371. @@ -309,6 +335,18 @@ int x264_ratecontrol_new( x264_t *h )
  372.      else
  373.          rc->fps = 25.0;
  374.  
  375. +    if( h->param.rc.b_mb_tree )
  376. +    {
  377. +        h->param.rc.f_pb_factor = 1;
  378. +        h->param.rc.f_qcompress = 1;
  379. +        if( h->param.rc.i_vbv_buffer_size || h->param.rc.i_vbv_max_bitrate )
  380. +        {
  381. +            x264_log(h, X264_LOG_WARNING, "VBV is not currently supported with MB-tree.\n");
  382. +            h->param.rc.i_vbv_buffer_size = 0;
  383. +            h->param.rc.i_vbv_max_bitrate = 0;
  384. +        }
  385. +    }
  386. +
  387.      rc->bitrate = h->param.rc.i_bitrate * 1000.;
  388.      rc->rate_tolerance = h->param.rc.f_rate_tolerance;
  389.      rc->nmb = h->mb.i_mb_count;
  390. @@ -385,10 +423,11 @@ int x264_ratecontrol_new( x264_t *h )
  391.  
  392.      if( h->param.rc.i_rc_method == X264_RC_CRF )
  393.      {
  394. -        /* arbitrary rescaling to make CRF somewhat similar to QP */
  395. +        /* Arbitrary rescaling to make CRF somewhat similar to QP.
  396. +         * Try to compensate for MB-tree's effects as well. */
  397.          double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  398.          rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
  399. -                                 / qp2qscale( h->param.rc.f_rf_constant );
  400. +                                 / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) );
  401.      }
  402.  
  403.      rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
  404. @@ -434,6 +473,19 @@ int x264_ratecontrol_new( x264_t *h )
  405.              x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
  406.              return -1;
  407.          }
  408. +        if( h->param.rc.b_mb_tree )
  409. +        {
  410. +            char *mbtree_stats_in = x264_malloc( strlen(h->param.rc.psz_stat_in) + 8 );
  411. +            strcpy( mbtree_stats_in, h->param.rc.psz_stat_in );
  412. +            strcat( mbtree_stats_in, ".mbtree" );
  413. +            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
  414. +            x264_free( mbtree_stats_in );
  415. +            if( !rc->p_mbtree_stat_file_in )
  416. +            {
  417. +                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  418. +                return -1;
  419. +            }
  420. +        }
  421.  
  422.          /* check whether 1st pass options were compatible with current options */
  423.          if( !strncmp( stats_buf, "#options:", 9 ) )
  424. @@ -597,6 +649,22 @@ int x264_ratecontrol_new( x264_t *h )
  425.          p = x264_param2string( &h->param, 1 );
  426.          fprintf( rc->p_stat_file_out, "#options: %s\n", p );
  427.          x264_free( p );
  428. +        if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
  429. +        {
  430. +            rc->psz_mbtree_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 13 );
  431. +            strcpy( rc->psz_mbtree_stat_file_tmpname, h->param.rc.psz_stat_out );
  432. +            strcat( rc->psz_mbtree_stat_file_tmpname, ".mbtree.temp" );
  433. +            rc->psz_mbtree_stat_file_name = x264_malloc( strlen(h->param.rc.psz_stat_out) + 8 );
  434. +            strcpy( rc->psz_mbtree_stat_file_name, h->param.rc.psz_stat_out );
  435. +            strcat( rc->psz_mbtree_stat_file_name, ".mbtree" );
  436. +
  437. +            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
  438. +            if( rc->p_mbtree_stat_file_out == NULL )
  439. +            {
  440. +                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
  441. +                return -1;
  442. +            }
  443. +        }
  444.      }
  445.  
  446.      for( i=0; i<h->param.i_threads; i++ )
  447. @@ -737,7 +805,7 @@ void x264_ratecontrol_summary( x264_t *h )
  448.          double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  449.          x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
  450.                    qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
  451. -                             * rc->cplxr_sum / rc->wanted_bits_window ) );
  452. +                             * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) );
  453.      }
  454.  }
  455.  
  456. @@ -757,6 +825,18 @@ void x264_ratecontrol_delete( x264_t *h )
  457.              }
  458.          x264_free( rc->psz_stat_file_tmpname );
  459.      }
  460. +    if( rc->p_mbtree_stat_file_out )
  461. +    {
  462. +        fclose( rc->p_mbtree_stat_file_out );
  463. +        if( h->i_frame >= rc->num_entries )
  464. +            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
  465. +            {
  466. +                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
  467. +                          rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
  468. +            }
  469. +        x264_free( rc->psz_mbtree_stat_file_tmpname );
  470. +        x264_free( rc->psz_mbtree_stat_file_name );
  471. +    }
  472.      x264_free( rc->pred );
  473.      x264_free( rc->pred_b_from_p );
  474.      x264_free( rc->entry );
  475. @@ -1122,6 +1202,15 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  476.                   h->stat.frame.i_mb_count_p,
  477.                   h->stat.frame.i_mb_count_skip,
  478.                   c_direct);
  479. +
  480. +        /* TODO: deal with endianness.
  481. +         * Don't re-write the data in multi-pass mode. */
  482. +        if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
  483. +        {
  484. +            uint8_t i_type = h->sh.i_type;
  485. +            fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out );
  486. +            fwrite( h->fenc->f_qp_offset, sizeof(float), h->mb.i_mb_count, rc->p_mbtree_stat_file_out );
  487. +        }
  488.      }
  489.  
  490.      if( rc->b_abr )
  491. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  492. index 3310d3c..a0b62b2 100644
  493. --- a/encoder/ratecontrol.h
  494. +++ b/encoder/ratecontrol.h
  495. @@ -29,6 +29,7 @@ void x264_ratecontrol_delete( x264_t * );
  496.  
  497.  void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
  498.  void x264_adaptive_quant( x264_t * );
  499. +int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
  500.  void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
  501.  void x264_ratecontrol_start( x264_t *, int i_force_qp );
  502.  int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
  503. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  504. index 2c16429..ca71c5a 100644
  505. --- a/encoder/slicetype.c
  506. +++ b/encoder/slicetype.c
  507. @@ -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  508.      x264_me_t m[2];
  509.      int i_bcost = COST_MAX;
  510.      int l, i;
  511. +    int list_used = 0;
  512.  
  513.      h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
  514.      h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
  515. @@ -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  516.          h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
  517.          i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
  518.                             m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
  519. -        if( i_bcost > i_cost ) \
  520. -            i_bcost = i_cost; \
  521. +        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
  522.      }
  523.  
  524.      m[0].i_pixel = PIXEL_8x8;
  525. @@ -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  526.              int i_cost;
  527.              h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
  528.              i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
  529. -            if( i_bcost > i_cost )
  530. -                i_bcost = i_cost;
  531. +            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
  532.          }
  533.      }
  534.  
  535. @@ -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  536.              *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
  537.              m[l].cost = *fenc_costs[l];
  538.          }
  539. -        i_bcost = X264_MIN( i_bcost, m[l].cost );
  540. +        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
  541.      }
  542.  
  543.      if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
  544.          TRY_BIDIR( m[0].mv, m[1].mv, 5 );
  545.  
  546. +    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
  547. +
  548.  lowres_intra_mb:
  549.      /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
  550.      /* FIXME: Should we still forbid them now that we cache intra scores? */
  551. -    if( !b_bidir )
  552. +    if( !b_bidir || h->param.rc.b_mb_tree )
  553.      {
  554.          int i_icost, b_intra;
  555.          if( !fenc->b_intra_calculated )
  556. @@ -237,18 +238,23 @@ lowres_intra_mb:
  557.          }
  558.          else
  559.              i_icost = fenc->i_intra_cost[i_mb_xy];
  560. -        b_intra = i_icost < i_bcost;
  561. -        if( b_intra )
  562. -            i_bcost = i_icost;
  563. -        if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  564. -            && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  565. -            || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  566. +        if( !b_bidir )
  567.          {
  568. -            fenc->i_intra_mbs[b-p0] += b_intra;
  569. -            fenc->i_cost_est[0][0] += i_icost;
  570. +            b_intra = i_icost < i_bcost;
  571. +            if( b_intra )
  572. +                i_bcost = i_icost;
  573. +            if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
  574. +                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
  575. +                || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  576. +            {
  577. +                fenc->i_intra_mbs[b-p0] += b_intra;
  578. +                fenc->i_cost_est[0][0] += i_icost;
  579. +            }
  580.          }
  581.      }
  582.  
  583. +    frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
  584. +
  585.      return i_bcost;
  586.  }
  587.  #undef TRY_BIDIR
  588. @@ -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  589.                                 x264_frame_t **frames, int p0, int p1, int b,
  590.                                 int b_intra_penalty )
  591.  {
  592. +
  593.      int i_score = 0;
  594.      /* Don't use the AQ'd scores for slicetype decision. */
  595.      int i_score_aq = 0;
  596. @@ -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  597.  
  598.          /* the edge mbs seem to reduce the predictive quality of the
  599.           * whole frame's score, but are needed for a spatial distribution. */
  600. -        if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  601. +        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
  602. +            h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  603.          {
  604.              for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  605.              {
  606. @@ -355,7 +363,169 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  607.      return i_score;
  608.  }
  609.  
  610. -#define MAX_LENGTH (X264_BFRAME_MAX*4)
  611. +/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
  612. + * re-running lookahead. */
  613. +static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
  614. +                                                  int p0, int p1, int b )
  615. +{
  616. +    int i_score = 0;
  617. +    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
  618. +    x264_emms();
  619. +    for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  620. +    {
  621. +        row_satd[ h->mb.i_mb_y ] = 0;
  622. +        for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
  623. +        {
  624. +            int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  625. +            int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy];
  626. +            float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
  627. +            i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8;
  628. +            row_satd[ h->mb.i_mb_y ] += i_mb_cost;
  629. +            if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
  630. +                 h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
  631. +                 h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
  632. +            {
  633. +                i_score += i_mb_cost;
  634. +            }
  635. +        }
  636. +    }
  637. +    return i_score;
  638. +}
  639. +/* Propagate frames[b]'s per-MB cost along its lowres MVs into i_propagate_cost of frames[p0] (list 0) and frames[p1] (list 1). */
  640. +static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
  641. +{
  642. +    int refs[2] = {p0,p1};
  643. +    int dist_scale_factor = p1 == p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); /* 128 fallback covers the only divide-by-zero case */
  644. +    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
  645. +
  646. +    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
  647. +    {
  648. +        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  649. +        {
  650. +            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  651. +            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
  652. +            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
  653. +            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
  654. +            /* The approximate amount of data that this block contains. */
  655. +            int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
  656. +
  657. +            /* Don't propagate for an intra block.  This test comes before the scaling so the
  658. +             * division never sees intra_cost == 0 (costs are presumably non-negative). */
  659. +            if( inter_cost < intra_cost )
  660. +            {
  661. +                int mv[2][2], list;
  662. +                /* Keep the share that inter prediction saves; divide by 64 for per-pixel summing. */
  663. +                propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
  664. +                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
  665. +                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
  666. +                if( b != p1 )
  667. +                {
  668. +                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
  669. +                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
  670. +                }
  671. +
  672. +                /* Follow the MVs to the previous frame(s). */
  673. +                for( list = 0; list < 2; list++ )
  674. +                    if( (lists_used >> list)&1 )
  675. +                    {
  676. +                        int x = mv[list][0];
  677. +                        int y = mv[list][1];
  678. +                        int listamount = propagate_amount;
  679. +                        int mbx = (x>>5)+h->mb.i_mb_x; /* x>>5 is the whole-MB displacement, x&31 the remainder */
  680. +                        int mby = ((y>>5)+h->mb.i_mb_y);
  681. +                        int idx0 = mbx + mby*h->mb.i_mb_stride;
  682. +                        int idx1 = idx0 + 1;
  683. +                        int idx2 = idx0 + h->mb.i_mb_stride;
  684. +                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
  685. +                        int idx0weight = (32-(y&31))*(32-(x&31)); /* bilinear split over the 2x2 MBs touched; weights sum to 1024 */
  686. +                        int idx1weight = (32-(y&31))*(x&31);
  687. +                        int idx2weight = (y&31)*(32-(x&31));
  688. +                        int idx3weight = (y&31)*(x&31);
  689. +
  690. +                        /* Apply bipred weighting. */
  691. +                        if( lists_used == 3 )
  692. +                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
  693. +
  694. +                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
  695. +                         * be counted. */
  696. +                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
  697. +                        {
  698. +                            frames[refs[list]]->i_propagate_cost[idx0] += (listamount*idx0weight+8)>>4;
  699. +                            frames[refs[list]]->i_propagate_cost[idx1] += (listamount*idx1weight+8)>>4;
  700. +                            frames[refs[list]]->i_propagate_cost[idx2] += (listamount*idx2weight+8)>>4;
  701. +                            frames[refs[list]]->i_propagate_cost[idx3] += (listamount*idx3weight+8)>>4;
  702. +                        }
  703. +                        else /* Check offsets individually */
  704. +                        {
  705. +                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
  706. +                                frames[refs[list]]->i_propagate_cost[idx0] += (listamount*idx0weight+8)>>4;
  707. +                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
  708. +                                frames[refs[list]]->i_propagate_cost[idx1] += (listamount*idx1weight+8)>>4;
  709. +                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
  710. +                                frames[refs[list]]->i_propagate_cost[idx2] += (listamount*idx2weight+8)>>4;
  711. +                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
  712. +                                frames[refs[list]]->i_propagate_cost[idx3] += (listamount*idx3weight+8)>>4;
  713. +                        }
  714. +                    }
  715. +            }
  716. +        }
  717. +    }
  718. +}
  719. +/* Walk frames[] backwards propagating MB costs between frames, then lower f_qp_offset of the last non-B frame visited. */
  720. +static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
  721. +{
  722. +    int i, idx = !b_intra; /* idx: lowest frame index the backward walk may reach (0 when b_intra, else 1) */
  723. +    int last_nonb, cur_nonb = 1;
  724. +    if( b_intra )
  725. +       x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
  726. +
  727. +    i = num_frames-1;
  728. +    while( i > 0 && frames[i]->i_type == X264_TYPE_B ) /* skip trailing B-frames to find the last non-B frame */
  729. +        i--;
  730. +    last_nonb = i;
  731. +    /* The loop above never takes i below 0, so this only triggers when num_frames <= 0. */
  732. +    if( last_nonb < 0 )
  733. +        return;
  734. +    /* Start the accumulation for the last non-B frame from zero. */
  735. +    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  736. +    while( i-- > idx )
  737. +    {
  738. +        cur_nonb = i;
  739. +        while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) /* previous non-B frame (or frame 0) */
  740. +            cur_nonb--;
  741. +        if( cur_nonb < idx )
  742. +            break;
  743. +        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* presumably fills the lowres costs/MVs that the propagate call reads -- TODO confirm */
  744. +        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  745. +        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb ); /* last_nonb -> cur_nonb */
  746. +        while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* then each B-frame between the two, latest first */
  747. +        {
  748. +            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
  749. +            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
  750. +            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
  751. +            i--;
  752. +        }
  753. +        last_nonb = cur_nonb;
  754. +    }
  755. +    x264_emms();
  756. +    /* Turn the propagate cost accumulated on frames[last_nonb] into a per-MB QP offset. */
  757. +    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
  758. +    {
  759. +        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
  760. +        {
  761. +            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
  762. +            float intra_cost =  (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
  763. +
  764. +            if( intra_cost ) /* skip MBs whose scaled intra cost is 0: ratio would divide by zero */
  765. +            {
  766. +                float propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
  767. +                float ratio = (intra_cost + propagate_cost) / (intra_cost);
  768. +                /* Magic formula is made of magic. */
  769. +                frames[last_nonb]->f_qp_offset[mb_index] -= 2.0 * log2f(ratio); /* offset drops by 2 per doubling of the ratio */
  770. +            }
  771. +        }
  772. +    }
  773. +}
  774.  
  775.  static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
  776.  {
  777.  -393,14 +563,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
  778.  /* Uses strings due to the fact that the speed of the control functions is
  779.     negligable compared to the cost of running slicetype_frame_cost, and because
  780.     it makes debugging easier. */
  781. -static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
  782. +static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
  783.  {
  784. -    char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
  785. +    char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
  786.      int num_paths = X264_MIN(max_bframes+1, length);
  787.      int suffix_size, loc, path;
  788.      int best_cost = COST_MAX;
  789.      int best_path_index = 0;
  790. -    length = X264_MIN(length,MAX_LENGTH);
  791. +    length = X264_MIN(length,X264_LOOKAHEAD_MAX);
  792.  
  793.      /* Iterate over all currently possible paths and add suffixes to each one */
  794.      for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
  795.  -426,15 +596,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  796.      memcpy( best_paths[length], paths[best_path_index], length );
  797.  }
  798.  
  799. -static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
  800. -{
  801. -    char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
  802. -    int n;
  803. -    for( n = 2; n < length-1; n++ )
  804. -        x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
  805. -    return strspn( best_paths[length-2], "B" );
  806. -}
  807. -
  808.  static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
  809.  {
  810.      x264_frame_t *frame = frames[p1];
  811.  -477,10 +638,10 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
  812.      return res;
  813.  }
  814.  
  815. -static void x264_slicetype_analyse( x264_t *h )
  816. +static void x264_slicetype_analyse( x264_t *h, int keyframe )
  817.  {
  818.      x264_mb_analysis_t a;
  819. -    x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
  820. +    x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
  821.      int num_frames;
  822.      int keyint_limit;
  823.      int j;
  824.  -497,13 +658,14 @@ static void x264_slicetype_analyse( x264_t *h )
  825.          frames[j+1] = h->frames.next[j];
  826.      keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
  827.      num_frames = X264_MIN( j, keyint_limit );
  828. -    if( num_frames == 0 )
  829. +
  830. +    if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
  831.          return;
  832.  
  833.      x264_lowres_context_init( h, &a );
  834.      idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
  835.  
  836. -    if( num_frames == 1 )
  837. +    if( num_frames == 1 && !h->param.rc.b_mb_tree )
  838.      {
  839.  no_b_frames:
  840.          frames[1]->i_type = X264_TYPE_P;
  841.  -512,28 +674,84 @@ no_b_frames:
  842.          return;
  843.      }
  844.  
  845. -    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  846. +    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS || h->param.rc.b_mb_tree )
  847.      {
  848. -        int num_bframes;
  849. +        /* This is important psy-wise: if we have a non-scenecut keyframe,
  850. +         * there will be significant visual artifacts if the frames just before
  851. +         * go down in quality due to being referenced less, despite it being
  852. +         * more RD-optimal. */
  853. +        num_frames = j;
  854. +
  855. +        char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
  856. +        int n;
  857. +        int num_bframes = 0;
  858.          int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  859. +        int num_analysed_frames = num_frames;
  860. +        int reset_start;
  861.          if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
  862.          {
  863.              frames[1]->i_type = idr_frame_type;
  864.              return;
  865.          }
  866. -        num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
  867. -        assert(num_bframes < num_frames);
  868.  
  869. -        for( j = 1; j < num_bframes+1; j++ )
  870. +        if( h->param.i_bframe )
  871.          {
  872. -            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  873. +            if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
  874.              {
  875. -                frames[j]->i_type = X264_TYPE_P;
  876. -                return;
  877. +                /* Perform the frametype analysis. */
  878. +                for( n = 2; n < num_frames-1; n++ )
  879. +                    x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
  880. +                num_bframes = strspn( best_paths[num_frames-2], "B" );
  881. +                /* Load the results of the analysis into the frame types. */
  882. +                for( j = 1; j < num_frames-1; j++ )
  883. +                    frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
  884. +                frames[num_frames-1]->i_type = X264_TYPE_P;
  885.              }
  886. -            frames[j]->i_type = X264_TYPE_B;
  887. +            else /* No b-adapt, fast isn't currently supported. */
  888. +            {
  889. +                num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
  890. +                for( j = 1; j < num_frames-1; j++ )
  891. +                    frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
  892. +                frames[num_frames-1]->i_type = X264_TYPE_P;
  893. +            }
  894. +
  895. +            /* Check scenecut on the first minigop. */
  896. +            for( j = 1; j < num_bframes+1; j++ )
  897. +                if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
  898. +                {
  899. +                    frames[j]->i_type = X264_TYPE_P;
  900. +                    num_analysed_frames = j;
  901. +                    break;
  902. +                }
  903. +
  904. +            reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
  905. +        }
  906. +        else
  907. +        {
  908. +            for( j = 1; j < num_frames; j++ )
  909. +                frames[j]->i_type = X264_TYPE_P;
  910. +            reset_start = !keyframe + 1;
  911.          }
  912. -        frames[num_bframes+1]->i_type = X264_TYPE_P;
  913. +
  914. +        /* Perform the actual macroblock tree analysis. */
  915. +        if( h->param.rc.b_mb_tree )
  916. +            x264_macroblock_tree( h, &a, frames, num_analysed_frames, keyframe );
  917. +
  918. +        /* Enforce keyframe limit. */
  919. +        if( h->param.i_bframe )
  920. +            for( j = 0; j <= num_bframes+1; j++ )
  921. +                if( j+1 >= keyint_limit )
  922. +                {
  923. +                    if( j )
  924. +                        frames[j]->i_type = X264_TYPE_P;
  925. +                    frames[j+1]->i_type = idr_frame_type;
  926. +                    reset_start = j+2;
  927. +                    break;
  928. +                }
  929. +
  930. +        /* Restore frametypes for all frames that haven't actually been decided yet. */
  931. +        for( j = reset_start; j < num_frames; j++ )
  932. +            frames[j]->i_type = X264_TYPE_AUTO;
  933.      }
  934.      else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
  935.      {
  936.  -606,8 +824,9 @@ void x264_slicetype_decide( x264_t *h )
  937.                  x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
  938.      }
  939.      else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
  940. -             || h->param.i_scenecut_threshold )
  941. -        x264_slicetype_analyse( h );
  942. +             || h->param.i_scenecut_threshold
  943. +             || h->param.rc.b_mb_tree )
  944. +        x264_slicetype_analyse( h, 0 );
  945.  
  946.      for( bframes = 0;; bframes++ )
  947.      {
  948.  -645,7 +864,11 @@ void x264_slicetype_decide( x264_t *h )
  949.                  frm->i_type = X264_TYPE_P;
  950.          }
  951.  
  952. -        if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
  953. +        if( frm->i_type == X264_TYPE_AUTO )
  954. +        {
  955. +            assert(0);
  956. +            frm->i_type = X264_TYPE_B;
  957. +        }
  958.          else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
  959.      }
  960.  }
  961.  -653,7 +876,7 @@ void x264_slicetype_decide( x264_t *h )
  962.  int x264_rc_analyse_slice( x264_t *h )
  963.  {
  964.      x264_mb_analysis_t a;
  965. -    x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
  966. +    x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
  967.      int p0=0, p1, b;
  968.      int cost;
  969.  
  970.  -662,6 +885,12 @@ int x264_rc_analyse_slice( x264_t *h )
  971.      if( IS_X264_TYPE_I(h->fenc->i_type) )
  972.      {
  973.          p1 = b = 0;
  974. +        /* For MB-tree, we have to perform propagation analysis on I-frames too. */
  975. +        if( h->param.rc.b_mb_tree )
  976. +        {
  977. +            h->frames.last_nonb = h->fenc;
  978. +            x264_slicetype_analyse( h, 1 );
  979. +        }
  980.      }
  981.      else if( X264_TYPE_P == h->fenc->i_type )
  982.      {
  983.  -680,11 +909,16 @@ int x264_rc_analyse_slice( x264_t *h )
  984.      frames[p0] = h->fref0[0];
  985.      frames[b] = h->fenc;
  986.  
  987. -    cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  988. +    if( h->param.rc.b_mb_tree )
  989. +        cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
  990. +    else
  991. +    {
  992. +        cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
  993.  
  994. -    /* In AQ, use the weighted score instead. */
  995. -    if( h->param.rc.i_aq_mode )
  996. -        cost = frames[b]->i_cost_est[b-p0][p1-b];
  997. +        /* In AQ, use the weighted score instead. */
  998. +        if( h->param.rc.i_aq_mode )
  999. +            cost = frames[b]->i_cost_est[b-p0][p1-b];
  1000. +    }
  1001.  
  1002.      h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
  1003.      h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
  1004. diff --git a/x264.c b/x264.c
  1005. index c3b4f29..886abed 100644
  1006. --- a/x264.c
  1007. +++ b/x264.c
  1008.  -184,6 +184,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1009.      H1( "  -i, --min-keyint <integer>  Minimum GOP size [%d]\n", defaults->i_keyint_min );
  1010.      H1( "      --no-scenecut           Disable adaptive I-frame decision\n" );
  1011.      H1( "      --scenecut <integer>    How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
  1012. +    H0( "      --lookahead <integer>   Number of frames for frametype lookahead [%d]\n", defaults->i_lookahead );
  1013.      H0( "  -b, --bframes <integer>     Number of B-frames between I and P [%d]\n", defaults->i_bframe );
  1014.      H1( "      --b-adapt               Adaptive B-frame decision method [%d]\n"
  1015.          "                                  Higher values may lower threading efficiency.\n"
  1016.  -228,6 +229,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  1017.          "                                  - 2: Last pass, does not overwrite stats file\n"
  1018.          "                                  - 3: Nth pass, overwrites stats file\n" );
  1019.      H0( "      --stats <string>        Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
  1020. +    H0( "      --mbtree                Enable mb-tree.  Requires b-adapt 2 if b-frames\n"
  1021. +        "                              are enabled. Experimental.\n" );
  1022.      H0( "      --qcomp <float>         QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
  1023.      H1( "      --cplxblur <float>      Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
  1024.      H1( "      --qblur <float>         Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
  1025.  -383,6 +386,7 @@ static struct option long_options[] =
  1026.      { "slow-firstpass",    no_argument, NULL, OPT_SLOWFIRSTPASS },
  1027.      { "bitrate",     required_argument, NULL, 'B' },
  1028.      { "bframes",     required_argument, NULL, 'b' },
  1029. +    { "lookahead",   required_argument, NULL, 0 },
  1030.      { "b-adapt",     required_argument, NULL, 0 },
  1031.      { "no-b-adapt",        no_argument, NULL, 0 },
  1032.      { "b-bias",      required_argument, NULL, 0 },
  1033.  -446,6 +450,8 @@ static struct option long_options[] =
  1034.      { "pass",        required_argument, NULL, 'p' },
  1035.      { "stats",       required_argument, NULL, 0 },
  1036.      { "qcomp",       required_argument, NULL, 0 },
  1037. +    { "mbtree",            no_argument, NULL, 0 },
  1038. +    { "no-mbtree",         no_argument, NULL, 0 },
  1039.      { "qblur",       required_argument, NULL, 0 },
  1040.      { "cplxblur",    required_argument, NULL, 0 },
  1041.      { "zones",       required_argument, NULL, 0 },
  1042.  -662,7 +668,6 @@ static int  Parse( int argc, char **argv,
  1043.                  param->i_deblocking_filter_alphac0 = -1;
  1044.                  param->i_deblocking_filter_beta = -1;
  1045.                  param->analyse.f_psy_trellis = 0.2;
  1046. -                param->rc.f_ip_factor = 2.1;
  1047.                  param->rc.f_aq_strength = 1.3;
  1048.                  if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
  1049.                      param->analyse.inter |= X264_ANALYSE_PSUB8x8;
  1050. diff --git a/x264.h b/x264.h
  1051. index 2dfcc8d..122d4ba 100644
  1052. --- a/x264.h
  1053. +++ b/x264.h
  1054.  -188,6 +188,7 @@ typedef struct x264_param_t
  1055.      int         i_keyint_max;       /* Force an IDR keyframe at this interval */
  1056.      int         i_keyint_min;       /* Scenecuts closer together than this are coded as I, not IDR. */
  1057.      int         i_scenecut_threshold; /* how aggressively to insert extra I frames */
  1058. +    int         i_lookahead;
  1059.      int         i_bframe;   /* how many b-frame between 2 references pictures */
  1060.      int         i_bframe_adaptive;
  1061.      int         i_bframe_bias;
  1062.  -271,6 +272,7 @@ typedef struct x264_param_t
  1063.  
  1064.          int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
  1065.          float       f_aq_strength;
  1066. +        int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
  1067.  
  1068.          /* 2pass */
  1069.          int         b_stat_write;   /* Enable stat writing in psz_stat_out */