View difference between Paste ID: <a href="/f87abd3f">f87abd3f</a> and <a href="/post/view"></a> - Pastebin.com

View difference between Paste ID: f87abd3f and

SHOW: | | - or go back to the newest paste.

From f21e71a04ba65aff9b5a4bfa8a73fd86c463f4ee Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Mon, 3 Aug 2009 20:52:30 -0700
Subject: [PATCH 1/2] Various 1-pass VBV tweaks
 Make predictors have an offset in addition to a multiplier.
 This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI.
 We tried linear regressions, but they were very unreliable as predictors.
 Also allow VBV to be slightly more aggressive in raising QPs, so that it does not run short of bits in some situations.
 Up to 1 dB improvement on some clips.

---
 encoder/ratecontrol.c |   32 +++++++++++++++++++++-----------
 1 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 2f88708..087e658 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -58,6 +58,7 @@ typedef struct
     double coeff;
     double count;
     double decay;
+    double offset;
 } predictor_t;
 
 struct x264_ratecontrol_t
@@ -409,9 +410,11 @@ int x264_ratecontrol_new( x264_t *h )
         rc->pred[i].coeff= 2.0;
         rc->pred[i].count= 1.0;
         rc->pred[i].decay= 0.5;
+        rc->pred[i].offset= 0.0;
         rc->row_preds[i].coeff= .25;
         rc->row_preds[i].count= 1.0;
         rc->row_preds[i].decay= 0.5;
+        rc->row_preds[i].offset= 0.0;
     }
     *rc->pred_b_from_p = rc->pred[0];
 
@@ -953,7 +956,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
         if( y < h->sps->i_mb_height-1 )
         {
             int i_estimated;
-            int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
+            int avg_qp = X264_MIN(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
                        + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
             rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
             i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
@@ -1153,10 +1156,6 @@ void x264_ratecontrol_end( x264_t *h, int bits )
             {
                 update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
                                   h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
-                /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */
-                /* Hackily cap the predictor coeff in case this happens. */
-                /* FIXME FIXME FIXME */
-                rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. );
                 rc->bframe_bits = 0;
             }
         }
@@ -1270,17 +1269,28 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
 
 static double predict_size( predictor_t *p, double q, double var )
 {
-     return p->coeff*var / (q*p->count);
+     return (p->coeff*var + p->offset) / (q*p->count);
 }
 
 static void update_predictor( predictor_t *p, double q, double var, double bits )
 {
+    const double range = 1.5;
     if( var < 10 )
         return;
-    p->count *= p->decay;
-    p->coeff *= p->decay;
-    p->count ++;
-    p->coeff += bits*q / var;
+    double old_coeff = p->coeff / p->count;
+    double new_coeff = bits*q / var;
+    double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
+    double new_offset = bits*q - new_coeff_clipped * var;
+    if( new_offset >= 0 )
+        new_coeff = new_coeff_clipped;
+    else
+        new_offset = 0;
+    p->count  *= p->decay;
+    p->coeff  *= p->decay;
+    p->offset *= p->decay;
+    p->count  ++;
+    p->coeff  += new_coeff;
+    p->offset += new_offset;
 }
 
 // update VBV after encoding a frame
@@ -1350,7 +1360,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
         double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
         double qf = 1.0;
         if( bits > rcc->buffer_fill/2 )
-            qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
+            qf = rcc->buffer_fill/(2*bits);
         q /= qf;
         bits *= qf;
         if( bits < rcc->buffer_rate/2 )
-- 
1.6.1.2


From e7182499c7bc23d3376090f66d7617b2080f2b46 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Tue, 4 Aug 2009 17:46:33 -0700
Subject: [PATCH 2/2] Macroblock-tree ratecontrol
 On by default; can be turned off with --no-mbtree.
 Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
 Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode.
 Doesn't work with b-pyramid yet.
 Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
 This makes the "medium" preset a bit slower.  Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
 All presets "fast" and above will have MB-tree on.
 Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis.
 Default is 40; larger values will be slower and require more memory but give more accurate results.
 This value will be used in the future to control ratecontrol lookahead (VBV).
 Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
 This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
 Quality improvement from MB-tree is about 2-70% depending on content.
 Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.

---
 common/common.c       |   22 ++-
 common/common.h       |   50 ++++++-
 common/frame.c        |   10 +-
 common/frame.h        |    3 +
 common/osdep.h        |    9 +-
 encoder/analyse.c     |    4 +-
 encoder/encoder.c     |   56 ++++++-
 encoder/ratecontrol.c |  201 +++++++++++++++++-------
 encoder/ratecontrol.h |    3 +-
 encoder/slicetype.c   |  424 ++++++++++++++++++++++++++++++++++++++-----------
 x264.c                |   31 +++-
 x264.h                |    5 +-
 12 files changed, 639 insertions(+), 179 deletions(-)

diff --git a/common/common.c b/common/common.c
index 9260c64..371ed1e 100644
--- a/common/common.c
+++ b/common/common.c
@@ -95,6 +95,7 @@ void    x264_param_default( x264_param_t *param )
     param->rc.f_pb_factor = 1.3;
     param->rc.i_aq_mode = X264_AQ_VARIANCE;
     param->rc.f_aq_strength = 1.0;
+    param->rc.i_lookahead = 40;
 
     param->rc.b_stat_write = 0;
     param->rc.psz_stat_out = "x264_2pass.log";
@@ -104,6 +105,7 @@ void    x264_param_default( x264_param_t *param )
     param->rc.f_qblur = 0.5;
     param->rc.f_complexity_blur = 20;
     param->rc.i_zones = 0;
+    param->rc.b_mb_tree = 1;
 
     /* Log */
     param->pf_log = x264_log_default;
@@ -117,6 +119,7 @@ void    x264_param_default( x264_param_t *param )
     param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
     param->analyse.i_me_method = X264_ME_HEX;
     param->analyse.f_psy_rd = 1.0;
+    param->analyse.b_psy = 1;
     param->analyse.f_psy_trellis = 0;
     param->analyse.i_me_range = 16;
     param->analyse.i_subpel_refine = 7;
@@ -493,6 +496,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
             p->analyse.f_psy_trellis = 0;
         }
     }
+    OPT("psy")
+        p->analyse.b_psy = atobool(value);
     OPT("chroma-me")
         p->analyse.b_chroma_me = atobool(value);
     OPT("mixed-refs")
@@ -524,6 +529,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
         p->rc.f_rf_constant = atof(value);
         p->rc.i_rc_method = X264_RC_CRF;
     }
+    OPT("rc-lookahead")
+        p->rc.i_lookahead = atoi(value);
     OPT2("qpmin", "qp-min")
         p->rc.i_qp_min = atoi(value);
     OPT2("qpmax", "qp-max")
@@ -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
     }
     OPT("qcomp")
         p->rc.f_qcompress = atof(value);
+    OPT("mbtree")
+        p->rc.b_mb_tree = atobool(value);
     OPT("qblur")
         p->rc.f_qblur = atof(value);
     OPT2("cplxblur", "cplx-blur")
@@ -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
     s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
     s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
     s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
-    s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
+    s += sprintf( s, " psy=%d", p->analyse.b_psy );
+    if( p->analyse.b_psy )
+        s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
     s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
     s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
     s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
@@ -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
     s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
                   p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
 
-    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
+    if( p->rc.b_mb_tree )
+        s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
+
+    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
                                ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
-                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
+                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
     if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
     {
         if( p->rc.i_rc_method == X264_RC_CRF )
@@ -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
     if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
     {
         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
-        if( p->i_bframe )
+        if( p->i_bframe && !p->rc.b_mb_tree )
             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
         s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
         if( p->rc.i_aq_mode )
diff --git a/common/common.h b/common/common.h
index 8a25a13..30163ab 100644
--- a/common/common.h
+++ b/common/common.h
@@ -51,6 +51,7 @@
 #define X264_SLICE_MAX 4
 #define X264_NAL_MAX (4 + X264_SLICE_MAX)
 #define X264_PCM_COST (386*8)
+#define X264_LOOKAHEAD_MAX 250
 
 // number of pixels (per thread) in progress at any given time.
 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
@@ -152,6 +153,49 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
     return amvd0 + (amvd1<<16);
 }
 
+static const uint8_t exp2_lut[64] = {
+      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
+     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
+    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
+    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
+};
+
+static ALWAYS_INLINE int x264_exp2fix8( float x )
+{
+    int i, f;
+    x += 8;
+    if( x <= 0 ) return 0;
+    if( x >= 16 ) return 0xffff;
+    i = x;
+    f = (x-i)*64;
+    return (exp2_lut[f]+256) << i >> 8;
+}
+
+static const float log2_lut[128] = {
+    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
+    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
+    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
+    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
+    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
+    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
+    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
+    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
+    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
+    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
+    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
+    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
+    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
+    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
+    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
+    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
+};
+
+static ALWAYS_INLINE float x264_log2( uint32_t x )
+{
+    int lz = x264_clz( x );
+    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
+}
+
 /****************************************************************************
  *
  ****************************************************************************/
@@ -327,11 +371,11 @@ struct x264_t
     struct
     {
         /* Frames to be encoded (whose types have been decided) */
-        x264_frame_t *current[X264_BFRAME_MAX*4+3];
+        x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
         /* Temporary buffer (frames types not yet decided) */
-        x264_frame_t *next[X264_BFRAME_MAX*4+3];
+        x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
         /* Unused frames */
-        x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
+        x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
         /* For adaptive B decision */
         x264_frame_t *last_nonb;
 
diff --git a/common/frame.c b/common/frame.c
index 23e6824..2097d52 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
                 memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
                 CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
             }
+        CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
+        memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
+        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
+        for( j = 0; j <= h->param.i_bframe+1; j++ )
+            for( i = 0; i <= h->param.i_bframe+1; i++ )
+            {
+                CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
+                CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
+            }
     }
 
     if( h->param.analyse.i_me_method >= X264_ME_ESA )
@@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
-    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
     if( h->param.i_bframe )
     {
         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
diff --git a/common/frame.h b/common/frame.h
index aad77f5..a3da4e4 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -63,6 +63,8 @@ typedef struct
     int8_t  *mb_type;
     int16_t (*mv[2])[2];
     int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
+    uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
+    uint8_t  (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
     int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
     int8_t  *ref[2];
     int     i_ref[2];
@@ -83,6 +85,7 @@ typedef struct
     float   *f_qp_offset;
     int     b_intra_calculated;
     uint16_t *i_intra_cost;
+    uint32_t *i_propagate_cost;
     uint16_t *i_inv_qscale_factor;
 
     /* threading */
diff --git a/common/osdep.h b/common/osdep.h
index 915ec05..2095198 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -147,7 +147,9 @@
 #ifdef WORDS_BIGENDIAN
 #define endian_fix(x) (x)
 #define endian_fix32(x) (x)
-#elif defined(__GNUC__) && defined(HAVE_MMX)
+#define endian_fix16(x) (x)
+#else
+#if defined(__GNUC__) && defined(HAVE_MMX)
 static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
 {
     asm("bswap %0":"+r"(x));
@@ -171,6 +173,11 @@ static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
         return endian_fix32(x);
 }
 #endif
+static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x )
+{
+    return (x<<8)|(x>>8);
+}
+#endif
 
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3)
 #define x264_clz(x) __builtin_clz(x)
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 4a36fcd..38b9976 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
         h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
     }
     h->mb.i_psy_rd_lambda = a->i_lambda;
-    /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
-    h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
+    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
+    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
 
     h->mb.i_me_method = h->param.analyse.i_me_method;
     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 0f1ccc8..74ff97d 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -42,7 +42,7 @@
 
 #define bs_write_ue bs_write_ue_big
 
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
                                     x264_nal_t **pp_nal, int *pi_nal,
                                     x264_picture_t *pic_out );
 
@@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
         h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
         h->param.rc.i_aq_mode = 0;
+        h->param.rc.b_mb_tree = 0;
     }
     h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
     h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
@@ -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.i_bframe )
         h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
     h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
+    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_LOOKAHEAD_MAX );
+    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
+    if( h->param.rc.b_stat_read )
+        h->param.rc.i_lookahead = 0;
+    else if( !h->param.rc.i_lookahead )
+        h->param.rc.b_mb_tree = 0;
+    if( h->param.rc.f_qcompress == 1 )
+        h->param.rc.b_mb_tree = 0;
+
     h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
                                 && h->param.i_bframe
                                 && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
@@ -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.b_cabac )
         h->param.analyse.i_trellis = 0;
     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
+    if( !h->param.analyse.b_psy )
+    {
+        h->param.analyse.f_psy_rd = 0;
+        h->param.analyse.f_psy_trellis = 0;
+    }
     if( !h->param.analyse.i_trellis )
         h->param.analyse.f_psy_trellis = 0;
     h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
@@ -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h )
     h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
     if( h->param.rc.f_aq_strength == 0 )
         h->param.rc.i_aq_mode = 0;
+    /* MB-tree requires AQ to be on, even if the strength is zero. */
+    if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
+    {
+        h->param.rc.i_aq_mode = 1;
+        h->param.rc.f_aq_strength = 0;
+    }
+    if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid )
+    {
+        x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
+        h->param.b_bframe_pyramid = 0;
+    }
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
     if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
         h->param.analyse.i_subpel_refine = 9;
@@ -723,6 +749,9 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
         h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
     else
         h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
+    if( h->param.rc.b_mb_tree )
+        h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
+
     h->frames.i_max_ref0 = h->param.i_frame_reference;
     h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
     h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering;
@@ -730,7 +759,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
         && ( h->param.rc.i_rc_method == X264_RC_ABR
           || h->param.rc.i_rc_method == X264_RC_CRF
           || h->param.i_bframe_adaptive
-          || h->param.i_scenecut_threshold );
+          || h->param.i_scenecut_threshold
+          || h->param.rc.b_mb_tree );
     h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
     h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
 
@@ -1443,7 +1473,12 @@ int     x264_encoder_encode( x264_t *h,
         if( h->frames.b_have_lowres )
             x264_frame_init_lowres( h, fenc );
 
-        if( h->param.rc.i_aq_mode )
+        if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
+        {
+            if( x264_macroblock_tree_read( h, fenc ) )
+                return -1;
+        }
+        else if( h->param.rc.i_aq_mode )
             x264_adaptive_quant_frame( h, fenc );
 
         if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
@@ -1461,7 +1496,8 @@ int     x264_encoder_encode( x264_t *h,
         /* 2: Select frame types */
         if( h->frames.next[0] == NULL )
         {
-            x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+            if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
+                return -1;
             return 0;
         }
 
@@ -1621,11 +1657,12 @@ int     x264_encoder_encode( x264_t *h,
     else
         x264_slices_write( h );
 
-    x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
+    if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
+        return -1;
     return 0;
 }
 
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
                                     x264_nal_t **pp_nal, int *pi_nal,
                                     x264_picture_t *pic_out )
 {
@@ -1640,7 +1677,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
     if( !h->out.i_nal )
     {
         pic_out->i_type = X264_TYPE_AUTO;
-        return;
+        return 0;
     }
 
     x264_frame_push_unused( thread_current, h->fenc );
@@ -1670,7 +1707,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
 
     /* update rc */
     x264_emms();
-    x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
+    if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 )
+        return -1;
 
     /* restore CPU state (before using float again) */
     x264_emms();
@@ -1784,6 +1822,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
 
     if( h->param.psz_dump_yuv )
         x264_frame_dump( h );
+
+    return 0;
 }
 
 static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 087e658..f669007 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -71,6 +71,7 @@ struct x264_ratecontrol_t
     double fps;
     double bitrate;
     double rate_tolerance;
+    double qcompress;
     int nmb;                    /* number of macroblocks in a frame */
     int qp_constant[5];
 
@@ -106,6 +107,10 @@ struct x264_ratecontrol_t
     /* 2pass stuff */
     FILE *p_stat_file_out;
     char *psz_stat_file_tmpname;
+    FILE *p_mbtree_stat_file_out;
+    char *psz_mbtree_stat_file_tmpname;
+    char *psz_mbtree_stat_file_name;
+    FILE *p_mbtree_stat_file_in;
 
     int num_entries;            /* number of ratecontrol_entry_ts */
     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
@@ -118,6 +123,7 @@ struct x264_ratecontrol_t
     double lmin[5];             /* min qscale by frame type */
     double lmax[5];
     double lstep;               /* max change (multiply) in qscale per frame */
+    uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */
 
     /* MBRC stuff */
     double frame_size_estimated;
@@ -191,49 +197,6 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame
     return var;
 }
 
-static const float log2_lut[128] = {
-    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
-    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
-    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
-    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
-    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
-    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
-    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
-    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
-    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
-    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
-    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
-    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
-    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
-    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
-    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
-    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
-};
-
-static const uint8_t exp2_lut[64] = {
-      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
-     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
-    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
-    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
-};
-
-static ALWAYS_INLINE float x264_log2( uint32_t x )
-{
-    int lz = x264_clz( x );
-    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
-}
-
-static ALWAYS_INLINE int x264_exp2fix8( float x )
-{
-    int i, f;
-    x += 8;
-    if( x <= 0 ) return 0;
-    if( x >= 16 ) return 0xffff;
-    i = x;
-    f = (x-i)*64;
-    return (exp2_lut[f]+256) << i >> 8;
-}
-
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
 {
     /* constants chosen to result in approximately the same overall bitrate as without AQ.
@@ -241,6 +204,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
     int mb_x, mb_y;
     float strength;
     float avg_adj = 0.f;
+    /* Need to init it anyways for MB tree. */
+    if( h->param.rc.f_aq_strength == 0 )
+    {
+        int mb_xy;
+        memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
+        if( h->frames.b_have_lowres )
+            for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
+                frame->i_inv_qscale_factor[mb_xy] = 256;
+        return;
+    }
+
     if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
     {
         for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
@@ -257,6 +231,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
     }
     else
         strength = h->param.rc.f_aq_strength * 1.0397f;
+
     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
         for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
         {
@@ -291,6 +266,47 @@ void x264_adaptive_quant( x264_t *h )
     h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 }
 
+int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
+{
+    x264_ratecontrol_t *rc = h->rc;
+    uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type;
+    int i;
+
+    if( i_type_actual != SLICE_TYPE_B )
+    {
+        uint8_t i_type;
+
+        if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) )
+            goto fail;
+
+        if( i_type != i_type_actual )
+        {
+            x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
+            return -1;
+        }
+
+        if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
+            goto fail;
+
+        for( i = 0; i < h->mb.i_mb_count; i++ )
+            frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[i] )) * (1/256.0);
+    }
+    else
+        x264_adaptive_quant_frame( h, frame );
+    return 0;
+fail:
+    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
+    return -1;
+}
+
+static char *x264_strcat_filename( char *input, char *suffix )
+{
+    char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 );
+    strcpy( output, input );
+    strcat( output, suffix );
+    return output;
+}
+
 int x264_ratecontrol_new( x264_t *h )
 {
     x264_ratecontrol_t *rc;
@@ -310,6 +326,14 @@ int x264_ratecontrol_new( x264_t *h )
     else
         rc->fps = 25.0;
 
+    if( h->param.rc.b_mb_tree )
+    {
+        h->param.rc.f_pb_factor = 1;
+        rc->qcompress = 1;
+    }
+    else
+        rc->qcompress = h->param.rc.f_qcompress;
+
     rc->bitrate = h->param.rc.i_bitrate * 1000.;
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
     rc->nmb = h->mb.i_mb_count;
@@ -379,17 +403,18 @@ int x264_ratecontrol_new( x264_t *h )
         rc->accum_p_norm = .01;
         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
         /* estimated ratio that produces a reasonable QP for the first I-frame */
-        rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
+        rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
         rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
         rc->last_non_b_pict_type = SLICE_TYPE_I;
     }
 
     if( h->param.rc.i_rc_method == X264_RC_CRF )
     {
-        /* arbitrary rescaling to make CRF somewhat similar to QP */
+        /* Arbitrary rescaling to make CRF somewhat similar to QP.
+         * Try to compensate for MB-tree's effects as well. */
         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
-        rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
-                                 / qp2qscale( h->param.rc.f_rf_constant );
+        rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
+                                 / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) );
     }
 
     rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
@@ -437,6 +462,17 @@ int x264_ratecontrol_new( x264_t *h )
             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
             return -1;
         }
+        if( h->param.rc.b_mb_tree )
+        {
+            char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
+            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
+            x264_free( mbtree_stats_in );
+            if( !rc->p_mbtree_stat_file_in )
+            {
+                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+                return -1;
+            }
+        }
 
         /* check whether 1st pass options were compatible with current options */
         if( !strncmp( stats_buf, "#options:", 9 ) )
@@ -483,6 +519,9 @@ int x264_ratecontrol_new( x264_t *h )
                 x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
                 return -1;
             }
+
+            if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc-lookahead=" ) ) && sscanf( p, "rc-lookahead=%d", &i ) )
+                h->param.rc.i_lookahead = i;
         }
 
         /* find number of pics */
@@ -585,10 +624,7 @@ int x264_ratecontrol_new( x264_t *h )
     if( h->param.rc.b_stat_write )
     {
         char *p;
-
-        rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
-        strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
-        strcat( rc->psz_stat_file_tmpname, ".temp" );
+        rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
 
         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
         if( rc->p_stat_file_out == NULL )
@@ -600,6 +636,25 @@ int x264_ratecontrol_new( x264_t *h )
         p = x264_param2string( &h->param, 1 );
         fprintf( rc->p_stat_file_out, "#options: %s\n", p );
         x264_free( p );
+        if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
+        {
+            rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
+            rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
+
+            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
+            if( rc->p_mbtree_stat_file_out == NULL )
+            {
+                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
+                return -1;
+            }
+        }
+    }
+
+    if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
+    {
+        rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t));
+        if( !rc->qp_buffer )
+            return -1;
     }
 
     for( i=0; i<h->param.i_threads; i++ )
@@ -739,8 +794,8 @@ void x264_ratecontrol_summary( x264_t *h )
     {
         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
         x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
-                  qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
-                             * rc->cplxr_sum / rc->wanted_bits_window ) );
+                  qscale2qp( pow( base_cplx, 1 - rc->qcompress )
+                             * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) );
     }
 }
 
@@ -760,9 +815,22 @@ void x264_ratecontrol_delete( x264_t *h )
             }
         x264_free( rc->psz_stat_file_tmpname );
     }
+    if( rc->p_mbtree_stat_file_out )
+    {
+        fclose( rc->p_mbtree_stat_file_out );
+        if( h->i_frame >= rc->num_entries )
+            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
+            {
+                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
+                          rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
+            }
+        x264_free( rc->psz_mbtree_stat_file_tmpname );
+        x264_free( rc->psz_mbtree_stat_file_name );
+    }
     x264_free( rc->pred );
     x264_free( rc->pred_b_from_p );
     x264_free( rc->entry );
+    x264_free( rc->qp_buffer );
     if( rc->zones )
     {
         x264_free( rc->zones[0].param );
@@ -1086,7 +1154,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
 }
 
 /* After encoding one frame, save stats and update ratecontrol state */
-void x264_ratecontrol_end( x264_t *h, int bits )
+int x264_ratecontrol_end( x264_t *h, int bits )
 {
     x264_ratecontrol_t *rc = h->rc;
     const int *mbs = h->stat.frame.i_mb_count;
@@ -1114,7 +1182,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
                         ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
                           dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
                         : '-';
-        fprintf( rc->p_stat_file_out,
+        if( fprintf( rc->p_stat_file_out,
                  "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
                  h->fenc->i_frame, h->i_frame,
                  c_type, rc->qpa_rc,
@@ -1124,7 +1192,22 @@ void x264_ratecontrol_end( x264_t *h, int bits )
                  h->stat.frame.i_mb_count_i,
                  h->stat.frame.i_mb_count_p,
                  h->stat.frame.i_mb_count_skip,
-                 c_direct);
+                 c_direct) < 0 )
+             goto fail;
+
+        /* Don't re-write the data in multi-pass mode. */
+        if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
+        {
+            uint8_t i_type = h->sh.i_type;
+            int i;
+            /* Values are stored as big-endian FIX8.8 */
+            for( i = 0; i < h->mb.i_mb_count; i++ )
+                rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
+            if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
+                goto fail;
+            if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
+                goto fail;
+        }
     }
 
     if( rc->b_abr )
@@ -1162,6 +1245,10 @@ void x264_ratecontrol_end( x264_t *h, int bits )
     }
 
     update_vbv( h, bits );
+    return 0;
+fail:
+    x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n");
+    return -1;
 }
 
 /****************************************************************************
@@ -1177,7 +1264,7 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
     double q;
     x264_zone_t *zone = get_zone( h, frame_num );
 
-    q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
+    q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
 
     // avoid NaN's in the rc_eq
     if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
index 3310d3c..ed8abab 100644
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -29,12 +29,13 @@ void x264_ratecontrol_delete( x264_t * );
 
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
 void x264_adaptive_quant( x264_t * );
+int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
 void x264_ratecontrol_start( x264_t *, int i_force_qp );
 int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
 void x264_ratecontrol_mb( x264_t *, int bits );
 int  x264_ratecontrol_qp( x264_t * );
-void x264_ratecontrol_end( x264_t *, int bits );
+int  x264_ratecontrol_end( x264_t *, int bits );
 void x264_ratecontrol_summary( x264_t * );
 void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
 int  x264_ratecontrol_get_estimated_size( x264_t const *);
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index 2c16429..be3eaf7 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
     x264_me_t m[2];
     int i_bcost = COST_MAX;
     int l, i;
+    int list_used = 0;
 
     h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
     h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
@@ -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
         h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
         i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
                            m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
-        if( i_bcost > i_cost ) \
-            i_bcost = i_cost; \
+        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
     }
 
     m[0].i_pixel = PIXEL_8x8;
@@ -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
             int i_cost;
             h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
             i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
-            if( i_bcost > i_cost )
-                i_bcost = i_cost;
+            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
         }
     }
 
@@ -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
             *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
             m[l].cost = *fenc_costs[l];
         }
-        i_bcost = X264_MIN( i_bcost, m[l].cost );
+        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
     }
 
     if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
         TRY_BIDIR( m[0].mv, m[1].mv, 5 );
 
+    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
+
 lowres_intra_mb:
     /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
     /* FIXME: Should we still forbid them now that we cache intra scores? */
-    if( !b_bidir )
+    if( !b_bidir || h->param.rc.b_mb_tree )
     {
         int i_icost, b_intra;
         if( !fenc->b_intra_calculated )
@@ -237,18 +238,23 @@ lowres_intra_mb:
         }
         else
             i_icost = fenc->i_intra_cost[i_mb_xy];
-        b_intra = i_icost < i_bcost;
-        if( b_intra )
-            i_bcost = i_icost;
-        if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
-            && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
-            || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+        if( !b_bidir )
         {
-            fenc->i_intra_mbs[b-p0] += b_intra;
-            fenc->i_cost_est[0][0] += i_icost;
+            b_intra = i_icost < i_bcost;
+            if( b_intra )
+                i_bcost = i_icost;
+            if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
+                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
+                || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+            {
+                fenc->i_intra_mbs[b-p0] += b_intra;
+                fenc->i_cost_est[0][0] += i_icost;
+            }
         }
     }
 
+    frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
+
     return i_bcost;
 }
 #undef TRY_BIDIR
@@ -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
                                x264_frame_t **frames, int p0, int p1, int b,
                                int b_intra_penalty )
 {
+
     int i_score = 0;
     /* Don't use the AQ'd scores for slicetype decision. */
     int i_score_aq = 0;
@@ -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
 
         /* the edge mbs seem to reduce the predictive quality of the
          * whole frame's score, but are needed for a spatial distribution. */
-        if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
+        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
+            h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
         {
             for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
             {
@@ -355,7 +363,172 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
     return i_score;
 }
 
-#define MAX_LENGTH (X264_BFRAME_MAX*4)
+/* MB-tree can change the per-MB quantizer offsets after lookahead has run, so rebuild the frame
+ * cost from the cached per-MB lowres costs instead of re-running lookahead. */
+static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
+                                                  int p0, int p1, int b )
+{
+    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
+    int i_score = 0;
+    x264_emms();
+    for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
+    {
+        int row_sum = 0;
+        for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
+        {
+            int xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            int cost = frames[b]->lowres_costs[b-p0][p1-b][xy];
+            float qp_adj = frames[b]->f_qp_offset[xy];
+            cost = (cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8; /* +128 rounds the >>8 to nearest */
+            row_sum += cost;
+            /* Every MB feeds its row total; border MBs are left out of the frame score unless the frame is <= 2 MBs wide or high. */
+            if( h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ||
+                (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
+                 h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) )
+                i_score += cost;
+        }
+        row_satd[ h->mb.i_mb_y ] = row_sum;
+    }
+    return i_score;
+}
+
+/* Propagate frame b's per-MB costs back onto its references, frames[p0] (list 0) and frames[p1] (list 1).
+ * Each MB whose cached inter cost is below its intra cost hands a share of (intra cost + its own
+ * i_propagate_cost) to the up to four reference MBs its lowres MV points at, split by bilinear weights.
+ * List-1 MVs are only loaded when b != p1, and every sum saturates at 65535 (CLIP_ADD). */
+static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
+{
+    x264_frame_t *refs[2] = {frames[p0],frames[p1]};
+    /* Position of b between p0 and p1 in 1/256ths, rounded; use the midpoint (128) if p0 == p1 rather than divide by 0. */
+    int dist_scale_factor = p1 != p0 ? ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0) : 128;
+    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
+
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+    {
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        {
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
+            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
+            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
+            /* Don't propagate for an intra block.  The share is only computed in here, where intra_cost > inter_cost. */
+            if( inter_cost < intra_cost )
+            {
+                int mv[2][2], list;
+                /* The approximate amount of data that this block contains. */
+                int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
+                /* Scale by (intra-inter)/intra, then divide by 64 for per-pixel summing. */
+                propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
+                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
+                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
+                if( b != p1 )
+                {
+                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
+                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
+                }
+
+                /* Follow the MVs to the previous frame(s). */
+                for( list = 0; list < 2; list++ )
+                    if( (lists_used >> list)&1 )
+                    {
+                        int x = mv[list][0];
+                        int y = mv[list][1];
+                        int listamount = propagate_amount;
+                        int mbx = (x>>5)+h->mb.i_mb_x; /* x>>5: whole-MB displacement; x&31: remainder out of 32 */
+                        int mby = ((y>>5)+h->mb.i_mb_y);
+                        int idx0 = mbx + mby*h->mb.i_mb_stride;
+                        int idx1 = idx0 + 1;
+                        int idx2 = idx0 + h->mb.i_mb_stride;
+                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
+                        int idx0weight = (32-(y&31))*(32-(x&31));
+                        int idx1weight = (32-(y&31))*(x&31);
+                        int idx2weight = (y&31)*(32-(x&31));
+                        int idx3weight = (y&31)*(x&31);
+                        /* Apply bipred weighting. */
+                        if( lists_used == 3 )
+                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
+                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
+                         * be counted. */
+                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
+                        {
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                        }
+                        else /* Check offsets individually */
+                        {
+                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                        }
+                    }
+            }
+        }
+    }
+}
+
+/* MB-tree pass over frames[0..num_frames-1]: walk backwards from the last non-B frame, propagating per-MB costs
+ * onto each frame's references, then lower f_qp_offset of the earliest non-B frame reached where MBs inherited cost.
+ * b_intra != 0 lets the walk reach frames[0] (idx 0); otherwise it stops at frames[1]. */
+static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra )
+{
+    int i, idx = !b_intra; /* idx: lowest frame index the backward walk may use as p0 */
+    int last_nonb, cur_nonb = 1;
+    if( b_intra )
+       x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); /* NOTE(review): presumably fills frames[0]'s intra costs; confirm */
+    /* Skip trailing B-frames: the walk starts from the last non-B frame in the window. */
+    i = num_frames-1;
+    while( i > 0 && frames[i]->i_type == X264_TYPE_B )
+        i--;
+    last_nonb = i;
+    if( last_nonb < 0 ) /* only reachable when num_frames <= 0 */
+        return;
+    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); /* the walk starts here with nothing inherited */
+    while( i-- > idx )
+    {
+        cur_nonb = i;
+        while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 )
+            cur_nonb--;
+        if( cur_nonb < idx )
+            break;
+        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* last_nonb with cur_nonb as p0 */
+        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
+        while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* B-frames between cur_nonb and last_nonb */
+        {
+            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
+            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); /* B-frame starts from zero inherited cost */
+            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
+            i--;
+        }
+        last_nonb = cur_nonb;
+    }
+    x264_emms();
+    /* Convert what frames[last_nonb] inherited into QP offsets: the larger (intra+propagate)/intra, the lower the offset. */
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
+    {
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        {
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
+            int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
+            if( intra_cost ) /* a zero intra cost would make the log2 ratio undefined, so such MBs are left alone */
+            {
+                int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
+                float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost);
+                /* Allow the constant to be adjusted via qcompress, since the two
+                 * concepts are very similar. */
+                frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio;
+            }
+        }
+    }
+}
 
 static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
 {
@@ -393,14 +566,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
 /* Uses strings due to the fact that the speed of the control functions is
    negligable compared to the cost of running slicetype_frame_cost, and because
    it makes debugging easier. */
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
 {
-    char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
+    char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
     int num_paths = X264_MIN(max_bframes+1, length);
     int suffix_size, loc, path;
     int best_cost = COST_MAX;
     int best_path_index = 0;
-    length = X264_MIN(length,MAX_LENGTH);
+    length = X264_MIN(length,X264_LOOKAHEAD_MAX);
 
     /* Iterate over all currently possible paths and add suffixes to each one */
     for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
@@ -426,15 +599,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
     memcpy( best_paths[length], paths[best_path_index], length );
 }
 
-static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
-{
-    char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
-    int n;
-    for( n = 2; n < length-1; n++ )
-        x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
-    return strspn( best_paths[length-2], "B" );
-}
-
 static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
 {
     x264_frame_t *frame = frames[p1];
@@ -477,13 +641,13 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
     return res;
 }
 
-static void x264_slicetype_analyse( x264_t *h )
+static void x264_slicetype_analyse( x264_t *h, int keyframe )
 {
     x264_mb_analysis_t a;
-    x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
+    x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
     int num_frames;
     int keyint_limit;
-    int j;
+    int i,j;
     int i_mb_count = NUM_MBS;
     int cost1p0, cost2p0, cost1b1, cost2p1;
     int idr_frame_type;
@@ -497,96 +661,150 @@ static void x264_slicetype_analyse( x264_t *h )
         frames[j+1] = h->frames.next[j];
     keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
     num_frames = X264_MIN( j, keyint_limit );
-    if( num_frames == 0 )
+
+    if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
         return;
 
     x264_lowres_context_init( h, &a );
     idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
 
-    if( num_frames == 1 )
+    if( num_frames == 1 && !h->param.rc.b_mb_tree )
     {
-no_b_frames:
         frames[1]->i_type = X264_TYPE_P;
         if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
             frames[1]->i_type = idr_frame_type;
         return;
     }
 
-    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
+    /* This is important psy-wise: if we have a non-scenecut keyframe,
+     * there will be significant visual artifacts if the frames just before
+     * go down in quality due to being referenced less, despite it being
+     * more RD-optimal. */
+    if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
+        num_frames = j;
+
+    char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
+    int n;
+    int num_bframes = 0;
+    int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
+    int num_analysed_frames = num_frames;
+    int reset_start;
+    if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
     {
-        int num_bframes;
-        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
-        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
+        frames[1]->i_type = idr_frame_type;
+        return;
+    }
+
+    if( h->param.i_bframe )
+    {
+        if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
         {
-            frames[1]->i_type = idr_frame_type;
-            return;
+            /* Perform the frametype analysis. */
+            for( n = 2; n < num_frames-1; n++ )
+                x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
+            num_bframes = strspn( best_paths[num_frames-2], "B" );
+            /* Load the results of the analysis into the frame types. */
+            for( j = 1; j < num_frames; j++ )
+                frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
+            frames[num_frames]->i_type = X264_TYPE_P;
         }
-        num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
-        assert(num_bframes < num_frames);
-
-        for( j = 1; j < num_bframes+1; j++ )
+        else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
         {
-            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
+            for( i = 0; i < num_frames-(2-!i); )
             {
-                frames[j]->i_type = X264_TYPE_P;
-                return;
-            }
-            frames[j]->i_type = X264_TYPE_B;
-        }
-        frames[num_bframes+1]->i_type = X264_TYPE_P;
-    }
-    else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
-    {
-        cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
-        if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
-            goto no_b_frames;
+                cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
+                if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
+                {
+                    frames[i+1]->i_type = X264_TYPE_P;
+                    frames[i+2]->i_type = X264_TYPE_P;
+                    i += 2;
+                    continue;
+                }
 
-        cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
-        cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
-        cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
+                cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
+                cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
+                cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
 
-        if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
-            goto no_b_frames;
+                if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
+                {
+                    frames[i+1]->i_type = X264_TYPE_P;
+                    frames[i+2]->i_type = X264_TYPE_P;
+                    i += 2;
+                    continue;
+                }
 
-        // arbitrary and untuned
-        #define INTER_THRESH 300
-        #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
-        frames[1]->i_type = X264_TYPE_B;
+                // arbitrary and untuned
+                #define INTER_THRESH 300
+                #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
+                frames[i+1]->i_type = X264_TYPE_B;
+                frames[i+2]->i_type = X264_TYPE_P;
 
-        for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
+                for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
+                {
+                    int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
+                    int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
+
+                    if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
+                    {
+                        frames[j]->i_type = X264_TYPE_P;
+                        break;
+                    }
+                    else
+                        frames[j]->i_type = X264_TYPE_B;
+                }
+                i = j;
+            }
+            frames[i+!i]->i_type = X264_TYPE_P;
+            num_bframes = 0;
+            while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
+                num_bframes++;
+        }
+        else
         {
-            int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
-            int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
+            num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
+            for( j = 1; j < num_frames; j++ )
+                frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
+            frames[num_frames]->i_type = X264_TYPE_P;
+        }
 
-            if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
+        /* Check scenecut on the first minigop. */
+        for( j = 1; j < num_bframes+1; j++ )
+            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
             {
                 frames[j]->i_type = X264_TYPE_P;
+                num_analysed_frames = j;
                 break;
             }
-            else
-                frames[j]->i_type = X264_TYPE_B;
-        }
+
+        reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
     }
     else
     {
-        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
-        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
-        {
-            frames[1]->i_type = idr_frame_type;
-            return;
-        }
+        for( j = 1; j < num_frames; j++ )
+            frames[j]->i_type = X264_TYPE_P;
+        reset_start = !keyframe + 1;
+    }
 
-        for( j = 1; j < max_bframes+1; j++ )
-        {
-            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
+    /* Perform the actual macroblock tree analysis.
+     * Don't go farther than the lookahead parameter; this helps in short GOPs. */
+    if( h->param.rc.b_mb_tree )
+        x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
+
+    /* Enforce keyframe limit. */
+    if( h->param.i_bframe )
+        for( j = 0; j <= num_bframes; j++ )
+            if( j+1 > keyint_limit )
             {
-                frames[j]->i_type = X264_TYPE_P;
-                return;
+                if( j )
+                    frames[j]->i_type = X264_TYPE_P;
+                frames[j+1]->i_type = idr_frame_type;
+                reset_start = j+2;
+                break;
             }
-            frames[j]->i_type = X264_TYPE_B;
-        }
-        frames[max_bframes+1]->i_type = X264_TYPE_P;
-    }
+
+    /* Restore frametypes for all frames that haven't actually been decided yet. */
+    for( j = reset_start; j <= num_frames; j++ )
+        frames[j]->i_type = X264_TYPE_AUTO;
 }
 
 void x264_slicetype_decide( x264_t *h )
@@ -606,8 +824,9 @@ void x264_slicetype_decide( x264_t *h )
                 x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
     }
     else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
-             || h->param.i_scenecut_threshold )
-        x264_slicetype_analyse( h );
+             || h->param.i_scenecut_threshold
+             || h->param.rc.b_mb_tree )
+        x264_slicetype_analyse( h, 0 );
 
     for( bframes = 0;; bframes++ )
     {
@@ -645,7 +864,9 @@ void x264_slicetype_decide( x264_t *h )
                 frm->i_type = X264_TYPE_P;
         }
 
-        if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
+        if( frm->i_type == X264_TYPE_AUTO )
+            frm->i_type = X264_TYPE_B;
+
         else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
     }
 }
@@ -653,7 +874,7 @@ void x264_slicetype_decide( x264_t *h )
 int x264_rc_analyse_slice( x264_t *h )
 {
     x264_mb_analysis_t a;
-    x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
+    x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
     int p0=0, p1, b;
     int cost;
 
@@ -662,6 +883,12 @@ int x264_rc_analyse_slice( x264_t *h )
     if( IS_X264_TYPE_I(h->fenc->i_type) )
     {
         p1 = b = 0;
+        /* For MB-tree, we have to perform propagation analysis on I-frames too. */
+        if( h->param.rc.b_mb_tree )
+        {
+            h->frames.last_nonb = h->fenc;
+            x264_slicetype_analyse( h, 1 );
+        }
     }
     else if( X264_TYPE_P == h->fenc->i_type )
     {
@@ -680,11 +907,16 @@ int x264_rc_analyse_slice( x264_t *h )
     frames[p0] = h->fref0[0];
     frames[b] = h->fenc;
 
-    cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
+    if( h->param.rc.b_mb_tree )
+        cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
+    else
+    {
+        cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
 
-    /* In AQ, use the weighted score instead. */
-    if( h->param.rc.i_aq_mode )
-        cost = frames[b]->i_cost_est[b-p0][p1-b];
+        /* In AQ, use the weighted score instead. */
+        if( h->param.rc.i_aq_mode )
+            cost = frames[b]->i_cost_est[b-p0][p1-b];
+    }
 
     h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
     h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
diff --git a/x264.c b/x264.c
index c3b4f29..c75bfde 100644
--- a/x264.c
+++ b/x264.c
@@ -168,9 +168,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
     H0( "                                  - baseline,main,high\n" );
     H0( "      --preset                Use a preset to select encoding settings [medium]\n" );
     H0( "                                  Overridden by user settings\n");
-    H1( "                                  - ultrafast,veryfast,fast,medium\n"
-        "                                  - slow,slower,placebo\n" );
-    else H0( "                                  - ultrafast,veryfast,fast,medium,slow,slower\n" );
+    H0( "                                  - ultrafast,veryfast,faster,fast\n"
+        "                                  - medium,slow,slower,placebo\n" );
     H0( "      --tune                  Tune the settings for a particular type of source\n" );
     H0( "                                  Overridden by user settings\n");
     H1( "                                  - film,animation,grain,psnr,ssim\n"
@@ -204,6 +203,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
     H0( "  -q, --qp <integer>          Set QP (0-51, 0=lossless)\n" );
     H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
     H0( "      --crf <float>           Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
+    H0( "      --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
     H0( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
     H0( "      --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
     H1( "      --vbv-init <float>      Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
@@ -228,6 +228,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
         "                                  - 2: Last pass, does not overwrite stats file\n"
         "                                  - 3: Nth pass, overwrites stats file\n" );
     H0( "      --stats <string>        Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
+    H0( "      --no-mbtree                Disable mb-tree ratecontrol.\n");
     H0( "      --qcomp <float>         QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
     H1( "      --cplxblur <float>      Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
     H1( "      --qblur <float>         Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
@@ -277,6 +278,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
         "                                  #1: RD (requires subme>=6)\n"
         "                                  #2: Trellis (requires trellis, experimental)\n",
                                        defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis );
+    H1( "      --no-psy                Disable all visual optimizations that worsen\n"
+        "                              both PSNR and SSIM.\n" );
     H0( "      --no-mixed-refs         Don't decide references on a per partition basis\n" );
     H1( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
     H0( "      --no-8x8dct             Disable adaptive spatial transform size\n" );
@@ -403,6 +406,7 @@ static struct option long_options[] =
     { "qpmax",       required_argument, NULL, 0 },
     { "qpstep",      required_argument, NULL, 0 },
     { "crf",         required_argument, NULL, 0 },
+    { "rc-lookahead",required_argument, NULL, 0 },
     { "ref",         required_argument, NULL, 'r' },
     { "asm",         required_argument, NULL, 0 },
     { "no-asm",            no_argument, NULL, 0 },
@@ -422,6 +426,7 @@ static struct option long_options[] =
     { "mvrange-thread", required_argument, NULL, 0 },
     { "subme",       required_argument, NULL, 'm' },
     { "psy-rd",      required_argument, NULL, 0 },
+    { "no-psy",            no_argument, NULL, 0 },
     { "mixed-refs",        no_argument, NULL, 0 },
     { "no-mixed-refs",     no_argument, NULL, 0 },
     { "no-chroma-me",      no_argument, NULL, 0 },
@@ -446,6 +451,8 @@ static struct option long_options[] =
     { "pass",        required_argument, NULL, 'p' },
     { "stats",       required_argument, NULL, 0 },
     { "qcomp",       required_argument, NULL, 0 },
+    { "mbtree",            no_argument, NULL, 0 },
+    { "no-mbtree",         no_argument, NULL, 0 },
     { "qblur",       required_argument, NULL, 0 },
     { "cplxblur",    required_argument, NULL, 0 },
     { "zones",       required_argument, NULL, 0 },
@@ -542,6 +549,8 @@ static int  Parse( int argc, char **argv,
                 param->rc.i_aq_mode = 0;
                 param->analyse.b_mixed_references = 0;
                 param->analyse.i_trellis = 0;
+                param->i_bframe_adaptive = X264_B_ADAPT_NONE;
+                param->rc.b_mb_tree = 0;
             }
             else if( !strcasecmp( optarg, "veryfast" ) )
             {
@@ -551,12 +560,20 @@ static int  Parse( int argc, char **argv,
                 param->i_frame_reference = 1;
                 param->analyse.b_mixed_references = 0;
                 param->analyse.i_trellis = 0;
+                param->rc.b_mb_tree = 0;
             }
-            else if( !strcasecmp( optarg, "fast" ) )
+            else if( !strcasecmp( optarg, "faster" ) )
             {
                 param->analyse.b_mixed_references = 0;
                 param->i_frame_reference = 2;
                 param->analyse.i_subpel_refine = 4;
+                param->rc.b_mb_tree = 0;
+            }
+            else if( !strcasecmp( optarg, "fast" ) )
+            {
+                param->i_frame_reference = 2;
+                param->analyse.i_subpel_refine = 6;
+                param->rc.i_lookahead = 30;
             }
             else if( !strcasecmp( optarg, "medium" ) )
             {
@@ -569,6 +586,7 @@ static int  Parse( int argc, char **argv,
                 param->i_frame_reference = 5;
                 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
                 param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
+                param->rc.i_lookahead = 50;
             }
             else if( !strcasecmp( optarg, "slower" ) )
             {
@@ -579,6 +597,7 @@ static int  Parse( int argc, char **argv,
                 param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
                 param->analyse.inter |= X264_ANALYSE_PSUB8x8;
                 param->analyse.i_trellis = 2;
+                param->rc.i_lookahead = 60;
             }
             else if( !strcasecmp( optarg, "placebo" ) )
             {
@@ -592,6 +611,7 @@ static int  Parse( int argc, char **argv,
                 param->analyse.b_fast_pskip = 0;
                 param->analyse.i_trellis = 2;
                 param->i_bframe = 16;
+                param->rc.i_lookahead = 60;
             }
             else
             {
@@ -644,11 +664,13 @@ static int  Parse( int argc, char **argv,
             {
                 param->analyse.f_psy_rd = 0;
                 param->rc.i_aq_mode = X264_AQ_NONE;
+                param->analyse.b_psy = 0;
             }
             else if( !strcasecmp( optarg, "ssim" ) )
             {
                 param->analyse.f_psy_rd = 0;
                 param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
+                param->analyse.b_psy = 0;
             }
             else if( !strcasecmp( optarg, "fastdecode" ) )
             {
@@ -662,7 +684,6 @@ static int  Parse( int argc, char **argv,
                 param->i_deblocking_filter_alphac0 = -1;
                 param->i_deblocking_filter_beta = -1;
                 param->analyse.f_psy_trellis = 0.2;
-                param->rc.f_ip_factor = 2.1;
                 param->rc.f_aq_strength = 1.3;
                 if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
                     param->analyse.inter |= X264_ANALYSE_PSUB8x8;
diff --git a/x264.h b/x264.h
index 2dfcc8d..4982b2e 100644
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@
 
 #include <stdarg.h>
 
-#define X264_BUILD 68
+#define X264_BUILD 69
 
 /* x264_t:
  *      opaque handler for encoder */
@@ -242,6 +242,7 @@ typedef struct x264_param_t
         int          i_noise_reduction; /* adaptive pseudo-deadzone */
         float        f_psy_rd; /* Psy RD strength */
         float        f_psy_trellis; /* Psy trellis strength */
+        int          b_psy; /* Toggle all psy optimizations */
 
         /* the deadzone size that will be used in luma quantization */
         int          i_luma_deadzone[2]; /* {inter, intra} */
@@ -271,6 +272,8 @@ typedef struct x264_param_t
 
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
         float       f_aq_strength;
+        int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
+        int         i_lookahead;
 
         /* 2pass */
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
-- 
1.6.1.2



Public Pastes

🤑 G2A.com Free Gift Card Guide May 2024 FIX 🤑
GetText | 9 min ago | 0.39 KB
Custom PDF Viewer Block in Articulate Rise JS...
JavaScript | 21 min ago | 0.09 KB
Pastebin.ai - #1 pastebin alternative
PHP | 33 min ago | 0.84 KB
Receptor_RF_433MHz
MPASM | 1 hour ago | 10.04 KB
Transmisor_RF_433MHz
MPASM | 1 hour ago | 7.19 KB
[JS] YOUTUBE MP3 DOWNLOADER
JavaScript | 1 hour ago | 1.79 KB
fourth_task
C | 1 hour ago | 7.51 KB
sharelatex_overleaf_docker_compose
YAML | 2 hours ago | 6.20 KB