View difference between Paste ID: f87abd3f and
SHOW: | | - or go back to the newest paste.
1
From f21e71a04ba65aff9b5a4bfa8a73fd86c463f4ee Mon Sep 17 00:00:00 2001
2
From: Jason Garrett-Glaser <darkshikari@gmail.com>
3
Date: Mon, 3 Aug 2009 20:52:30 -0700
4
Subject: [PATCH 1/2] Various 1-pass VBV tweaks
5
 Make predictors have an offset in addition to a multiplier.
6
 This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI.
7
 We tried linear regressions, but they were very unreliable as predictors.
8
 Also allow VBV to be slightly more aggressive in raising QPs, so that it does not run out of bits in some situations.
9
 Up to 1 dB improvement on some clips.
10
11
---
12
 encoder/ratecontrol.c |   32 +++++++++++++++++++++-----------
13
 1 files changed, 21 insertions(+), 11 deletions(-)
14
15
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
16
index 2f88708..087e658 100644
17
--- a/encoder/ratecontrol.c
18
+++ b/encoder/ratecontrol.c
19
@@ -58,6 +58,7 @@ typedef struct
20
     double coeff;
21
     double count;
22
     double decay;
23
+    double offset;
24
 } predictor_t;
25
 
26
 struct x264_ratecontrol_t
27
@@ -409,9 +410,11 @@ int x264_ratecontrol_new( x264_t *h )
28
         rc->pred[i].coeff= 2.0;
29
         rc->pred[i].count= 1.0;
30
         rc->pred[i].decay= 0.5;
31
+        rc->pred[i].offset= 0.0;
32
         rc->row_preds[i].coeff= .25;
33
         rc->row_preds[i].count= 1.0;
34
         rc->row_preds[i].decay= 0.5;
35
+        rc->row_preds[i].offset= 0.0;
36
     }
37
     *rc->pred_b_from_p = rc->pred[0];
38
 
39
@@ -953,7 +956,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
40
         if( y < h->sps->i_mb_height-1 )
41
         {
42
             int i_estimated;
43
-            int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
44
+            int avg_qp = X264_MIN(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1])
45
                        + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1);
46
             rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset
47
             i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size
48
@@ -1153,10 +1156,6 @@ void x264_ratecontrol_end( x264_t *h, int bits )
49
             {
50
                 update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc),
51
                                   h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes );
52
-                /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */
53
-                /* Hackily cap the predictor coeff in case this happens. */
54
-                /* FIXME FIXME FIXME */
55
-                rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. );
56
                 rc->bframe_bits = 0;
57
             }
58
         }
59
@@ -1270,17 +1269,28 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q)
60
 
61
 static double predict_size( predictor_t *p, double q, double var )
62
 {
63
-     return p->coeff*var / (q*p->count);
64
+     return (p->coeff*var + p->offset) / (q*p->count);
65
 }
66
 
67
 static void update_predictor( predictor_t *p, double q, double var, double bits )
68
 {
69
+    const double range = 1.5;
70
     if( var < 10 )
71
         return;
72
-    p->count *= p->decay;
73
-    p->coeff *= p->decay;
74
-    p->count ++;
75
-    p->coeff += bits*q / var;
76
+    double old_coeff = p->coeff / p->count;
77
+    double new_coeff = bits*q / var;
78
+    double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range );
79
+    double new_offset = bits*q - new_coeff_clipped * var;
80
+    if( new_offset >= 0 )
81
+        new_coeff = new_coeff_clipped;
82
+    else
83
+        new_offset = 0;
84
+    p->count  *= p->decay;
85
+    p->coeff  *= p->decay;
86
+    p->offset *= p->decay;
87
+    p->count  ++;
88
+    p->coeff  += new_coeff;
89
+    p->offset += new_offset;
90
 }
91
 
92
 // update VBV after encoding a frame
93
@@ -1350,7 +1360,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
94
         double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
95
         double qf = 1.0;
96
         if( bits > rcc->buffer_fill/2 )
97
-            qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 );
98
+            qf = rcc->buffer_fill/(2*bits);
99
         q /= qf;
100
         bits *= qf;
101
         if( bits < rcc->buffer_rate/2 )
102
-- 
103
1.6.1.2
104
105
106
From e7182499c7bc23d3376090f66d7617b2080f2b46 Mon Sep 17 00:00:00 2001
107
From: Jason Garrett-Glaser <darkshikari@gmail.com>
108
Date: Tue, 4 Aug 2009 17:46:33 -0700
109
Subject: [PATCH 2/2] Macroblock-tree ratecontrol
110
 On by default; can be turned off with --no-mbtree.
111
 Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
112
 Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode.
113
 Doesn't work with b-pyramid yet.
114
 Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
115
 This makes the "medium" preset a bit slower.  Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
116
 All presets "fast" and above will have MB-tree on.
117
 Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis.
118
 Default is 40; larger values will be slower and require more memory but give more accurate results.
119
 This value will be used in the future to control ratecontrol lookahead (VBV).
120
 Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
121
 This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
122
 Quality improvement from MB-tree is about 2-70% depending on content.
123
 Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.
124
125
---
126
 common/common.c       |   22 ++-
127
 common/common.h       |   50 ++++++-
128
 common/frame.c        |   10 +-
129
 common/frame.h        |    3 +
130
 common/osdep.h        |    9 +-
131
 encoder/analyse.c     |    4 +-
132
 encoder/encoder.c     |   56 ++++++-
133
 encoder/ratecontrol.c |  201 +++++++++++++++++-------
134
 encoder/ratecontrol.h |    3 +-
135
 encoder/slicetype.c   |  424 ++++++++++++++++++++++++++++++++++++++-----------
136
 x264.c                |   31 +++-
137
 x264.h                |    5 +-
138
 12 files changed, 639 insertions(+), 179 deletions(-)
139
140
diff --git a/common/common.c b/common/common.c
141
index 9260c64..371ed1e 100644
142
--- a/common/common.c
143
+++ b/common/common.c
144
@@ -95,6 +95,7 @@ void    x264_param_default( x264_param_t *param )
145
     param->rc.f_pb_factor = 1.3;
146
     param->rc.i_aq_mode = X264_AQ_VARIANCE;
147
     param->rc.f_aq_strength = 1.0;
148
+    param->rc.i_lookahead = 40;
149
 
150
     param->rc.b_stat_write = 0;
151
     param->rc.psz_stat_out = "x264_2pass.log";
152
@@ -104,6 +105,7 @@ void    x264_param_default( x264_param_t *param )
153
     param->rc.f_qblur = 0.5;
154
     param->rc.f_complexity_blur = 20;
155
     param->rc.i_zones = 0;
156
+    param->rc.b_mb_tree = 1;
157
 
158
     /* Log */
159
     param->pf_log = x264_log_default;
160
@@ -117,6 +119,7 @@ void    x264_param_default( x264_param_t *param )
161
     param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
162
     param->analyse.i_me_method = X264_ME_HEX;
163
     param->analyse.f_psy_rd = 1.0;
164
+    param->analyse.b_psy = 1;
165
     param->analyse.f_psy_trellis = 0;
166
     param->analyse.i_me_range = 16;
167
     param->analyse.i_subpel_refine = 7;
168
@@ -493,6 +496,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
169
             p->analyse.f_psy_trellis = 0;
170
         }
171
     }
172
+    OPT("psy")
173
+        p->analyse.b_psy = atobool(value);
174
     OPT("chroma-me")
175
         p->analyse.b_chroma_me = atobool(value);
176
     OPT("mixed-refs")
177
@@ -524,6 +529,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
178
         p->rc.f_rf_constant = atof(value);
179
         p->rc.i_rc_method = X264_RC_CRF;
180
     }
181
+    OPT("rc-lookahead")
182
+        p->rc.i_lookahead = atoi(value);
183
     OPT2("qpmin", "qp-min")
184
         p->rc.i_qp_min = atoi(value);
185
     OPT2("qpmax", "qp-max")
186
@@ -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
187
     }
188
     OPT("qcomp")
189
         p->rc.f_qcompress = atof(value);
190
+    OPT("mbtree")
191
+        p->rc.b_mb_tree = atobool(value);
192
     OPT("qblur")
193
         p->rc.f_qblur = atof(value);
194
     OPT2("cplxblur", "cplx-blur")
195
@@ -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
196
     s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
197
     s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
198
     s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
199
-    s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
200
+    s += sprintf( s, " psy=%d", p->analyse.b_psy );
201
+    if( p->analyse.b_psy )
202
+        s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
203
     s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
204
     s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
205
     s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
206
@@ -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
207
     s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
208
                   p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
209
 
210
-    s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
211
+    if( p->rc.b_mb_tree )
212
+        s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
213
+
214
+    s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
215
                                ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
216
-                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
217
+                               : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
218
     if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
219
     {
220
         if( p->rc.i_rc_method == X264_RC_CRF )
221
@@ -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
222
     if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
223
     {
224
         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
225
-        if( p->i_bframe )
226
+        if( p->i_bframe && !p->rc.b_mb_tree )
227
             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
228
         s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
229
         if( p->rc.i_aq_mode )
230
diff --git a/common/common.h b/common/common.h
231
index 8a25a13..30163ab 100644
232
--- a/common/common.h
233
+++ b/common/common.h
234
@@ -51,6 +51,7 @@
235
 #define X264_SLICE_MAX 4
236
 #define X264_NAL_MAX (4 + X264_SLICE_MAX)
237
 #define X264_PCM_COST (386*8)
238
+#define X264_LOOKAHEAD_MAX 250
239
 
240
 // number of pixels (per thread) in progress at any given time.
241
 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
242
@@ -152,6 +153,49 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
243
     return amvd0 + (amvd1<<16);
244
 }
245
 
246
+static const uint8_t exp2_lut[64] = {
247
+      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
248
+     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
249
+    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
250
+    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
251
+};
252
+
253
+static ALWAYS_INLINE int x264_exp2fix8( float x )
254
+{
255
+    int i, f;
256
+    x += 8;
257
+    if( x <= 0 ) return 0;
258
+    if( x >= 16 ) return 0xffff;
259
+    i = x;
260
+    f = (x-i)*64;
261
+    return (exp2_lut[f]+256) << i >> 8;
262
+}
263
+
264
+static const float log2_lut[128] = {
265
+    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
266
+    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
267
+    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
268
+    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
269
+    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
270
+    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
271
+    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
272
+    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
273
+    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
274
+    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
275
+    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
276
+    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
277
+    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
278
+    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
279
+    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
280
+    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
281
+};
282
+
283
+static ALWAYS_INLINE float x264_log2( uint32_t x )
284
+{
285
+    int lz = x264_clz( x );
286
+    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
287
+}
288
+
289
 /****************************************************************************
290
  *
291
  ****************************************************************************/
292
@@ -327,11 +371,11 @@ struct x264_t
293
     struct
294
     {
295
         /* Frames to be encoded (whose types have been decided) */
296
-        x264_frame_t *current[X264_BFRAME_MAX*4+3];
297
+        x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
298
         /* Temporary buffer (frames types not yet decided) */
299
-        x264_frame_t *next[X264_BFRAME_MAX*4+3];
300
+        x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
301
         /* Unused frames */
302
-        x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
303
+        x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
304
         /* For adaptive B decision */
305
         x264_frame_t *last_nonb;
306
 
307
diff --git a/common/frame.c b/common/frame.c
308
index 23e6824..2097d52 100644
309
--- a/common/frame.c
310
+++ b/common/frame.c
311
@@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
312
                 memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
313
                 CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
314
             }
315
+        CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
316
+        memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
317
+        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
318
+        for( j = 0; j <= h->param.i_bframe+1; j++ )
319
+            for( i = 0; i <= h->param.i_bframe+1; i++ )
320
+            {
321
+                CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
322
+                CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
323
+            }
324
     }
325
 
326
     if( h->param.analyse.i_me_method >= X264_ME_ESA )
327
@@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
328
     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
329
     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
330
     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
331
-    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
332
     if( h->param.i_bframe )
333
     {
334
         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
335
diff --git a/common/frame.h b/common/frame.h
336
index aad77f5..a3da4e4 100644
337
--- a/common/frame.h
338
+++ b/common/frame.h
339
@@ -63,6 +63,8 @@ typedef struct
340
     int8_t  *mb_type;
341
     int16_t (*mv[2])[2];
342
     int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
343
+    uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
344
+    uint8_t  (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
345
     int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
346
     int8_t  *ref[2];
347
     int     i_ref[2];
348
@@ -83,6 +85,7 @@ typedef struct
349
     float   *f_qp_offset;
350
     int     b_intra_calculated;
351
     uint16_t *i_intra_cost;
352
+    uint32_t *i_propagate_cost;
353
     uint16_t *i_inv_qscale_factor;
354
 
355
     /* threading */
356
diff --git a/common/osdep.h b/common/osdep.h
357
index 915ec05..2095198 100644
358
--- a/common/osdep.h
359
+++ b/common/osdep.h
360
@@ -147,7 +147,9 @@
361
 #ifdef WORDS_BIGENDIAN
362
 #define endian_fix(x) (x)
363
 #define endian_fix32(x) (x)
364
-#elif defined(__GNUC__) && defined(HAVE_MMX)
365
+#define endian_fix16(x) (x)
366
+#else
367
+#if defined(__GNUC__) && defined(HAVE_MMX)
368
 static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
369
 {
370
     asm("bswap %0":"+r"(x));
371
@@ -171,6 +173,11 @@ static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
372
         return endian_fix32(x);
373
 }
374
 #endif
375
+static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x )
376
+{
377
+    return (x<<8)|(x>>8);
378
+}
379
+#endif
380
 
381
 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3)
382
 #define x264_clz(x) __builtin_clz(x)
383
diff --git a/encoder/analyse.c b/encoder/analyse.c
384
index 4a36fcd..38b9976 100644
385
--- a/encoder/analyse.c
386
+++ b/encoder/analyse.c
387
@@ -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
388
         h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
389
     }
390
     h->mb.i_psy_rd_lambda = a->i_lambda;
391
-    /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
392
-    h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
393
+    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
394
+    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
395
 
396
     h->mb.i_me_method = h->param.analyse.i_me_method;
397
     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
398
diff --git a/encoder/encoder.c b/encoder/encoder.c
399
index 0f1ccc8..74ff97d 100644
400
--- a/encoder/encoder.c
401
+++ b/encoder/encoder.c
402
@@ -42,7 +42,7 @@
403
 
404
 #define bs_write_ue bs_write_ue_big
405
 
406
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
407
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
408
                                     x264_nal_t **pp_nal, int *pi_nal,
409
                                     x264_picture_t *pic_out );
410
 
411
@@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
412
         h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
413
         h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
414
         h->param.rc.i_aq_mode = 0;
415
+        h->param.rc.b_mb_tree = 0;
416
     }
417
     h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
418
     h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
419
@@ -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h )
420
     if( !h->param.i_bframe )
421
         h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
422
     h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
423
+    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_LOOKAHEAD_MAX );
424
+    h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
425
+    if( h->param.rc.b_stat_read )
426
+        h->param.rc.i_lookahead = 0;
427
+    else if( !h->param.rc.i_lookahead )
428
+        h->param.rc.b_mb_tree = 0;
429
+    if( h->param.rc.f_qcompress == 1 )
430
+        h->param.rc.b_mb_tree = 0;
431
+
432
     h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
433
                                 && h->param.i_bframe
434
                                 && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
435
@@ -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h )
436
     if( !h->param.b_cabac )
437
         h->param.analyse.i_trellis = 0;
438
     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
439
+    if( !h->param.analyse.b_psy )
440
+    {
441
+        h->param.analyse.f_psy_rd = 0;
442
+        h->param.analyse.f_psy_trellis = 0;
443
+    }
444
     if( !h->param.analyse.i_trellis )
445
         h->param.analyse.f_psy_trellis = 0;
446
     h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
447
@@ -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h )
448
     h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
449
     if( h->param.rc.f_aq_strength == 0 )
450
         h->param.rc.i_aq_mode = 0;
451
+    /* MB-tree requires AQ to be on, even if the strength is zero. */
452
+    if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
453
+    {
454
+        h->param.rc.i_aq_mode = 1;
455
+        h->param.rc.f_aq_strength = 0;
456
+    }
457
+    if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid )
458
+    {
459
+        x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
460
+        h->param.b_bframe_pyramid = 0;
461
+    }
462
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
463
     if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
464
         h->param.analyse.i_subpel_refine = 9;
465
@@ -723,6 +749,9 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
466
         h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
467
     else
468
         h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
469
+    if( h->param.rc.b_mb_tree )
470
+        h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
471
+
472
     h->frames.i_max_ref0 = h->param.i_frame_reference;
473
     h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
474
     h->frames.i_max_dpb  = h->sps->vui.i_max_dec_frame_buffering;
475
@@ -730,7 +759,8 @@ x264_t *x264_encoder_open   ( x264_param_t *param )
476
         && ( h->param.rc.i_rc_method == X264_RC_ABR
477
           || h->param.rc.i_rc_method == X264_RC_CRF
478
           || h->param.i_bframe_adaptive
479
-          || h->param.i_scenecut_threshold );
480
+          || h->param.i_scenecut_threshold
481
+          || h->param.rc.b_mb_tree );
482
     h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
483
     h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
484
 
485
@@ -1443,7 +1473,12 @@ int     x264_encoder_encode( x264_t *h,
486
         if( h->frames.b_have_lowres )
487
             x264_frame_init_lowres( h, fenc );
488
 
489
-        if( h->param.rc.i_aq_mode )
490
+        if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
491
+        {
492
+            if( x264_macroblock_tree_read( h, fenc ) )
493
+                return -1;
494
+        }
495
+        else if( h->param.rc.i_aq_mode )
496
             x264_adaptive_quant_frame( h, fenc );
497
 
498
         if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
499
@@ -1461,7 +1496,8 @@ int     x264_encoder_encode( x264_t *h,
500
         /* 2: Select frame types */
501
         if( h->frames.next[0] == NULL )
502
         {
503
-            x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
504
+            if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
505
+                return -1;
506
             return 0;
507
         }
508
 
509
@@ -1621,11 +1657,12 @@ int     x264_encoder_encode( x264_t *h,
510
     else
511
         x264_slices_write( h );
512
 
513
-    x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
514
+    if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
515
+        return -1;
516
     return 0;
517
 }
518
 
519
-static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
520
+static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
521
                                     x264_nal_t **pp_nal, int *pi_nal,
522
                                     x264_picture_t *pic_out )
523
 {
524
@@ -1640,7 +1677,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
525
     if( !h->out.i_nal )
526
     {
527
         pic_out->i_type = X264_TYPE_AUTO;
528
-        return;
529
+        return 0;
530
     }
531
 
532
     x264_frame_push_unused( thread_current, h->fenc );
533
@@ -1670,7 +1707,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
534
 
535
     /* update rc */
536
     x264_emms();
537
-    x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
538
+    if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 )
539
+        return -1;
540
 
541
     /* restore CPU state (before using float again) */
542
     x264_emms();
543
@@ -1784,6 +1822,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
544
 
545
     if( h->param.psz_dump_yuv )
546
         x264_frame_dump( h );
547
+
548
+    return 0;
549
 }
550
 
551
 static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
552
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
553
index 087e658..f669007 100644
554
--- a/encoder/ratecontrol.c
555
+++ b/encoder/ratecontrol.c
556
@@ -71,6 +71,7 @@ struct x264_ratecontrol_t
557
     double fps;
558
     double bitrate;
559
     double rate_tolerance;
560
+    double qcompress;
561
     int nmb;                    /* number of macroblocks in a frame */
562
     int qp_constant[5];
563
 
564
@@ -106,6 +107,10 @@ struct x264_ratecontrol_t
565
     /* 2pass stuff */
566
     FILE *p_stat_file_out;
567
     char *psz_stat_file_tmpname;
568
+    FILE *p_mbtree_stat_file_out;
569
+    char *psz_mbtree_stat_file_tmpname;
570
+    char *psz_mbtree_stat_file_name;
571
+    FILE *p_mbtree_stat_file_in;
572
 
573
     int num_entries;            /* number of ratecontrol_entry_ts */
574
     ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
575
@@ -118,6 +123,7 @@ struct x264_ratecontrol_t
576
     double lmin[5];             /* min qscale by frame type */
577
     double lmax[5];
578
     double lstep;               /* max change (multiply) in qscale per frame */
579
+    uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */
580
 
581
     /* MBRC stuff */
582
     double frame_size_estimated;
583
@@ -191,49 +197,6 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame
584
     return var;
585
 }
586
 
587
-static const float log2_lut[128] = {
588
-    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
589
-    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
590
-    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
591
-    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
592
-    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
593
-    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
594
-    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
595
-    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
596
-    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
597
-    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
598
-    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
599
-    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
600
-    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
601
-    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
602
-    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
603
-    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
604
-};
605
-
606
-static const uint8_t exp2_lut[64] = {
607
-      1,   4,   7,  10,  13,  16,  19,  22,  25,  28,  31,  34,  37,  40,  44,  47,
608
-     50,  53,  57,  60,  64,  67,  71,  74,  78,  81,  85,  89,  93,  96, 100, 104,
609
-    108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
610
-    177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
611
-};
612
-
613
-static ALWAYS_INLINE float x264_log2( uint32_t x )
614
-{
615
-    int lz = x264_clz( x );
616
-    return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz);
617
-}
618
-
619
-static ALWAYS_INLINE int x264_exp2fix8( float x )
620
-{
621
-    int i, f;
622
-    x += 8;
623
-    if( x <= 0 ) return 0;
624
-    if( x >= 16 ) return 0xffff;
625
-    i = x;
626
-    f = (x-i)*64;
627
-    return (exp2_lut[f]+256) << i >> 8;
628
-}
629
-
630
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
631
 {
632
     /* constants chosen to result in approximately the same overall bitrate as without AQ.
633
@@ -241,6 +204,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
634
     int mb_x, mb_y;
635
     float strength;
636
     float avg_adj = 0.f;
637
+    /* Need to init it anyways for MB tree. */
638
+    if( h->param.rc.f_aq_strength == 0 )
639
+    {
640
+        int mb_xy;
641
+        memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
642
+        if( h->frames.b_have_lowres )
643
+            for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
644
+                frame->i_inv_qscale_factor[mb_xy] = 256;
645
+        return;
646
+    }
647
+
648
     if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
649
     {
650
         for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
651
@@ -257,6 +231,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
652
     }
653
     else
654
         strength = h->param.rc.f_aq_strength * 1.0397f;
655
+
656
     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
657
         for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
658
         {
659
@@ -291,6 +266,47 @@ void x264_adaptive_quant( x264_t *h )
660
     h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
661
 }
662
 
663
+int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame ) /* Fill frame->f_qp_offset for a later pass: from the MB-tree stats file for non-B frames, via AQ for B-frames. Returns 0 on success, -1 on error. */
664
+{
665
+    x264_ratecontrol_t *rc = h->rc;
666
+    uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type; /* frame type held in the ratecontrol entry for this frame number */
667
+    int i;
668
+
669
+    if( i_type_actual != SLICE_TYPE_B ) /* only non-B frames are read from the stats file */
670
+    {
671
+        uint8_t i_type;
672
+
673
+        if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) ) /* each record starts with a one-byte frame type */
674
+            goto fail;
675
+
676
+        if( i_type != i_type_actual ) /* stored type must agree with the ratecontrol entry, otherwise the two files are out of step */
677
+        {
678
+            x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual);
679
+            return -1;
680
+        }
681
+
682
+        if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count ) /* followed by one 16-bit value per macroblock */
683
+            goto fail;
684
+
685
+        for( i = 0; i < h->mb.i_mb_count; i++ )
686
+            frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[i] )) * (1/256.0); /* the writer in x264_ratecontrol_end describes the values as big-endian FIX8.8, hence the signed cast and the 1/256 scale */
687
+    }
688
+    else
689
+        x264_adaptive_quant_frame( h, frame ); /* B-frames: compute the offsets with x264_adaptive_quant_frame instead of reading them */
690
+    return 0;
691
+fail: /* reached when either fread returned fewer items than requested */
692
+    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
693
+    return -1;
694
+}
695
+
696
+static char *x264_strcat_filename( char *input, char *suffix ) /* Returns a newly allocated copy of input followed by suffix (release with x264_free), or NULL if the allocation fails. */
697
+{
698
+    char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 ); /* +1 for the terminating NUL */
699
+    if( !output ) return NULL; /* x264_malloc can return NULL (it is checked elsewhere in this file); never hand NULL to strcpy */
700
+    strcat( strcpy( output, input ), suffix ); /* strcpy returns its destination, so this copies input and then appends suffix */
701
+    return output;
702
+}
703
+
704
 int x264_ratecontrol_new( x264_t *h )
705
 {
706
     x264_ratecontrol_t *rc;
707
@@ -310,6 +326,14 @@ int x264_ratecontrol_new( x264_t *h )
708
     else
709
         rc->fps = 25.0;
710
 
711
+    if( h->param.rc.b_mb_tree )
712
+    {
713
+        h->param.rc.f_pb_factor = 1;
714
+        rc->qcompress = 1;
715
+    }
716
+    else
717
+        rc->qcompress = h->param.rc.f_qcompress;
718
+
719
     rc->bitrate = h->param.rc.i_bitrate * 1000.;
720
     rc->rate_tolerance = h->param.rc.f_rate_tolerance;
721
     rc->nmb = h->mb.i_mb_count;
722
@@ -379,17 +403,18 @@ int x264_ratecontrol_new( x264_t *h )
723
         rc->accum_p_norm = .01;
724
         rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
725
         /* estimated ratio that produces a reasonable QP for the first I-frame */
726
-        rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
727
+        rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
728
         rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
729
         rc->last_non_b_pict_type = SLICE_TYPE_I;
730
     }
731
 
732
     if( h->param.rc.i_rc_method == X264_RC_CRF )
733
     {
734
-        /* arbitrary rescaling to make CRF somewhat similar to QP */
735
+        /* Arbitrary rescaling to make CRF somewhat similar to QP.
736
+         * Try to compensate for MB-tree's effects as well. */
737
         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
738
-        rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
739
-                                 / qp2qscale( h->param.rc.f_rf_constant );
740
+        rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
741
+                                 / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) );
742
     }
743
 
744
     rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
745
@@ -437,6 +462,17 @@ int x264_ratecontrol_new( x264_t *h )
746
             x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
747
             return -1;
748
         }
749
+        if( h->param.rc.b_mb_tree )
750
+        {
751
+            char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
752
+            rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
753
+            x264_free( mbtree_stats_in );
754
+            if( !rc->p_mbtree_stat_file_in )
755
+            {
756
+                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
757
+                return -1;
758
+            }
759
+        }
760
 
761
         /* check whether 1st pass options were compatible with current options */
762
         if( !strncmp( stats_buf, "#options:", 9 ) )
763
@@ -483,6 +519,9 @@ int x264_ratecontrol_new( x264_t *h )
764
                 x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
765
                 return -1;
766
             }
767
+
768
+            if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc-lookahead=" ) ) && sscanf( p, "rc-lookahead=%d", &i ) )
769
+                h->param.rc.i_lookahead = i;
770
         }
771
 
772
         /* find number of pics */
773
@@ -585,10 +624,7 @@ int x264_ratecontrol_new( x264_t *h )
774
     if( h->param.rc.b_stat_write )
775
     {
776
         char *p;
777
-
778
-        rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
779
-        strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
780
-        strcat( rc->psz_stat_file_tmpname, ".temp" );
781
+        rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
782
 
783
         rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
784
         if( rc->p_stat_file_out == NULL )
785
@@ -600,6 +636,25 @@ int x264_ratecontrol_new( x264_t *h )
786
         p = x264_param2string( &h->param, 1 );
787
         fprintf( rc->p_stat_file_out, "#options: %s\n", p );
788
         x264_free( p );
789
+        if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
790
+        {
791
+            rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
792
+            rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
793
+
794
+            rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
795
+            if( rc->p_mbtree_stat_file_out == NULL )
796
+            {
797
+                x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
798
+                return -1;
799
+            }
800
+        }
801
+    }
802
+
803
+    if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
804
+    {
805
+        rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t));
806
+        if( !rc->qp_buffer )
807
+            return -1;
808
     }
809
 
810
     for( i=0; i<h->param.i_threads; i++ )
811
@@ -739,8 +794,8 @@ void x264_ratecontrol_summary( x264_t *h )
812
     {
813
         double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
814
         x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
815
-                  qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
816
-                             * rc->cplxr_sum / rc->wanted_bits_window ) );
817
+                  qscale2qp( pow( base_cplx, 1 - rc->qcompress )
818
+                             * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) );
819
     }
820
 }
821
 
822
@@ -760,9 +815,22 @@ void x264_ratecontrol_delete( x264_t *h )
823
             }
824
         x264_free( rc->psz_stat_file_tmpname );
825
     }
826
+    if( rc->p_mbtree_stat_file_out )
827
+    {
828
+        fclose( rc->p_mbtree_stat_file_out );
829
+        if( h->i_frame >= rc->num_entries )
830
+            if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
831
+            {
832
+                x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
833
+                          rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
834
+            }
835
+        x264_free( rc->psz_mbtree_stat_file_tmpname );
836
+        x264_free( rc->psz_mbtree_stat_file_name );
837
+    }
838
     x264_free( rc->pred );
839
     x264_free( rc->pred_b_from_p );
840
     x264_free( rc->entry );
841
+    x264_free( rc->qp_buffer );
842
     if( rc->zones )
843
     {
844
         x264_free( rc->zones[0].param );
845
@@ -1086,7 +1154,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
846
 }
847
 
848
 /* After encoding one frame, save stats and update ratecontrol state */
849
-void x264_ratecontrol_end( x264_t *h, int bits )
850
+int x264_ratecontrol_end( x264_t *h, int bits )
851
 {
852
     x264_ratecontrol_t *rc = h->rc;
853
     const int *mbs = h->stat.frame.i_mb_count;
854
@@ -1114,7 +1182,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
855
                         ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
856
                           dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
857
                         : '-';
858
-        fprintf( rc->p_stat_file_out,
859
+        if( fprintf( rc->p_stat_file_out,
860
                  "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
861
                  h->fenc->i_frame, h->i_frame,
862
                  c_type, rc->qpa_rc,
863
@@ -1124,7 +1192,22 @@ void x264_ratecontrol_end( x264_t *h, int bits )
864
                  h->stat.frame.i_mb_count_i,
865
                  h->stat.frame.i_mb_count_p,
866
                  h->stat.frame.i_mb_count_skip,
867
-                 c_direct);
868
+                 c_direct) < 0 )
869
+             goto fail;
870
+
871
+        /* Don't re-write the data in multi-pass mode. */
872
+        if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
873
+        {
874
+            uint8_t i_type = h->sh.i_type;
875
+            int i;
876
+            /* Values are stored as big-endian FIX8.8 */
877
+            for( i = 0; i < h->mb.i_mb_count; i++ )
878
+                rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 );
879
+            if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 )
880
+                goto fail;
881
+            if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count )
882
+                goto fail;
883
+        }
884
     }
885
 
886
     if( rc->b_abr )
887
@@ -1162,6 +1245,10 @@ void x264_ratecontrol_end( x264_t *h, int bits )
888
     }
889
 
890
     update_vbv( h, bits );
891
+    return 0;
892
+fail:
893
+    x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n");
894
+    return -1;
895
 }
896
 
897
 /****************************************************************************
898
@@ -1177,7 +1264,7 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor
899
     double q;
900
     x264_zone_t *zone = get_zone( h, frame_num );
901
 
902
-    q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress );
903
+    q = pow( rce->blurred_complexity, 1 - rcc->qcompress );
904
 
905
     // avoid NaN's in the rc_eq
906
     if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0)
907
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
908
index 3310d3c..ed8abab 100644
909
--- a/encoder/ratecontrol.h
910
+++ b/encoder/ratecontrol.h
911
@@ -29,12 +29,13 @@ void x264_ratecontrol_delete( x264_t * );
912
 
913
 void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
914
 void x264_adaptive_quant( x264_t * );
915
+int  x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
916
 void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
917
 void x264_ratecontrol_start( x264_t *, int i_force_qp );
918
 int  x264_ratecontrol_slice_type( x264_t *, int i_frame );
919
 void x264_ratecontrol_mb( x264_t *, int bits );
920
 int  x264_ratecontrol_qp( x264_t * );
921
-void x264_ratecontrol_end( x264_t *, int bits );
922
+int  x264_ratecontrol_end( x264_t *, int bits );
923
 void x264_ratecontrol_summary( x264_t * );
924
 void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
925
 int  x264_ratecontrol_get_estimated_size( x264_t const *);
926
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
927
index 2c16429..be3eaf7 100644
928
--- a/encoder/slicetype.c
929
+++ b/encoder/slicetype.c
930
@@ -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
931
     x264_me_t m[2];
932
     int i_bcost = COST_MAX;
933
     int l, i;
934
+    int list_used = 0;
935
 
936
     h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf;
937
     h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 );
938
@@ -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
939
         h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
940
         i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
941
                            m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
942
-        if( i_bcost > i_cost ) \
943
-            i_bcost = i_cost; \
944
+        COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
945
     }
946
 
947
     m[0].i_pixel = PIXEL_8x8;
948
@@ -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
949
             int i_cost;
950
             h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight );
951
             i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 );
952
-            if( i_bcost > i_cost )
953
-                i_bcost = i_cost;
954
+            COPY2_IF_LT( i_bcost, i_cost, list_used, 3 );
955
         }
956
     }
957
 
958
@@ -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
959
             *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l];
960
             m[l].cost = *fenc_costs[l];
961
         }
962
-        i_bcost = X264_MIN( i_bcost, m[l].cost );
963
+        COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 );
964
     }
965
 
966
     if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) )
967
         TRY_BIDIR( m[0].mv, m[1].mv, 5 );
968
 
969
+    frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used;
970
+
971
 lowres_intra_mb:
972
     /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
973
     /* FIXME: Should we still forbid them now that we cache intra scores? */
974
-    if( !b_bidir )
975
+    if( !b_bidir || h->param.rc.b_mb_tree )
976
     {
977
         int i_icost, b_intra;
978
         if( !fenc->b_intra_calculated )
979
@@ -237,18 +238,23 @@ lowres_intra_mb:
980
         }
981
         else
982
             i_icost = fenc->i_intra_cost[i_mb_xy];
983
-        b_intra = i_icost < i_bcost;
984
-        if( b_intra )
985
-            i_bcost = i_icost;
986
-        if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
987
-            && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
988
-            || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
989
+        if( !b_bidir )
990
         {
991
-            fenc->i_intra_mbs[b-p0] += b_intra;
992
-            fenc->i_cost_est[0][0] += i_icost;
993
+            b_intra = i_icost < i_bcost;
994
+            if( b_intra )
995
+                i_bcost = i_icost;
996
+            if(   (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1
997
+                && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1)
998
+                || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
999
+            {
1000
+                fenc->i_intra_mbs[b-p0] += b_intra;
1001
+                fenc->i_cost_est[0][0] += i_icost;
1002
+            }
1003
         }
1004
     }
1005
 
1006
+    frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost;
1007
+
1008
     return i_bcost;
1009
 }
1010
 #undef TRY_BIDIR
1011
@@ -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
1012
                                x264_frame_t **frames, int p0, int p1, int b,
1013
                                int b_intra_penalty )
1014
 {
1015
+
1016
     int i_score = 0;
1017
     /* Don't use the AQ'd scores for slicetype decision. */
1018
     int i_score_aq = 0;
1019
@@ -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
1020
 
1021
         /* the edge mbs seem to reduce the predictive quality of the
1022
          * whole frame's score, but are needed for a spatial distribution. */
1023
-        if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
1024
+        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
1025
+            h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
1026
         {
1027
             for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
1028
             {
1029
@@ -355,7 +363,172 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
1030
     return i_score;
1031
 }
1032
 
1033
-#define MAX_LENGTH (X264_BFRAME_MAX*4)
1034
+/* If MB-tree changes the quantizers, we need to recalculate the frame cost without
1035
+ * re-running lookahead.  Takes the cached per-MB costs of frames[b] for the (p0,p1) pair, rescales each by the MB's current f_qp_offset, rewrites the per-row sums and returns the frame score.  The analysis context a is not used. */
1036
+static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames,
1037
+                                                  int p0, int p1, int b )
1038
+{
1039
+    int i_score = 0;
1040
+    int *row_satd = frames[b]->i_row_satds[b-p0][p1-b]; /* per-row totals for this reference pair, overwritten below */
1041
+    x264_emms();
1042
+    for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- ) /* rows are visited bottom to top */
1043
+    {
1044
+        row_satd[ h->mb.i_mb_y ] = 0;
1045
+        for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- ) /* and each row right to left */
1046
+        {
1047
+            int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
1048
+            int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy]; /* cost cached by x264_slicetype_mb_cost */
1049
+            float qp_adj = frames[b]->f_qp_offset[i_mb_xy];
1050
+            i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8; /* presumably x264_exp2fix8 yields 2^x with 8 fractional bits, so this scales by 2^(-qp_adj/6) with rounding -- confirm against its definition */
1051
+            row_satd[ h->mb.i_mb_y ] += i_mb_cost;
1052
+            if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
1053
+                 h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
1054
+                 h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) /* edge MBs count towards the row sums but not the frame score, unless the frame is at most 2 MBs wide or tall */
1055
+            {
1056
+                i_score += i_mb_cost;
1057
+            }
1058
+        }
1059
+    }
1060
+    return i_score;
1061
+}
1062
+
1063
+static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b ) /* Push the cost carried by each inter MB of frames[b] back onto the MBs of frames[p0] / frames[p1] that its lowres MVs point at. */
1064
+{
1065
+    x264_frame_t *refs[2] = {frames[p0],frames[p1]}; /* list 0 reference, list 1 reference */
1066
+    int dist_scale_factor = p1 != p0 ? ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0) : 128; /* rounded (b-p0)/(p1-p0) scaled by 256; the division is only valid when p1 != p0, otherwise use the midpoint */
1067
+    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; /* list 0 share out of 64 */
1068
+
1069
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
1070
+    {
1071
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
1072
+        {
1073
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
1074
+            int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
1075
+            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
1076
+            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index]; /* bit 0: list 0 used, bit 1: list 1 used (3 = bipred) */
1077
+            /* The approximate amount of data that this block contains. */
1078
+            int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
1079
+
1080
+            /* Don't propagate for an intra block.  With non-negative costs this also guarantees intra_cost > 0 for the division below. */
1081
+            if( inter_cost < intra_cost )
1082
+            {
1083
+                int mv[2][2], list;
1084
+
1085
+                /* Divide by 64 for per-pixel summing. */
1086
+                propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
1087
+                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
1088
+                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
1089
+                if( b != p1 ) /* a list 1 MV is only fetched when b lies before p1 */
1090
+                {
1091
+                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
1092
+                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
1093
+                }
1094
+
1095
+                /* Follow the MVs to the previous frame(s). */
1096
+                for( list = 0; list < 2; list++ )
1097
+                    if( (lists_used >> list)&1 )
1098
+                    {
1099
+                        int x = mv[list][0];
1100
+                        int y = mv[list][1];
1101
+                        int listamount = propagate_amount;
1102
+                        int mbx = (x>>5)+h->mb.i_mb_x; /* x>>5 is the whole-MB part of the MV, x&31 the fraction within an MB */
1103
+                        int mby = ((y>>5)+h->mb.i_mb_y);
1104
+                        int idx0 = mbx + mby*h->mb.i_mb_stride; /* top-left of the (up to) four MBs the target area overlaps */
1105
+                        int idx1 = idx0 + 1;
1106
+                        int idx2 = idx0 + h->mb.i_mb_stride;
1107
+                        int idx3 = idx0 + h->mb.i_mb_stride + 1;
1108
+                        int idx0weight = (32-(y&31))*(32-(x&31)); /* bilinear overlap weights; the four always sum to 32*32 */
1109
+                        int idx1weight = (32-(y&31))*(x&31);
1110
+                        int idx2weight = (y&31)*(32-(x&31));
1111
+                        int idx3weight = (y&31)*(x&31);
1112
+
1113
+                        /* Apply bipred weighting. */
1114
+                        if( lists_used == 3 )
1115
+                            listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6;
1116
+
1117
+#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1)
1118
+
1119
+                        /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
1120
+                         * be counted. */
1121
+                        if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 ) /* fast path: all four target MBs are inside the frame */
1122
+                        {
1123
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
1124
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
1125
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
1126
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
1127
+                        }
1128
+                        else /* Check offsets individually */
1129
+                        {
1130
+                            if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
1131
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
1132
+                            if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
1133
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
1134
+                            if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
1135
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
1136
+                            if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
1137
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
1138
+                        }
1139
+                    }
1140
+            }
1141
+        }
1142
+    }
1143
+}
1144
+
1145
+static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra ) /* Walk the lookahead frames from last to first, propagating costs backwards, then lower f_qp_offset on the earliest frame reached according to how much was propagated onto each MB. */
1146
+{
1147
+    int i, idx = !b_intra; /* idx is the lowest frame index the backward walk may reach: 0 when b_intra is set, otherwise 1 */
1148
+    int last_nonb, cur_nonb = 1;
1149
+    if( b_intra )
1150
+       x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); /* frame 0 evaluated against itself (p0 = p1 = b = 0) */
1151
+
1152
+    i = num_frames-1;
1153
+    while( i > 0 && frames[i]->i_type == X264_TYPE_B ) /* skip trailing B-frames to find the last non-B frame */
1154
+        i--;
1155
+    last_nonb = i;
1156
+
1157
+    if( last_nonb < 0 ) /* only possible when num_frames is 0 */
1158
+        return;
1159
+
1160
+    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); /* nothing references the last non-B frame, so it starts with zero propagated cost */
1161
+    while( i-- > idx )
1162
+    {
1163
+        cur_nonb = i;
1164
+        while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) /* find the previous non-B frame */
1165
+            cur_nonb--;
1166
+        if( cur_nonb < idx )
1167
+            break;
1168
+        x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* cost of last_nonb predicted from cur_nonb */
1169
+        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
1170
+        x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
1171
+        while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* then every B-frame between cur_nonb and last_nonb, latest first */
1172
+        {
1173
+            x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
1174
+            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
1175
+            x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
1176
+            i--;
1177
+        }
1178
+        last_nonb = cur_nonb; /* the frame just processed becomes the far reference for the next step */
1179
+    }
1180
+    x264_emms();
1181
+
1182
+    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) /* last_nonb is now the earliest non-B frame the walk reached */
1183
+    {
1184
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
1185
+        {
1186
+            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
1187
+            int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8;
1188
+
1189
+            if( intra_cost ) /* MBs with zero scaled intra cost keep their offset unchanged */
1190
+            {
1191
+                int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index];
1192
+                float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost); /* log2( (intra + propagated) / intra ) */
1193
+                /* Allow the constant to be adjusted via qcompress, since the two
1194
+                 * concepts are very similar. */
1195
+                frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio; /* the more later frames depend on this MB, the lower its QP offset */
1196
+            }
1197
+        }
1198
+    }
1199
+}
1200
 
1201
 static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold )
1202
 {
1203
@@ -393,14 +566,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram
1204
 /* Uses strings due to the fact that the speed of the control functions is
1205
    negligable compared to the cost of running slicetype_frame_cost, and because
1206
    it makes debugging easier. */
1207
-static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] )
1208
+static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] )
1209
 {
1210
-    char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}};
1211
+    char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}};
1212
     int num_paths = X264_MIN(max_bframes+1, length);
1213
     int suffix_size, loc, path;
1214
     int best_cost = COST_MAX;
1215
     int best_path_index = 0;
1216
-    length = X264_MIN(length,MAX_LENGTH);
1217
+    length = X264_MIN(length,X264_LOOKAHEAD_MAX);
1218
 
1219
     /* Iterate over all currently possible paths and add suffixes to each one */
1220
     for( suffix_size = 0; suffix_size < num_paths; suffix_size++ )
1221
@@ -426,15 +599,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1222
     memcpy( best_paths[length], paths[best_path_index], length );
1223
 }
1224
 
1225
-static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer )
1226
-{
1227
-    char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"};
1228
-    int n;
1229
-    for( n = 2; n < length-1; n++ )
1230
-        x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths );
1231
-    return strspn( best_paths[length-2], "B" );
1232
-}
1233
-
1234
 static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 )
1235
 {
1236
     x264_frame_t *frame = frames[p1];
1237
@@ -477,13 +641,13 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in
1238
     return res;
1239
 }
1240
 
1241
-static void x264_slicetype_analyse( x264_t *h )
1242
+static void x264_slicetype_analyse( x264_t *h, int keyframe )
1243
 {
1244
     x264_mb_analysis_t a;
1245
-    x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, };
1246
+    x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
1247
     int num_frames;
1248
     int keyint_limit;
1249
-    int j;
1250
+    int i,j;
1251
     int i_mb_count = NUM_MBS;
1252
     int cost1p0, cost2p0, cost1b1, cost2p1;
1253
     int idr_frame_type;
1254
@@ -497,96 +661,150 @@ static void x264_slicetype_analyse( x264_t *h )
1255
         frames[j+1] = h->frames.next[j];
1256
     keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1;
1257
     num_frames = X264_MIN( j, keyint_limit );
1258
-    if( num_frames == 0 )
1259
+
1260
+    if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) )
1261
         return;
1262
 
1263
     x264_lowres_context_init( h, &a );
1264
     idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
1265
 
1266
-    if( num_frames == 1 )
1267
+    if( num_frames == 1 && !h->param.rc.b_mb_tree )
1268
     {
1269
-no_b_frames:
1270
         frames[1]->i_type = X264_TYPE_P;
1271
         if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
1272
             frames[1]->i_type = idr_frame_type;
1273
         return;
1274
     }
1275
 
1276
-    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
1277
+    /* This is important psy-wise: if we have a non-scenecut keyframe,
1278
+     * there will be significant visual artifacts if the frames just before
1279
+     * go down in quality due to being referenced less, despite it being
1280
+     * more RD-optimal. */
1281
+    if( h->param.analyse.b_psy && h->param.rc.b_mb_tree )
1282
+        num_frames = j;
1283
+
1284
+    char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"};
1285
+    int n;
1286
+    int num_bframes = 0;
1287
+    int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
1288
+    int num_analysed_frames = num_frames;
1289
+    int reset_start;
1290
+    if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
1291
     {
1292
-        int num_bframes;
1293
-        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
1294
-        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
1295
+        frames[1]->i_type = idr_frame_type;
1296
+        return;
1297
+    }
1298
+
1299
+    if( h->param.i_bframe )
1300
+    {
1301
+        if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS )
1302
         {
1303
-            frames[1]->i_type = idr_frame_type;
1304
-            return;
1305
+            /* Perform the frametype analysis. */
1306
+            for( n = 2; n < num_frames-1; n++ )
1307
+                x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths );
1308
+            num_bframes = strspn( best_paths[num_frames-2], "B" );
1309
+            /* Load the results of the analysis into the frame types. */
1310
+            for( j = 1; j < num_frames; j++ )
1311
+                frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P;
1312
+            frames[num_frames]->i_type = X264_TYPE_P;
1313
         }
1314
-        num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes );
1315
-        assert(num_bframes < num_frames);
1316
-
1317
-        for( j = 1; j < num_bframes+1; j++ )
1318
+        else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
1319
         {
1320
-            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
1321
+            for( i = 0; i < num_frames-(2-!i); )
1322
             {
1323
-                frames[j]->i_type = X264_TYPE_P;
1324
-                return;
1325
-            }
1326
-            frames[j]->i_type = X264_TYPE_B;
1327
-        }
1328
-        frames[num_bframes+1]->i_type = X264_TYPE_P;
1329
-    }
1330
-    else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST )
1331
-    {
1332
-        cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 );
1333
-        if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 )
1334
-            goto no_b_frames;
1335
+                cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 );
1336
+                if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 )
1337
+                {
1338
+                    frames[i+1]->i_type = X264_TYPE_P;
1339
+                    frames[i+2]->i_type = X264_TYPE_P;
1340
+                    i += 2;
1341
+                    continue;
1342
+                }
1343
 
1344
-        cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 );
1345
-        cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 );
1346
-        cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 );
1347
+                cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 );
1348
+                cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 );
1349
+                cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 );
1350
 
1351
-        if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
1352
-            goto no_b_frames;
1353
+                if( cost1p0 + cost2p0 < cost1b1 + cost2p1 )
1354
+                {
1355
+                    frames[i+1]->i_type = X264_TYPE_P;
1356
+                    frames[i+2]->i_type = X264_TYPE_P;
1357
+                    i += 2;
1358
+                    continue;
1359
+                }
1360
 
1361
-        // arbitrary and untuned
1362
-        #define INTER_THRESH 300
1363
-        #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
1364
-        frames[1]->i_type = X264_TYPE_B;
1365
+                // arbitrary and untuned
1366
+                #define INTER_THRESH 300
1367
+                #define P_SENS_BIAS (50 - h->param.i_bframe_bias)
1368
+                frames[i+1]->i_type = X264_TYPE_B;
1369
+                frames[i+2]->i_type = X264_TYPE_P;
1370
 
1371
-        for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ )
1372
+                for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ )
1373
+                {
1374
+                    int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10);
1375
+                    int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 );
1376
+
1377
+                    if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 )
1378
+                    {
1379
+                        frames[j]->i_type = X264_TYPE_P;
1380
+                        break;
1381
+                    }
1382
+                    else
1383
+                        frames[j]->i_type = X264_TYPE_B;
1384
+                }
1385
+                i = j;
1386
+            }
1387
+            frames[i+!i]->i_type = X264_TYPE_P;
1388
+            num_bframes = 0;
1389
+            while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B )
1390
+                num_bframes++;
1391
+        }
1392
+        else
1393
         {
1394
-            int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10);
1395
-            int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 );
1396
+            num_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
1397
+            for( j = 1; j < num_frames; j++ )
1398
+                frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P;
1399
+            frames[num_frames]->i_type = X264_TYPE_P;
1400
+        }
1401
 
1402
-            if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 )
1403
+        /* Check scenecut on the first minigop. */
1404
+        for( j = 1; j < num_bframes+1; j++ )
1405
+            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
1406
             {
1407
                 frames[j]->i_type = X264_TYPE_P;
1408
+                num_analysed_frames = j;
1409
                 break;
1410
             }
1411
-            else
1412
-                frames[j]->i_type = X264_TYPE_B;
1413
-        }
1414
+
1415
+        reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 );
1416
     }
1417
     else
1418
     {
1419
-        int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe);
1420
-        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) )
1421
-        {
1422
-            frames[1]->i_type = idr_frame_type;
1423
-            return;
1424
-        }
1425
+        for( j = 1; j < num_frames; j++ )
1426
+            frames[j]->i_type = X264_TYPE_P;
1427
+        reset_start = !keyframe + 1;
1428
+    }
1429
 
1430
-        for( j = 1; j < max_bframes+1; j++ )
1431
-        {
1432
-            if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) )
1433
+    /* Perform the actual macroblock tree analysis.
1434
+     * Don't go farther than the lookahead parameter; this helps in short GOPs. */
1435
+    if( h->param.rc.b_mb_tree )
1436
+        x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe );
1437
+
1438
+    /* Enforce keyframe limit. */
1439
+    if( h->param.i_bframe )
1440
+        for( j = 0; j <= num_bframes; j++ )
1441
+            if( j+1 > keyint_limit )
1442
             {
1443
-                frames[j]->i_type = X264_TYPE_P;
1444
-                return;
1445
+                if( j )
1446
+                    frames[j]->i_type = X264_TYPE_P;
1447
+                frames[j+1]->i_type = idr_frame_type;
1448
+                reset_start = j+2;
1449
+                break;
1450
             }
1451
-            frames[j]->i_type = X264_TYPE_B;
1452
-        }
1453
-        frames[max_bframes+1]->i_type = X264_TYPE_P;
1454
-    }
1455
+
1456
+    /* Restore frametypes for all frames that haven't actually been decided yet. */
1457
+    for( j = reset_start; j <= num_frames; j++ )
1458
+        frames[j]->i_type = X264_TYPE_AUTO;
1459
 }
1460
 
1461
 void x264_slicetype_decide( x264_t *h )
1462
@@ -606,8 +824,9 @@ void x264_slicetype_decide( x264_t *h )
1463
                 x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame );
1464
     }
1465
     else if( (h->param.i_bframe && h->param.i_bframe_adaptive)
1466
-             || h->param.i_scenecut_threshold )
1467
-        x264_slicetype_analyse( h );
1468
+             || h->param.i_scenecut_threshold
1469
+             || h->param.rc.b_mb_tree )
1470
+        x264_slicetype_analyse( h, 0 );
1471
 
1472
     for( bframes = 0;; bframes++ )
1473
     {
1474
@@ -645,7 +864,9 @@ void x264_slicetype_decide( x264_t *h )
1475
                 frm->i_type = X264_TYPE_P;
1476
         }
1477
 
1478
-        if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B;
1479
+        if( frm->i_type == X264_TYPE_AUTO )
1480
+            frm->i_type = X264_TYPE_B;
1481
+
1482
         else if( !IS_X264_TYPE_B( frm->i_type ) ) break;
1483
     }
1484
 }
1485
@@ -653,7 +874,7 @@ void x264_slicetype_decide( x264_t *h )
1486
 int x264_rc_analyse_slice( x264_t *h )
1487
 {
1488
     x264_mb_analysis_t a;
1489
-    x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, };
1490
+    x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, };
1491
     int p0=0, p1, b;
1492
     int cost;
1493
 
1494
@@ -662,6 +883,12 @@ int x264_rc_analyse_slice( x264_t *h )
1495
     if( IS_X264_TYPE_I(h->fenc->i_type) )
1496
     {
1497
         p1 = b = 0;
1498
+        /* For MB-tree, we have to perform propagation analysis on I-frames too. */
1499
+        if( h->param.rc.b_mb_tree )
1500
+        {
1501
+            h->frames.last_nonb = h->fenc;
1502
+            x264_slicetype_analyse( h, 1 );
1503
+        }
1504
     }
1505
     else if( X264_TYPE_P == h->fenc->i_type )
1506
     {
1507
@@ -680,11 +907,16 @@ int x264_rc_analyse_slice( x264_t *h )
1508
     frames[p0] = h->fref0[0];
1509
     frames[b] = h->fenc;
1510
 
1511
-    cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
1512
+    if( h->param.rc.b_mb_tree )
1513
+        cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b );
1514
+    else
1515
+    {
1516
+        cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
1517
 
1518
-    /* In AQ, use the weighted score instead. */
1519
-    if( h->param.rc.i_aq_mode )
1520
-        cost = frames[b]->i_cost_est[b-p0][p1-b];
1521
+        /* In AQ, use the weighted score instead. */
1522
+        if( h->param.rc.i_aq_mode )
1523
+            cost = frames[b]->i_cost_est[b-p0][p1-b];
1524
+    }
1525
 
1526
     h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
1527
     h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
1528
diff --git a/x264.c b/x264.c
1529
index c3b4f29..c75bfde 100644
1530
--- a/x264.c
1531
+++ b/x264.c
1532
@@ -168,9 +168,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
1533
     H0( "                                  - baseline,main,high\n" );
1534
     H0( "      --preset                Use a preset to select encoding settings [medium]\n" );
1535
     H0( "                                  Overridden by user settings\n");
1536
-    H1( "                                  - ultrafast,veryfast,fast,medium\n"
1537
-        "                                  - slow,slower,placebo\n" );
1538
-    else H0( "                                  - ultrafast,veryfast,fast,medium,slow,slower\n" );
1539
+    H0( "                                  - ultrafast,veryfast,faster,fast\n"
1540
+        "                                  - medium,slow,slower,placebo\n" );
1541
     H0( "      --tune                  Tune the settings for a particular type of source\n" );
1542
     H0( "                                  Overridden by user settings\n");
1543
     H1( "                                  - film,animation,grain,psnr,ssim\n"
1544
@@ -204,6 +203,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
1545
     H0( "  -q, --qp <integer>          Set QP (0-51, 0=lossless)\n" );
1546
     H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
1547
     H0( "      --crf <float>           Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
1548
+    H0( "      --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
1549
     H0( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
1550
     H0( "      --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
1551
     H1( "      --vbv-init <float>      Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init );
1552
@@ -228,6 +228,7 @@ static void Help( x264_param_t *defaults, int b_longhelp )
1553
         "                                  - 2: Last pass, does not overwrite stats file\n"
1554
         "                                  - 3: Nth pass, overwrites stats file\n" );
1555
     H0( "      --stats <string>        Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out );
1556
+    H0( "      --no-mbtree                Disable mb-tree ratecontrol.\n");
1557
     H0( "      --qcomp <float>         QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress );
1558
     H1( "      --cplxblur <float>      Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur );
1559
     H1( "      --qblur <float>         Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur );
1560
@@ -277,6 +278,8 @@ static void Help( x264_param_t *defaults, int b_longhelp )
1561
         "                                  #1: RD (requires subme>=6)\n"
1562
         "                                  #2: Trellis (requires trellis, experimental)\n",
1563
                                        defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis );
1564
+    H1( "      --no-psy                Disable all visual optimizations that worsen\n"
1565
+        "                              both PSNR and SSIM.\n" );
1566
     H0( "      --no-mixed-refs         Don't decide references on a per partition basis\n" );
1567
     H1( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
1568
     H0( "      --no-8x8dct             Disable adaptive spatial transform size\n" );
1569
@@ -403,6 +406,7 @@ static struct option long_options[] =
1570
     { "qpmax",       required_argument, NULL, 0 },
1571
     { "qpstep",      required_argument, NULL, 0 },
1572
     { "crf",         required_argument, NULL, 0 },
1573
+    { "rc-lookahead",required_argument, NULL, 0 },
1574
     { "ref",         required_argument, NULL, 'r' },
1575
     { "asm",         required_argument, NULL, 0 },
1576
     { "no-asm",            no_argument, NULL, 0 },
1577
@@ -422,6 +426,7 @@ static struct option long_options[] =
1578
     { "mvrange-thread", required_argument, NULL, 0 },
1579
     { "subme",       required_argument, NULL, 'm' },
1580
     { "psy-rd",      required_argument, NULL, 0 },
1581
+    { "no-psy",            no_argument, NULL, 0 },
1582
     { "mixed-refs",        no_argument, NULL, 0 },
1583
     { "no-mixed-refs",     no_argument, NULL, 0 },
1584
     { "no-chroma-me",      no_argument, NULL, 0 },
1585
@@ -446,6 +451,8 @@ static struct option long_options[] =
1586
     { "pass",        required_argument, NULL, 'p' },
1587
     { "stats",       required_argument, NULL, 0 },
1588
     { "qcomp",       required_argument, NULL, 0 },
1589
+    { "mbtree",            no_argument, NULL, 0 },
1590
+    { "no-mbtree",         no_argument, NULL, 0 },
1591
     { "qblur",       required_argument, NULL, 0 },
1592
     { "cplxblur",    required_argument, NULL, 0 },
1593
     { "zones",       required_argument, NULL, 0 },
1594
@@ -542,6 +549,8 @@ static int  Parse( int argc, char **argv,
1595
                 param->rc.i_aq_mode = 0;
1596
                 param->analyse.b_mixed_references = 0;
1597
                 param->analyse.i_trellis = 0;
1598
+                param->i_bframe_adaptive = X264_B_ADAPT_NONE;
1599
+                param->rc.b_mb_tree = 0;
1600
             }
1601
             else if( !strcasecmp( optarg, "veryfast" ) )
1602
             {
1603
@@ -551,12 +560,20 @@ static int  Parse( int argc, char **argv,
1604
                 param->i_frame_reference = 1;
1605
                 param->analyse.b_mixed_references = 0;
1606
                 param->analyse.i_trellis = 0;
1607
+                param->rc.b_mb_tree = 0;
1608
             }
1609
-            else if( !strcasecmp( optarg, "fast" ) )
1610
+            else if( !strcasecmp( optarg, "faster" ) )
1611
             {
1612
                 param->analyse.b_mixed_references = 0;
1613
                 param->i_frame_reference = 2;
1614
                 param->analyse.i_subpel_refine = 4;
1615
+                param->rc.b_mb_tree = 0;
1616
+            }
1617
+            else if( !strcasecmp( optarg, "fast" ) )
1618
+            {
1619
+                param->i_frame_reference = 2;
1620
+                param->analyse.i_subpel_refine = 6;
1621
+                param->rc.i_lookahead = 30;
1622
             }
1623
             else if( !strcasecmp( optarg, "medium" ) )
1624
             {
1625
@@ -569,6 +586,7 @@ static int  Parse( int argc, char **argv,
1626
                 param->i_frame_reference = 5;
1627
                 param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
1628
                 param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
1629
+                param->rc.i_lookahead = 50;
1630
             }
1631
             else if( !strcasecmp( optarg, "slower" ) )
1632
             {
1633
@@ -579,6 +597,7 @@ static int  Parse( int argc, char **argv,
1634
                 param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
1635
                 param->analyse.inter |= X264_ANALYSE_PSUB8x8;
1636
                 param->analyse.i_trellis = 2;
1637
+                param->rc.i_lookahead = 60;
1638
             }
1639
             else if( !strcasecmp( optarg, "placebo" ) )
1640
             {
1641
@@ -592,6 +611,7 @@ static int  Parse( int argc, char **argv,
1642
                 param->analyse.b_fast_pskip = 0;
1643
                 param->analyse.i_trellis = 2;
1644
                 param->i_bframe = 16;
1645
+                param->rc.i_lookahead = 60;
1646
             }
1647
             else
1648
             {
1649
@@ -644,11 +664,13 @@ static int  Parse( int argc, char **argv,
1650
             {
1651
                 param->analyse.f_psy_rd = 0;
1652
                 param->rc.i_aq_mode = X264_AQ_NONE;
1653
+                param->analyse.b_psy = 0;
1654
             }
1655
             else if( !strcasecmp( optarg, "ssim" ) )
1656
             {
1657
                 param->analyse.f_psy_rd = 0;
1658
                 param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
1659
+                param->analyse.b_psy = 0;
1660
             }
1661
             else if( !strcasecmp( optarg, "fastdecode" ) )
1662
             {
1663
@@ -662,7 +684,6 @@ static int  Parse( int argc, char **argv,
1664
                 param->i_deblocking_filter_alphac0 = -1;
1665
                 param->i_deblocking_filter_beta = -1;
1666
                 param->analyse.f_psy_trellis = 0.2;
1667
-                param->rc.f_ip_factor = 2.1;
1668
                 param->rc.f_aq_strength = 1.3;
1669
                 if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
1670
                     param->analyse.inter |= X264_ANALYSE_PSUB8x8;
1671
diff --git a/x264.h b/x264.h
1672
index 2dfcc8d..4982b2e 100644
1673
--- a/x264.h
1674
+++ b/x264.h
1675
@@ -35,7 +35,7 @@
1676
 
1677
 #include <stdarg.h>
1678
 
1679
-#define X264_BUILD 68
1680
+#define X264_BUILD 69
1681
 
1682
 /* x264_t:
1683
  *      opaque handler for encoder */
1684
@@ -242,6 +242,7 @@ typedef struct x264_param_t
1685
         int          i_noise_reduction; /* adaptive pseudo-deadzone */
1686
         float        f_psy_rd; /* Psy RD strength */
1687
         float        f_psy_trellis; /* Psy trellis strength */
1688
+        int          b_psy; /* Toggle all psy optimizations */
1689
 
1690
         /* the deadzone size that will be used in luma quantization */
1691
         int          i_luma_deadzone[2]; /* {inter, intra} */
1692
@@ -271,6 +272,8 @@ typedef struct x264_param_t
1693
 
1694
         int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
1695
         float       f_aq_strength;
1696
+        int         b_mb_tree;      /* Macroblock-tree ratecontrol. */
1697
+        int         i_lookahead;
1698
 
1699
         /* 2pass */
1700
         int         b_stat_write;   /* Enable stat writing in psz_stat_out */
1701
-- 
1702
1.6.1.2
1703
1704