View difference between Paste ID: f87abd3f and
SHOW:
|
|
- or go back to the newest paste.
1 | From f21e71a04ba65aff9b5a4bfa8a73fd86c463f4ee Mon Sep 17 00:00:00 2001 | |
2 | From: Jason Garrett-Glaser <darkshikari@gmail.com> | |
3 | Date: Mon, 3 Aug 2009 20:52:30 -0700 | |
4 | Subject: [PATCH 1/2] Various 1-pass VBV tweaks | |
5 | Make predictors have an offset in addition to a multiplier. | |
6 | This primarily fixes issues in sources with lots of extremely static scenes, such as anime and CGI. | |
7 | We tried linear regressions, but they were very unreliable as predictors. | |
8 | Also allow VBV to be slightly more aggressive in raising QPs to avoid not having enough bits left in some situations. | |
9 | Up to 1 dB improvement on some clips. | |
10 | ||
11 | --- | |
12 | encoder/ratecontrol.c | 32 +++++++++++++++++++++----------- | |
13 | 1 files changed, 21 insertions(+), 11 deletions(-) | |
14 | ||
15 | diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c | |
16 | index 2f88708..087e658 100644 | |
17 | --- a/encoder/ratecontrol.c | |
18 | +++ b/encoder/ratecontrol.c | |
19 | @@ -58,6 +58,7 @@ typedef struct | |
20 | double coeff; | |
21 | double count; | |
22 | double decay; | |
23 | + double offset; | |
24 | } predictor_t; | |
25 | ||
26 | struct x264_ratecontrol_t | |
27 | @@ -409,9 +410,11 @@ int x264_ratecontrol_new( x264_t *h ) | |
28 | rc->pred[i].coeff= 2.0; | |
29 | rc->pred[i].count= 1.0; | |
30 | rc->pred[i].decay= 0.5; | |
31 | + rc->pred[i].offset= 0.0; | |
32 | rc->row_preds[i].coeff= .25; | |
33 | rc->row_preds[i].count= 1.0; | |
34 | rc->row_preds[i].decay= 0.5; | |
35 | + rc->row_preds[i].offset= 0.0; | |
36 | } | |
37 | *rc->pred_b_from_p = rc->pred[0]; | |
38 | ||
39 | @@ -953,7 +956,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) | |
40 | if( y < h->sps->i_mb_height-1 ) | |
41 | { | |
42 | int i_estimated; | |
43 | - int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1]) | |
44 | + int avg_qp = X264_MIN(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1]) | |
45 | + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1); | |
46 | rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset | |
47 | i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size | |
48 | @@ -1153,10 +1156,6 @@ void x264_ratecontrol_end( x264_t *h, int bits ) | |
49 | { | |
50 | update_predictor( rc->pred_b_from_p, qp2qscale(rc->qpa_rc), | |
51 | h->fref1[h->i_ref1-1]->i_satd, rc->bframe_bits / rc->bframes ); | |
52 | - /* In some cases, such as completely blank scenes, pred_b_from_p can go nuts */ | |
53 | - /* Hackily cap the predictor coeff in case this happens. */ | |
54 | - /* FIXME FIXME FIXME */ | |
55 | - rc->pred_b_from_p->coeff = X264_MIN( rc->pred_b_from_p->coeff, 10. ); | |
56 | rc->bframe_bits = 0; | |
57 | } | |
58 | } | |
59 | @@ -1270,17 +1269,28 @@ static double get_diff_limited_q(x264_t *h, ratecontrol_entry_t *rce, double q) | |
60 | ||
61 | static double predict_size( predictor_t *p, double q, double var ) | |
62 | { | |
63 | - return p->coeff*var / (q*p->count); | |
64 | + return (p->coeff*var + p->offset) / (q*p->count); /* affine model in var (coeff*var + offset), normalized by the accumulated weight p->count and by q */ | |
65 | } | |
66 | ||
67 | static void update_predictor( predictor_t *p, double q, double var, double bits ) | |
68 | { | |
69 | + const double range = 1.5; /* max factor by which one sample's coefficient may differ from the current average before the excess is moved into the offset */ | |
70 | if( var < 10 ) | |
71 | return; | |
72 | - p->count *= p->decay; | |
73 | - p->coeff *= p->decay; | |
74 | - p->count ++; | |
75 | - p->coeff += bits*q / var; | |
76 | + double old_coeff = p->coeff / p->count; /* current average coefficient (accumulated coeff over accumulated weight) */ | |
77 | + double new_coeff = bits*q / var; /* coefficient that would explain this sample with a zero offset */ | |
78 | + double new_coeff_clipped = x264_clip3f( new_coeff, old_coeff/range, old_coeff*range ); /* assumes x264_clip3f(v, lo, hi) clamps v to [lo, hi] - TODO confirm */ | |
79 | + double new_offset = bits*q - new_coeff_clipped * var; /* part of bits*q not explained by the clipped coefficient */ | |
80 | + if( new_offset >= 0 ) /* non-negative residual: keep the clipped coefficient and store the residual as offset */ | |
81 | + new_coeff = new_coeff_clipped; | |
82 | + else /* negative residual: fall back to the unclipped coefficient with no offset */ | |
83 | + new_offset = 0; | |
84 | + p->count *= p->decay; /* age the previous accumulators before adding the new sample */ | |
85 | + p->coeff *= p->decay; | |
86 | + p->offset *= p->decay; | |
87 | + p->count ++; /* the new sample enters with weight 1 */ | |
88 | + p->coeff += new_coeff; | |
89 | + p->offset += new_offset; | |
90 | } | |
91 | ||
92 | // update VBV after encoding a frame | |
93 | @@ -1350,7 +1360,7 @@ static double clip_qscale( x264_t *h, int pict_type, double q ) | |
94 | double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); | |
95 | double qf = 1.0; | |
96 | if( bits > rcc->buffer_fill/2 ) | |
97 | - qf = x264_clip3f( rcc->buffer_fill/(2*bits), 0.2, 1.0 ); | |
98 | + qf = rcc->buffer_fill/(2*bits); | |
99 | q /= qf; | |
100 | bits *= qf; | |
101 | if( bits < rcc->buffer_rate/2 ) | |
102 | -- | |
103 | 1.6.1.2 | |
104 | ||
105 | ||
106 | From e7182499c7bc23d3376090f66d7617b2080f2b46 Mon Sep 17 00:00:00 2001 | |
107 | From: Jason Garrett-Glaser <darkshikari@gmail.com> | |
108 | Date: Tue, 4 Aug 2009 17:46:33 -0700 | |
109 | Subject: [PATCH 2/2] Macroblock-tree ratecontrol | |
110 | On by default; can be turned off with --no-mbtree. | |
111 | Uses a large lookahead to track temporal propagation of data and weight quality accordingly. | |
112 | Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode. | |
113 | Doesn't work with b-pyramid yet. | |
114 | Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat. | |
115 | This makes the "medium" preset a bit slower. Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast". | |
116 | All presets "fast" and above will have MB-tree on. | |
117 | Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis. | |
118 | Default is 40; larger values will be slower and require more memory but give more accurate results. | |
119 | This value will be used in the future to control ratecontrol lookahead (VBV). | |
120 | Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM. | |
121 | This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters. | |
122 | Quality improvement from MB-tree is about 2-70% depending on content. | |
123 | Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength. | |
124 | ||
125 | --- | |
126 | common/common.c | 22 ++- | |
127 | common/common.h | 50 ++++++- | |
128 | common/frame.c | 10 +- | |
129 | common/frame.h | 3 + | |
130 | common/osdep.h | 9 +- | |
131 | encoder/analyse.c | 4 +- | |
132 | encoder/encoder.c | 56 ++++++- | |
133 | encoder/ratecontrol.c | 201 +++++++++++++++++------- | |
134 | encoder/ratecontrol.h | 3 +- | |
135 | encoder/slicetype.c | 424 ++++++++++++++++++++++++++++++++++++++----------- | |
136 | x264.c | 31 +++- | |
137 | x264.h | 5 +- | |
138 | 12 files changed, 639 insertions(+), 179 deletions(-) | |
139 | ||
140 | diff --git a/common/common.c b/common/common.c | |
141 | index 9260c64..371ed1e 100644 | |
142 | --- a/common/common.c | |
143 | +++ b/common/common.c | |
144 | @@ -95,6 +95,7 @@ void x264_param_default( x264_param_t *param ) | |
145 | param->rc.f_pb_factor = 1.3; | |
146 | param->rc.i_aq_mode = X264_AQ_VARIANCE; | |
147 | param->rc.f_aq_strength = 1.0; | |
148 | + param->rc.i_lookahead = 40; | |
149 | ||
150 | param->rc.b_stat_write = 0; | |
151 | param->rc.psz_stat_out = "x264_2pass.log"; | |
152 | @@ -104,6 +105,7 @@ void x264_param_default( x264_param_t *param ) | |
153 | param->rc.f_qblur = 0.5; | |
154 | param->rc.f_complexity_blur = 20; | |
155 | param->rc.i_zones = 0; | |
156 | + param->rc.b_mb_tree = 1; | |
157 | ||
158 | /* Log */ | |
159 | param->pf_log = x264_log_default; | |
160 | @@ -117,6 +119,7 @@ void x264_param_default( x264_param_t *param ) | |
161 | param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL; | |
162 | param->analyse.i_me_method = X264_ME_HEX; | |
163 | param->analyse.f_psy_rd = 1.0; | |
164 | + param->analyse.b_psy = 1; | |
165 | param->analyse.f_psy_trellis = 0; | |
166 | param->analyse.i_me_range = 16; | |
167 | param->analyse.i_subpel_refine = 7; | |
168 | @@ -493,6 +496,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) | |
169 | p->analyse.f_psy_trellis = 0; | |
170 | } | |
171 | } | |
172 | + OPT("psy") | |
173 | + p->analyse.b_psy = atobool(value); | |
174 | OPT("chroma-me") | |
175 | p->analyse.b_chroma_me = atobool(value); | |
176 | OPT("mixed-refs") | |
177 | @@ -524,6 +529,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) | |
178 | p->rc.f_rf_constant = atof(value); | |
179 | p->rc.i_rc_method = X264_RC_CRF; | |
180 | } | |
181 | + OPT("rc-lookahead") | |
182 | + p->rc.i_lookahead = atoi(value); | |
183 | OPT2("qpmin", "qp-min") | |
184 | p->rc.i_qp_min = atoi(value); | |
185 | OPT2("qpmax", "qp-max") | |
186 | @@ -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) | |
187 | } | |
188 | OPT("qcomp") | |
189 | p->rc.f_qcompress = atof(value); | |
190 | + OPT("mbtree") | |
191 | + p->rc.b_mb_tree = atobool(value); | |
192 | OPT("qblur") | |
193 | p->rc.f_qblur = atof(value); | |
194 | OPT2("cplxblur", "cplx-blur") | |
195 | @@ -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res ) | |
196 | s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter ); | |
197 | s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] ); | |
198 | s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine ); | |
199 | - s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis ); | |
200 | + s += sprintf( s, " psy=%d", p->analyse.b_psy ); | |
201 | + if( p->analyse.b_psy ) | |
202 | + s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis ); | |
203 | s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references ); | |
204 | s += sprintf( s, " me_range=%d", p->analyse.i_me_range ); | |
205 | s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me ); | |
206 | @@ -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res ) | |
207 | s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d", | |
208 | p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold ); | |
209 | ||
210 | - s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ? | |
211 | + if( p->rc.b_mb_tree ) | |
212 | + s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead ); | |
213 | + | |
214 | + s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ? | |
215 | ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" ) | |
216 | - : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" ); | |
217 | + : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree ); | |
218 | if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF ) | |
219 | { | |
220 | if( p->rc.i_rc_method == X264_RC_CRF ) | |
221 | @@ -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res ) | |
222 | if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) ) | |
223 | { | |
224 | s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor ); | |
225 | - if( p->i_bframe ) | |
226 | + if( p->i_bframe && !p->rc.b_mb_tree ) | |
227 | s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor ); | |
228 | s += sprintf( s, " aq=%d", p->rc.i_aq_mode ); | |
229 | if( p->rc.i_aq_mode ) | |
230 | diff --git a/common/common.h b/common/common.h | |
231 | index 8a25a13..30163ab 100644 | |
232 | --- a/common/common.h | |
233 | +++ b/common/common.h | |
234 | @@ -51,6 +51,7 @@ | |
235 | #define X264_SLICE_MAX 4 | |
236 | #define X264_NAL_MAX (4 + X264_SLICE_MAX) | |
237 | #define X264_PCM_COST (386*8) | |
238 | +#define X264_LOOKAHEAD_MAX 250 | |
239 | ||
240 | // number of pixels (per thread) in progress at any given time. | |
241 | // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety | |
242 | @@ -152,6 +153,49 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop ) | |
243 | return amvd0 + (amvd1<<16); | |
244 | } | |
245 | ||
246 | +static const uint8_t exp2_lut[64] = { /* fractional-power table used by x264_exp2fix8(): entry f is approximately 256*(2^(f/64) - 1) */ | |
247 | + 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47, | |
248 | + 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104, | |
249 | + 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172, | |
250 | + 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253, | |
251 | +}; | |
252 | + | |
253 | +static ALWAYS_INLINE int x264_exp2fix8( float x ) /* approximates 256 * 2^x (2^x with 8 fractional bits) via exp2_lut; result is clamped to 0 .. 0xffff */ | |
254 | +{ | |
255 | + int i, f; | |
256 | + x += 8; /* bias the input so the handled range x in (-8, 8) becomes (0, 16) */ | |
257 | + if( x <= 0 ) return 0; /* too small to represent: clamp to 0 */ | |
258 | + if( x >= 16 ) return 0xffff; /* too large to represent: clamp to 0xffff */ | |
259 | + i = x; /* integer part of the biased exponent (float-to-int truncation) */ | |
260 | + f = (x-i)*64; /* fractional part quantized to an index 0..63 into exp2_lut */ | |
261 | + return (exp2_lut[f]+256) << i >> 8; /* (256 + lut) ~ 256 * 2^frac; shifted left by i and right by 8 this gives 2^(biased x) */ | |
262 | +} | |
263 | + | |
264 | +static const float log2_lut[128] = { /* mantissa table used by x264_log2(): entry i is approximately log2(1 + i/128) */ | |
265 | + 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682, | |
266 | + 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987, | |
267 | + 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840, | |
268 | + 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288, | |
269 | + 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370, | |
270 | + 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121, | |
271 | + 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570, | |
272 | + 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743, | |
273 | + 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662, | |
274 | + 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349, | |
275 | + 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819, | |
276 | + 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090, | |
277 | + 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175, | |
278 | + 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087, | |
279 | + 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837, | |
280 | + 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435, | |
281 | +}; | |
282 | + | |
283 | +static ALWAYS_INLINE float x264_log2( uint32_t x ) /* table-based approximation of log2(x): integer part from the leading-zero count, fraction from log2_lut */ | |
284 | +{ | |
285 | + int lz = x264_clz( x ); /* NOTE(review): x == 0 is passed straight to x264_clz(); confirm callers never pass 0 */ | |
286 | + return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz); /* normalize so the top set bit sits at bit 31, use the next 7 bits as the table index, add the exponent 31-lz */ | |
287 | +} | |
288 | + | |
289 | /**************************************************************************** | |
290 | * | |
291 | ****************************************************************************/ | |
292 | @@ -327,11 +371,11 @@ struct x264_t | |
293 | struct | |
294 | { | |
295 | /* Frames to be encoded (whose types have been decided) */ | |
296 | - x264_frame_t *current[X264_BFRAME_MAX*4+3]; | |
297 | + x264_frame_t *current[X264_LOOKAHEAD_MAX+3]; | |
298 | /* Temporary buffer (frames types not yet decided) */ | |
299 | - x264_frame_t *next[X264_BFRAME_MAX*4+3]; | |
300 | + x264_frame_t *next[X264_LOOKAHEAD_MAX+3]; | |
301 | /* Unused frames */ | |
302 | - x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4]; | |
303 | + x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4]; | |
304 | /* For adaptive B decision */ | |
305 | x264_frame_t *last_nonb; | |
306 | ||
307 | diff --git a/common/frame.c b/common/frame.c | |
308 | index 23e6824..2097d52 100644 | |
309 | --- a/common/frame.c | |
310 | +++ b/common/frame.c | |
311 | @@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h ) | |
312 | memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) ); | |
313 | CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) ); | |
314 | } | |
315 | + CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) ); | |
316 | + memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) ); | |
317 | + CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) ); | |
318 | + for( j = 0; j <= h->param.i_bframe+1; j++ ) | |
319 | + for( i = 0; i <= h->param.i_bframe+1; i++ ) | |
320 | + { | |
321 | + CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) ); | |
322 | + CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) ); | |
323 | + } | |
324 | } | |
325 | ||
326 | if( h->param.analyse.i_me_method >= X264_ME_ESA ) | |
327 | @@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h ) | |
328 | CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t)); | |
329 | CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) ); | |
330 | CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) ); | |
331 | - CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) ); | |
332 | if( h->param.i_bframe ) | |
333 | { | |
334 | CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) ); | |
335 | diff --git a/common/frame.h b/common/frame.h | |
336 | index aad77f5..a3da4e4 100644 | |
337 | --- a/common/frame.h | |
338 | +++ b/common/frame.h | |
339 | @@ -63,6 +63,8 @@ typedef struct | |
340 | int8_t *mb_type; | |
341 | int16_t (*mv[2])[2]; | |
342 | int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2]; | |
343 | + uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]); | |
344 | + uint8_t (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]); | |
345 | int *lowres_mv_costs[2][X264_BFRAME_MAX+1]; | |
346 | int8_t *ref[2]; | |
347 | int i_ref[2]; | |
348 | @@ -83,6 +85,7 @@ typedef struct | |
349 | float *f_qp_offset; | |
350 | int b_intra_calculated; | |
351 | uint16_t *i_intra_cost; | |
352 | + uint32_t *i_propagate_cost; | |
353 | uint16_t *i_inv_qscale_factor; | |
354 | ||
355 | /* threading */ | |
356 | diff --git a/common/osdep.h b/common/osdep.h | |
357 | index 915ec05..2095198 100644 | |
358 | --- a/common/osdep.h | |
359 | +++ b/common/osdep.h | |
360 | @@ -147,7 +147,9 @@ | |
361 | #ifdef WORDS_BIGENDIAN | |
362 | #define endian_fix(x) (x) | |
363 | #define endian_fix32(x) (x) | |
364 | -#elif defined(__GNUC__) && defined(HAVE_MMX) | |
365 | +#define endian_fix16(x) (x) | |
366 | +#else | |
367 | +#if defined(__GNUC__) && defined(HAVE_MMX) | |
368 | static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x ) | |
369 | { | |
370 | asm("bswap %0":"+r"(x)); | |
371 | @@ -171,6 +173,11 @@ static ALWAYS_INLINE intptr_t endian_fix( intptr_t x ) | |
372 | return endian_fix32(x); | |
373 | } | |
374 | #endif | |
375 | +static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x ) /* swaps the two bytes of x; this is the non-WORDS_BIGENDIAN variant, the big-endian one is the identity macro above */ | |
376 | +{ | |
377 | + return (x<<8)|(x>>8); /* the uint16_t return type discards the bits shifted above bit 15 */ | |
378 | +} | |
379 | +#endif | |
380 | ||
381 | #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3) | |
382 | #define x264_clz(x) __builtin_clz(x) | |
383 | diff --git a/encoder/analyse.c b/encoder/analyse.c | |
384 | index 4a36fcd..38b9976 100644 | |
385 | --- a/encoder/analyse.c | |
386 | +++ b/encoder/analyse.c | |
387 | @@ -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) | |
388 | h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp]; | |
389 | } | |
390 | h->mb.i_psy_rd_lambda = a->i_lambda; | |
391 | - /* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */ | |
392 | - h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256; | |
393 | + /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */ | |
394 | + h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256; | |
395 | ||
396 | h->mb.i_me_method = h->param.analyse.i_me_method; | |
397 | h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine; | |
398 | diff --git a/encoder/encoder.c b/encoder/encoder.c | |
399 | index 0f1ccc8..74ff97d 100644 | |
400 | --- a/encoder/encoder.c | |
401 | +++ b/encoder/encoder.c | |
402 | @@ -42,7 +42,7 @@ | |
403 | ||
404 | #define bs_write_ue bs_write_ue_big | |
405 | ||
406 | -static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
407 | +static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
408 | x264_nal_t **pp_nal, int *pi_nal, | |
409 | x264_picture_t *pic_out ); | |
410 | ||
411 | @@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h ) | |
412 | h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 ); | |
413 | h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 ); | |
414 | h->param.rc.i_aq_mode = 0; | |
415 | + h->param.rc.b_mb_tree = 0; | |
416 | } | |
417 | h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 ); | |
418 | h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max ); | |
419 | @@ -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h ) | |
420 | if( !h->param.i_bframe ) | |
421 | h->param.i_bframe_adaptive = X264_B_ADAPT_NONE; | |
422 | h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0; | |
423 | + h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_LOOKAHEAD_MAX ); | |
424 | + h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max ); | |
425 | + if( h->param.rc.b_stat_read ) | |
426 | + h->param.rc.i_lookahead = 0; | |
427 | + else if( !h->param.rc.i_lookahead ) | |
428 | + h->param.rc.b_mb_tree = 0; | |
429 | + if( h->param.rc.f_qcompress == 1 ) | |
430 | + h->param.rc.b_mb_tree = 0; | |
431 | + | |
432 | h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO | |
433 | && h->param.i_bframe | |
434 | && ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read ); | |
435 | @@ -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h ) | |
436 | if( !h->param.b_cabac ) | |
437 | h->param.analyse.i_trellis = 0; | |
438 | h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 ); | |
439 | + if( !h->param.analyse.b_psy ) | |
440 | + { | |
441 | + h->param.analyse.f_psy_rd = 0; | |
442 | + h->param.analyse.f_psy_trellis = 0; | |
443 | + } | |
444 | if( !h->param.analyse.i_trellis ) | |
445 | h->param.analyse.f_psy_trellis = 0; | |
446 | h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 ); | |
447 | @@ -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h ) | |
448 | h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 ); | |
449 | if( h->param.rc.f_aq_strength == 0 ) | |
450 | h->param.rc.i_aq_mode = 0; | |
451 | + /* MB-tree requires AQ to be on, even if the strength is zero. */ | |
452 | + if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree ) | |
453 | + { | |
454 | + h->param.rc.i_aq_mode = 1; | |
455 | + h->param.rc.f_aq_strength = 0; | |
456 | + } | |
457 | + if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid ) | |
458 | + { | |
459 | + x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" ); | |
460 | + h->param.b_bframe_pyramid = 0; | |
461 | + } | |
462 | h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 ); | |
463 | if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) ) | |
464 | h->param.analyse.i_subpel_refine = 9; | |
465 | @@ -723,6 +749,9 @@ x264_t *x264_encoder_open ( x264_param_t *param ) | |
466 | h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1; | |
467 | else | |
468 | h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1; | |
469 | + if( h->param.rc.b_mb_tree ) | |
470 | + h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead ); | |
471 | + | |
472 | h->frames.i_max_ref0 = h->param.i_frame_reference; | |
473 | h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames; | |
474 | h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering; | |
475 | @@ -730,7 +759,8 @@ x264_t *x264_encoder_open ( x264_param_t *param ) | |
476 | && ( h->param.rc.i_rc_method == X264_RC_ABR | |
477 | || h->param.rc.i_rc_method == X264_RC_CRF | |
478 | || h->param.i_bframe_adaptive | |
479 | - || h->param.i_scenecut_threshold ); | |
480 | + || h->param.i_scenecut_threshold | |
481 | + || h->param.rc.b_mb_tree ); | |
482 | h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0); | |
483 | h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8); | |
484 | ||
485 | @@ -1443,7 +1473,12 @@ int x264_encoder_encode( x264_t *h, | |
486 | if( h->frames.b_have_lowres ) | |
487 | x264_frame_init_lowres( h, fenc ); | |
488 | ||
489 | - if( h->param.rc.i_aq_mode ) | |
490 | + if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read ) | |
491 | + { | |
492 | + if( x264_macroblock_tree_read( h, fenc ) ) | |
493 | + return -1; | |
494 | + } | |
495 | + else if( h->param.rc.i_aq_mode ) | |
496 | x264_adaptive_quant_frame( h, fenc ); | |
497 | ||
498 | if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads ) | |
499 | @@ -1461,7 +1496,8 @@ int x264_encoder_encode( x264_t *h, | |
500 | /* 2: Select frame types */ | |
501 | if( h->frames.next[0] == NULL ) | |
502 | { | |
503 | - x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); | |
504 | + if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 ) | |
505 | + return -1; | |
506 | return 0; | |
507 | } | |
508 | ||
509 | @@ -1621,11 +1657,12 @@ int x264_encoder_encode( x264_t *h, | |
510 | else | |
511 | x264_slices_write( h ); | |
512 | ||
513 | - x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ); | |
514 | + if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 ) | |
515 | + return -1; | |
516 | return 0; | |
517 | } | |
518 | ||
519 | -static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
520 | +static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
521 | x264_nal_t **pp_nal, int *pi_nal, | |
522 | x264_picture_t *pic_out ) | |
523 | { | |
524 | @@ -1640,7 +1677,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
525 | if( !h->out.i_nal ) | |
526 | { | |
527 | pic_out->i_type = X264_TYPE_AUTO; | |
528 | - return; | |
529 | + return 0; | |
530 | } | |
531 | ||
532 | x264_frame_push_unused( thread_current, h->fenc ); | |
533 | @@ -1670,7 +1707,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
534 | ||
535 | /* update rc */ | |
536 | x264_emms(); | |
537 | - x264_ratecontrol_end( h, h->out.i_frame_size * 8 ); | |
538 | + if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 ) | |
539 | + return -1; | |
540 | ||
541 | /* restore CPU state (before using float again) */ | |
542 | x264_emms(); | |
543 | @@ -1784,6 +1822,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, | |
544 | ||
545 | if( h->param.psz_dump_yuv ) | |
546 | x264_frame_dump( h ); | |
547 | + | |
548 | + return 0; | |
549 | } | |
550 | ||
551 | static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra ) | |
552 | diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c | |
553 | index 087e658..f669007 100644 | |
554 | --- a/encoder/ratecontrol.c | |
555 | +++ b/encoder/ratecontrol.c | |
556 | @@ -71,6 +71,7 @@ struct x264_ratecontrol_t | |
557 | double fps; | |
558 | double bitrate; | |
559 | double rate_tolerance; | |
560 | + double qcompress; | |
561 | int nmb; /* number of macroblocks in a frame */ | |
562 | int qp_constant[5]; | |
563 | ||
564 | @@ -106,6 +107,10 @@ struct x264_ratecontrol_t | |
565 | /* 2pass stuff */ | |
566 | FILE *p_stat_file_out; | |
567 | char *psz_stat_file_tmpname; | |
568 | + FILE *p_mbtree_stat_file_out; | |
569 | + char *psz_mbtree_stat_file_tmpname; | |
570 | + char *psz_mbtree_stat_file_name; | |
571 | + FILE *p_mbtree_stat_file_in; | |
572 | ||
573 | int num_entries; /* number of ratecontrol_entry_ts */ | |
574 | ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */ | |
575 | @@ -118,6 +123,7 @@ struct x264_ratecontrol_t | |
576 | double lmin[5]; /* min qscale by frame type */ | |
577 | double lmax[5]; | |
578 | double lstep; /* max change (multiply) in qscale per frame */ | |
579 | + uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */ | |
580 | ||
581 | /* MBRC stuff */ | |
582 | double frame_size_estimated; | |
583 | @@ -191,49 +197,6 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame | |
584 | return var; | |
585 | } | |
586 | ||
587 | -static const float log2_lut[128] = { | |
588 | - 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682, | |
589 | - 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987, | |
590 | - 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840, | |
591 | - 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288, | |
592 | - 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370, | |
593 | - 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121, | |
594 | - 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570, | |
595 | - 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743, | |
596 | - 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662, | |
597 | - 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349, | |
598 | - 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819, | |
599 | - 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090, | |
600 | - 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175, | |
601 | - 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087, | |
602 | - 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837, | |
603 | - 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435, | |
604 | -}; | |
605 | - | |
606 | -static const uint8_t exp2_lut[64] = { | |
607 | - 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47, | |
608 | - 50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104, | |
609 | - 108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172, | |
610 | - 177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253, | |
611 | -}; | |
612 | - | |
613 | -static ALWAYS_INLINE float x264_log2( uint32_t x ) | |
614 | -{ | |
615 | - int lz = x264_clz( x ); | |
616 | - return log2_lut[(x<<lz>>24)&0x7f] + (31 - lz); | |
617 | -} | |
618 | - | |
619 | -static ALWAYS_INLINE int x264_exp2fix8( float x ) | |
620 | -{ | |
621 | - int i, f; | |
622 | - x += 8; | |
623 | - if( x <= 0 ) return 0; | |
624 | - if( x >= 16 ) return 0xffff; | |
625 | - i = x; | |
626 | - f = (x-i)*64; | |
627 | - return (exp2_lut[f]+256) << i >> 8; | |
628 | -} | |
629 | - | |
630 | void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ) | |
631 | { | |
632 | /* constants chosen to result in approximately the same overall bitrate as without AQ. | |
633 | @@ -241,6 +204,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ) | |
634 | int mb_x, mb_y; | |
635 | float strength; | |
636 | float avg_adj = 0.f; | |
637 | + /* Need to init it anyways for MB tree. */ | |
638 | + if( h->param.rc.f_aq_strength == 0 ) | |
639 | + { | |
640 | + int mb_xy; | |
641 | + memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) ); | |
642 | + if( h->frames.b_have_lowres ) | |
643 | + for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ ) | |
644 | + frame->i_inv_qscale_factor[mb_xy] = 256; | |
645 | + return; | |
646 | + } | |
647 | + | |
648 | if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE ) | |
649 | { | |
650 | for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ ) | |
651 | @@ -257,6 +231,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ) | |
652 | } | |
653 | else | |
654 | strength = h->param.rc.f_aq_strength * 1.0397f; | |
655 | + | |
656 | for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ ) | |
657 | for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ ) | |
658 | { | |
659 | @@ -291,6 +266,47 @@ void x264_adaptive_quant( x264_t *h ) | |
660 | h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max ); | |
661 | } | |
662 | ||
663 | +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame ) | |
664 | +{ /* Fill frame->f_qp_offset from the MB-tree stats file; returns 0 on success, -1 on a short read or a frame-type mismatch. */ | |
665 | + x264_ratecontrol_t *rc = h->rc; | |
666 | + uint8_t i_type_actual = rc->entry[frame->i_frame].pict_type; | |
667 | + int i; | |
668 | + /* Only non-B frames are read from the file; B-frames take the x264_adaptive_quant_frame() path below. */ | |
669 | + if( i_type_actual != SLICE_TYPE_B ) | |
670 | + { | |
671 | + uint8_t i_type; | |
672 | + /* Each record starts with one byte holding the frame type. */ | |
673 | + if( !fread( &i_type, 1, 1, rc->p_mbtree_stat_file_in ) ) | |
674 | + goto fail; | |
675 | + | |
676 | + if( i_type != i_type_actual ) | |
677 | + { | |
678 | + x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", i_type,i_type_actual); | |
679 | + return -1; | |
680 | + } | |
681 | + /* The type byte is followed by one 16-bit value per macroblock. */ | |
682 | + if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count ) | |
683 | + goto fail; | |
684 | + /* Each value is cast to signed 16-bit and scaled by 1/256 to give the float offset. */ | |
685 | + for( i = 0; i < h->mb.i_mb_count; i++ ) | |
686 | + frame->f_qp_offset[i] = ((float)(int16_t)endian_fix16( rc->qp_buffer[i] )) * (1/256.0); | |
687 | + } | |
688 | + else | |
689 | + x264_adaptive_quant_frame( h, frame ); | |
690 | + return 0; | |
691 | +fail: /* reached when either fread() returns fewer items than requested */ | |
692 | + x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n"); | |
693 | + return -1; | |
694 | +} | |
695 | + | |
696 | +static char *x264_strcat_filename( char *input, char *suffix ) | |
697 | +{ /* Return a newly x264_malloc'd string holding input followed by suffix. */ | |
698 | + char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 ); /* +1 for the terminating NUL */ | |
699 | + strcpy( output, input ); /* NOTE(review): the allocation result is used without a NULL check */ | |
700 | + strcat( output, suffix ); | |
701 | + return output; | |
702 | +} | |
703 | + | |
704 | int x264_ratecontrol_new( x264_t *h ) | |
705 | { | |
706 | x264_ratecontrol_t *rc; | |
707 | @@ -310,6 +326,14 @@ int x264_ratecontrol_new( x264_t *h ) | |
708 | else | |
709 | rc->fps = 25.0; | |
710 | ||
711 | + if( h->param.rc.b_mb_tree ) | |
712 | + { | |
713 | + h->param.rc.f_pb_factor = 1; | |
714 | + rc->qcompress = 1; | |
715 | + } | |
716 | + else | |
717 | + rc->qcompress = h->param.rc.f_qcompress; | |
718 | + | |
719 | rc->bitrate = h->param.rc.i_bitrate * 1000.; | |
720 | rc->rate_tolerance = h->param.rc.f_rate_tolerance; | |
721 | rc->nmb = h->mb.i_mb_count; | |
722 | @@ -379,17 +403,18 @@ int x264_ratecontrol_new( x264_t *h ) | |
723 | rc->accum_p_norm = .01; | |
724 | rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm; | |
725 | /* estimated ratio that produces a reasonable QP for the first I-frame */ | |
726 | - rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 ); | |
727 | + rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 ); | |
728 | rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps; | |
729 | rc->last_non_b_pict_type = SLICE_TYPE_I; | |
730 | } | |
731 | ||
732 | if( h->param.rc.i_rc_method == X264_RC_CRF ) | |
733 | { | |
734 | - /* arbitrary rescaling to make CRF somewhat similar to QP */ | |
735 | + /* Arbitrary rescaling to make CRF somewhat similar to QP. | |
736 | + * Try to compensate for MB-tree's effects as well. */ | |
737 | double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80); | |
738 | - rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress ) | |
739 | - / qp2qscale( h->param.rc.f_rf_constant ); | |
740 | + rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress ) | |
741 | + / qp2qscale( h->param.rc.f_rf_constant + (h->param.rc.b_mb_tree?5:0) ); | |
742 | } | |
743 | ||
744 | rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0); | |
745 | @@ -437,6 +462,17 @@ int x264_ratecontrol_new( x264_t *h ) | |
746 | x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n"); | |
747 | return -1; | |
748 | } | |
749 | + if( h->param.rc.b_mb_tree ) | |
750 | + { | |
751 | + char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" ); | |
752 | + rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" ); | |
753 | + x264_free( mbtree_stats_in ); | |
754 | + if( !rc->p_mbtree_stat_file_in ) | |
755 | + { | |
756 | + x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n"); | |
757 | + return -1; | |
758 | + } | |
759 | + } | |
760 | ||
761 | /* check whether 1st pass options were compatible with current options */ | |
762 | if( !strncmp( stats_buf, "#options:", 9 ) ) | |
763 | @@ -483,6 +519,9 @@ int x264_ratecontrol_new( x264_t *h ) | |
764 | x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" ); | |
765 | return -1; | |
766 | } | |
767 | + | |
768 | + if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc-lookahead=" ) ) && sscanf( p, "rc-lookahead=%d", &i ) ) | |
769 | + h->param.rc.i_lookahead = i; | |
770 | } | |
771 | ||
772 | /* find number of pics */ | |
773 | @@ -585,10 +624,7 @@ int x264_ratecontrol_new( x264_t *h ) | |
774 | if( h->param.rc.b_stat_write ) | |
775 | { | |
776 | char *p; | |
777 | - | |
778 | - rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 ); | |
779 | - strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out ); | |
780 | - strcat( rc->psz_stat_file_tmpname, ".temp" ); | |
781 | + rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" ); | |
782 | ||
783 | rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" ); | |
784 | if( rc->p_stat_file_out == NULL ) | |
785 | @@ -600,6 +636,25 @@ int x264_ratecontrol_new( x264_t *h ) | |
786 | p = x264_param2string( &h->param, 1 ); | |
787 | fprintf( rc->p_stat_file_out, "#options: %s\n", p ); | |
788 | x264_free( p ); | |
789 | + if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read ) | |
790 | + { | |
791 | + rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" ); | |
792 | + rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" ); | |
793 | + | |
794 | + rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" ); | |
795 | + if( rc->p_mbtree_stat_file_out == NULL ) | |
796 | + { | |
797 | + x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n"); | |
798 | + return -1; | |
799 | + } | |
800 | + } | |
801 | + } | |
802 | + | |
803 | + if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) ) | |
804 | + { | |
805 | + rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t)); | |
806 | + if( !rc->qp_buffer ) | |
807 | + return -1; | |
808 | } | |
809 | ||
810 | for( i=0; i<h->param.i_threads; i++ ) | |
811 | @@ -739,8 +794,8 @@ void x264_ratecontrol_summary( x264_t *h ) | |
812 | { | |
813 | double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80); | |
814 | x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n", | |
815 | - qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress ) | |
816 | - * rc->cplxr_sum / rc->wanted_bits_window ) ); | |
817 | + qscale2qp( pow( base_cplx, 1 - rc->qcompress ) | |
818 | + * rc->cplxr_sum / rc->wanted_bits_window ) - (h->param.rc.b_mb_tree?5:0) ); | |
819 | } | |
820 | } | |
821 | ||
822 | @@ -760,9 +815,22 @@ void x264_ratecontrol_delete( x264_t *h ) | |
823 | } | |
824 | x264_free( rc->psz_stat_file_tmpname ); | |
825 | } | |
826 | + if( rc->p_mbtree_stat_file_out ) | |
827 | + { | |
828 | + fclose( rc->p_mbtree_stat_file_out ); | |
829 | + if( h->i_frame >= rc->num_entries ) | |
830 | + if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 ) | |
831 | + { | |
832 | + x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n", | |
833 | + rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ); | |
834 | + } | |
835 | + x264_free( rc->psz_mbtree_stat_file_tmpname ); | |
836 | + x264_free( rc->psz_mbtree_stat_file_name ); | |
837 | + } | |
838 | x264_free( rc->pred ); | |
839 | x264_free( rc->pred_b_from_p ); | |
840 | x264_free( rc->entry ); | |
841 | + x264_free( rc->qp_buffer ); | |
842 | if( rc->zones ) | |
843 | { | |
844 | x264_free( rc->zones[0].param ); | |
845 | @@ -1086,7 +1154,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num ) | |
846 | } | |
847 | ||
848 | /* After encoding one frame, save stats and update ratecontrol state */ | |
849 | -void x264_ratecontrol_end( x264_t *h, int bits ) | |
850 | +int x264_ratecontrol_end( x264_t *h, int bits ) | |
851 | { | |
852 | x264_ratecontrol_t *rc = h->rc; | |
853 | const int *mbs = h->stat.frame.i_mb_count; | |
854 | @@ -1114,7 +1182,7 @@ void x264_ratecontrol_end( x264_t *h, int bits ) | |
855 | ( dir_frame>0 ? 's' : dir_frame<0 ? 't' : | |
856 | dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' ) | |
857 | : '-'; | |
858 | - fprintf( rc->p_stat_file_out, | |
859 | + if( fprintf( rc->p_stat_file_out, | |
860 | "in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n", | |
861 | h->fenc->i_frame, h->i_frame, | |
862 | c_type, rc->qpa_rc, | |
863 | @@ -1124,7 +1192,22 @@ void x264_ratecontrol_end( x264_t *h, int bits ) | |
864 | h->stat.frame.i_mb_count_i, | |
865 | h->stat.frame.i_mb_count_p, | |
866 | h->stat.frame.i_mb_count_skip, | |
867 | - c_direct); | |
868 | + c_direct) < 0 ) | |
869 | + goto fail; | |
870 | + | |
871 | + /* Don't re-write the data in multi-pass mode. */ | |
872 | + if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read ) | |
873 | + { | |
874 | + uint8_t i_type = h->sh.i_type; | |
875 | + int i; | |
876 | + /* Values are stored as big-endian FIX8.8 */ | |
877 | + for( i = 0; i < h->mb.i_mb_count; i++ ) | |
878 | + rc->qp_buffer[i] = endian_fix16( h->fenc->f_qp_offset[i]*256.0 ); | |
879 | + if( fwrite( &i_type, 1, 1, rc->p_mbtree_stat_file_out ) < 1 ) | |
880 | + goto fail; | |
881 | + if( fwrite( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_out ) < h->mb.i_mb_count ) | |
882 | + goto fail; | |
883 | + } | |
884 | } | |
885 | ||
886 | if( rc->b_abr ) | |
887 | @@ -1162,6 +1245,10 @@ void x264_ratecontrol_end( x264_t *h, int bits ) | |
888 | } | |
889 | ||
890 | update_vbv( h, bits ); | |
891 | + return 0; | |
892 | +fail: | |
893 | + x264_log(h, X264_LOG_ERROR, "ratecontrol_end: stats file could not be written to\n"); | |
894 | + return -1; | |
895 | } | |
896 | ||
897 | /**************************************************************************** | |
898 | @@ -1177,7 +1264,7 @@ static double get_qscale(x264_t *h, ratecontrol_entry_t *rce, double rate_factor | |
899 | double q; | |
900 | x264_zone_t *zone = get_zone( h, frame_num ); | |
901 | ||
902 | - q = pow( rce->blurred_complexity, 1 - h->param.rc.f_qcompress ); | |
903 | + q = pow( rce->blurred_complexity, 1 - rcc->qcompress ); | |
904 | ||
905 | // avoid NaN's in the rc_eq | |
906 | if(!isfinite(q) || rce->tex_bits + rce->mv_bits == 0) | |
907 | diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h | |
908 | index 3310d3c..ed8abab 100644 | |
909 | --- a/encoder/ratecontrol.h | |
910 | +++ b/encoder/ratecontrol.h | |
911 | @@ -29,12 +29,13 @@ void x264_ratecontrol_delete( x264_t * ); | |
912 | ||
913 | void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame ); | |
914 | void x264_adaptive_quant( x264_t * ); | |
915 | +int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame ); | |
916 | void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ); | |
917 | void x264_ratecontrol_start( x264_t *, int i_force_qp ); | |
918 | int x264_ratecontrol_slice_type( x264_t *, int i_frame ); | |
919 | void x264_ratecontrol_mb( x264_t *, int bits ); | |
920 | int x264_ratecontrol_qp( x264_t * ); | |
921 | -void x264_ratecontrol_end( x264_t *, int bits ); | |
922 | +int x264_ratecontrol_end( x264_t *, int bits ); | |
923 | void x264_ratecontrol_summary( x264_t * ); | |
924 | void x264_ratecontrol_set_estimated_size( x264_t *, int bits ); | |
925 | int x264_ratecontrol_get_estimated_size( x264_t const *); | |
926 | diff --git a/encoder/slicetype.c b/encoder/slicetype.c | |
927 | index 2c16429..be3eaf7 100644 | |
928 | --- a/encoder/slicetype.c | |
929 | +++ b/encoder/slicetype.c | |
930 | @@ -63,6 +63,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, | |
931 | x264_me_t m[2]; | |
932 | int i_bcost = COST_MAX; | |
933 | int l, i; | |
934 | + int list_used = 0; | |
935 | ||
936 | h->mb.pic.p_fenc[0] = h->mb.pic.fenc_buf; | |
937 | h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fenc[0], FENC_STRIDE, &fenc->lowres[0][i_pel_offset], i_stride, 8 ); | |
938 | @@ -107,8 +108,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, | |
939 | h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \ | |
940 | i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \ | |
941 | m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \ | |
942 | - if( i_bcost > i_cost ) \ | |
943 | - i_bcost = i_cost; \ | |
944 | + COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \ | |
945 | } | |
946 | ||
947 | m[0].i_pixel = PIXEL_8x8; | |
948 | @@ -138,8 +138,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, | |
949 | int i_cost; | |
950 | h->mc.avg[PIXEL_8x8]( pix1, 16, m[0].p_fref[0], m[0].i_stride[0], m[1].p_fref[0], m[1].i_stride[0], i_bipred_weight ); | |
951 | i_cost = h->pixf.mbcmp[PIXEL_8x8]( m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); | |
952 | - if( i_bcost > i_cost ) | |
953 | - i_bcost = i_cost; | |
954 | + COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); | |
955 | } | |
956 | } | |
957 | ||
958 | @@ -181,16 +180,18 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a, | |
959 | *(uint32_t*)m[l].mv = *(uint32_t*)fenc_mvs[l]; | |
960 | m[l].cost = *fenc_costs[l]; | |
961 | } | |
962 | - i_bcost = X264_MIN( i_bcost, m[l].cost ); | |
963 | + COPY2_IF_LT( i_bcost, m[l].cost, list_used, l+1 ); | |
964 | } | |
965 | ||
966 | if( b_bidir && ( *(uint32_t*)m[0].mv || *(uint32_t*)m[1].mv ) ) | |
967 | TRY_BIDIR( m[0].mv, m[1].mv, 5 ); | |
968 | ||
969 | + frames[b]->lowres_inter_types[b-p0][p1-b][i_mb_xy] = list_used; | |
970 | + | |
971 | lowres_intra_mb: | |
972 | /* forbid intra-mbs in B-frames, because it's rare and not worth checking */ | |
973 | /* FIXME: Should we still forbid them now that we cache intra scores? */ | |
974 | - if( !b_bidir ) | |
975 | + if( !b_bidir || h->param.rc.b_mb_tree ) | |
976 | { | |
977 | int i_icost, b_intra; | |
978 | if( !fenc->b_intra_calculated ) | |
979 | @@ -237,18 +238,23 @@ lowres_intra_mb: | |
980 | } | |
981 | else | |
982 | i_icost = fenc->i_intra_cost[i_mb_xy]; | |
983 | - b_intra = i_icost < i_bcost; | |
984 | - if( b_intra ) | |
985 | - i_bcost = i_icost; | |
986 | - if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1 | |
987 | - && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1) | |
988 | - || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) | |
989 | + if( !b_bidir ) | |
990 | { | |
991 | - fenc->i_intra_mbs[b-p0] += b_intra; | |
992 | - fenc->i_cost_est[0][0] += i_icost; | |
993 | + b_intra = i_icost < i_bcost; | |
994 | + if( b_intra ) | |
995 | + i_bcost = i_icost; | |
996 | + if( (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1 | |
997 | + && i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1) | |
998 | + || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) | |
999 | + { | |
1000 | + fenc->i_intra_mbs[b-p0] += b_intra; | |
1001 | + fenc->i_cost_est[0][0] += i_icost; | |
1002 | + } | |
1003 | } | |
1004 | } | |
1005 | ||
1006 | + frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost; | |
1007 | + | |
1008 | return i_bcost; | |
1009 | } | |
1010 | #undef TRY_BIDIR | |
1011 | @@ -262,6 +268,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, | |
1012 | x264_frame_t **frames, int p0, int p1, int b, | |
1013 | int b_intra_penalty ) | |
1014 | { | |
1015 | + | |
1016 | int i_score = 0; | |
1017 | /* Don't use the AQ'd scores for slicetype decision. */ | |
1018 | int i_score_aq = 0; | |
1019 | @@ -299,7 +306,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, | |
1020 | ||
1021 | /* the edge mbs seem to reduce the predictive quality of the | |
1022 | * whole frame's score, but are needed for a spatial distribution. */ | |
1023 | - if( h->param.rc.i_vbv_buffer_size || h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) | |
1024 | + if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size || | |
1025 | + h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) | |
1026 | { | |
1027 | for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- ) | |
1028 | { | |
1029 | @@ -355,7 +363,172 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a, | |
1030 | return i_score; | |
1031 | } | |
1032 | ||
1033 | -#define MAX_LENGTH (X264_BFRAME_MAX*4) | |
1034 | +/* If MB-tree changes the quantizers, we need to recalculate the frame cost without re-running | |
1035 | + * lookahead: rescale the cached per-MB lowres costs by f_qp_offset, refill row_satd, return the frame score. */ | |
1036 | +static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, | |
1037 | + int p0, int p1, int b ) | |
1038 | +{ /* the parameter a is not used in this function */ | |
1039 | + int i_score = 0; | |
1040 | + int *row_satd = frames[b]->i_row_satds[b-p0][p1-b]; | |
1041 | + x264_emms(); | |
1042 | + for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- ) | |
1043 | + { | |
1044 | + row_satd[ h->mb.i_mb_y ] = 0; | |
1045 | + for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- ) | |
1046 | + { | |
1047 | + int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride; | |
1048 | + int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy]; | |
1049 | + float qp_adj = frames[b]->f_qp_offset[i_mb_xy]; | |
1050 | + i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj*(-1.f/6.f)) + 128) >> 8; /* cost * 2^(-qp_adj/6) in 8-bit fixed point, rounded */ | |
1051 | + row_satd[ h->mb.i_mb_y ] += i_mb_cost; /* every MB, edges included, counts toward its row total */ | |
1052 | + if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 && | |
1053 | + h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) || | |
1054 | + h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 ) | |
1055 | + { /* the frame score uses interior MBs only, unless the frame is at most 2 MBs wide or tall */ | |
1056 | + i_score += i_mb_cost; | |
1057 | + } | |
1058 | + } | |
1059 | + } | |
1060 | + return i_score; | |
1061 | +} | |
1062 | + | |
1063 | +/* Distribute the cost accumulated on each inter-predicted MB of frames[b] onto the MBs that its | |
1064 | + * lowres motion vectors point at in frames[p0] (list 0) and frames[p1] (list 1). */ | |
1065 | +static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b ) | |
1066 | +{ | |
1067 | + x264_frame_t *refs[2] = {frames[p0],frames[p1]}; | |
1068 | + /* Position of b between p0 and p1 in 1/256 units; 128 is only the fallback for p0 == p1 (avoids dividing by zero). */ | |
1069 | + int dist_scale_factor = p1 == p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0); | |
1070 | + int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32; | |
1071 | + | |
1072 | + for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) | |
1073 | + { | |
1074 | + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ ) | |
1075 | + { | |
1076 | + int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride; | |
1077 | + int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index]; | |
1078 | + int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8; | |
1079 | + int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index]; | |
1080 | + /* The approximate amount of data that this block contains. */ | |
1081 | + int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index]; | |
1082 | + /* Don't propagate for an intra block; the test also guarantees a non-zero divisor below. */ | |
1083 | + if( intra_cost > 0 && inter_cost < intra_cost ) | |
1084 | + { | |
1085 | + int mv[2][2], list; | |
1086 | + /* Keep the fraction saved by inter prediction. Divide by 64 for per-pixel summing. */ | |
1087 | + propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6; | |
1088 | + mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0]; | |
1089 | + mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1]; | |
1090 | + if( b != p1 ) | |
1091 | + { | |
1092 | + mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0]; | |
1093 | + mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1]; | |
1094 | + } | |
1095 | + | |
1096 | + /* Follow the MVs to the previous frame(s). */ | |
1097 | + for( list = 0; list < 2; list++ ) | |
1098 | + if( (lists_used >> list)&1 ) | |
1099 | + { | |
1100 | + int x = mv[list][0]; | |
1101 | + int y = mv[list][1]; | |
1102 | + int listamount = propagate_amount; | |
1103 | + int mbx = (x>>5)+h->mb.i_mb_x; | |
1104 | + int mby = ((y>>5)+h->mb.i_mb_y); | |
1105 | + int idx0 = mbx + mby*h->mb.i_mb_stride; | |
1106 | + int idx1 = idx0 + 1; | |
1107 | + int idx2 = idx0 + h->mb.i_mb_stride; | |
1108 | + int idx3 = idx0 + h->mb.i_mb_stride + 1; | |
1109 | + int idx0weight = (32-(y&31))*(32-(x&31)); | |
1110 | + int idx1weight = (32-(y&31))*(x&31); | |
1111 | + int idx2weight = (y&31)*(32-(x&31)); | |
1112 | + int idx3weight = (y&31)*(x&31); | |
1113 | + | |
1114 | + /* Apply bipred weighting. */ | |
1115 | + if( lists_used == 3 ) | |
1116 | + listamount = (listamount * (list?(64-i_bipred_weight):i_bipred_weight) + 32) >> 6; | |
1117 | + | |
1118 | +#define CLIP_ADD(s,x) (s) = X264_MIN((s)+(x),(1<<16)-1) | |
1119 | + /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't | |
1120 | + * be counted. */ | |
1121 | + if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 ) | |
1122 | + { | |
1123 | + CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 ); | |
1124 | + CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 ); | |
1125 | + CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 ); | |
1126 | + CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 ); | |
1127 | + } | |
1128 | + else /* Check offsets individually */ | |
1129 | + { | |
1130 | + if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 ) | |
1131 | + CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 ); | |
1132 | + if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 ) | |
1133 | + CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 ); | |
1134 | + if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 ) | |
1135 | + CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 ); | |
1136 | + if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 ) | |
1137 | + CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 ); | |
1138 | + } | |
1139 | + } | |
1140 | + } | |
1141 | + } | |
1142 | + } | |
1143 | +} | |
1144 | + | |
1145 | +static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int num_frames, int b_intra ) | |
1146 | +{ /* Walk the frames from last to first accumulating i_propagate_cost, then subtract a log2-based term from f_qp_offset of the earliest non-B frame reached. */ | |
1147 | + int i, idx = !b_intra; | |
1148 | + int last_nonb, cur_nonb = 1; | |
1149 | + if( b_intra ) | |
1150 | + x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 ); | |
1151 | + /* Find the last frame that is not a B-frame (the scan stops at index 0 regardless of its type). */ | |
1152 | + i = num_frames-1; | |
1153 | + while( i > 0 && frames[i]->i_type == X264_TYPE_B ) | |
1154 | + i--; | |
1155 | + last_nonb = i; | |
1156 | + /* num_frames == 0 leaves last_nonb at -1: nothing to do. */ | |
1157 | + if( last_nonb < 0 ) | |
1158 | + return; | |
1159 | + | |
1160 | + memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); | |
1161 | + while( i-- > idx ) | |
1162 | + { | |
1163 | + cur_nonb = i; | |
1164 | + while( frames[cur_nonb]->i_type == X264_TYPE_B && cur_nonb > 0 ) | |
1165 | + cur_nonb--; | |
1166 | + if( cur_nonb < idx ) | |
1167 | + break; | |
1168 | + x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 ); /* last_nonb evaluated with cur_nonb as p0 */ | |
1169 | + memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); | |
1170 | + x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb ); | |
1171 | + while( frames[i]->i_type == X264_TYPE_B && i > 0 ) /* the B-frames lying between cur_nonb and last_nonb */ | |
1172 | + { | |
1173 | + x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 ); | |
1174 | + memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) ); | |
1175 | + x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i ); | |
1176 | + i--; | |
1177 | + } | |
1178 | + last_nonb = cur_nonb; | |
1179 | + } | |
1180 | + x264_emms(); | |
1181 | + /* Turn the propagate cost accumulated on frames[last_nonb] into a per-macroblock QP offset. */ | |
1182 | + for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ ) | |
1183 | + { | |
1184 | + for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ ) | |
1185 | + { | |
1186 | + int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride; | |
1187 | + int intra_cost = (frames[last_nonb]->i_intra_cost[mb_index] * frames[last_nonb]->i_inv_qscale_factor[mb_index]+128)>>8; | |
1188 | + /* MBs whose scaled intra cost is zero are skipped -- presumably to avoid x264_log2(0); TODO confirm. */ | |
1189 | + if( intra_cost ) | |
1190 | + { | |
1191 | + int propagate_cost = frames[last_nonb]->i_propagate_cost[mb_index]; | |
1192 | + float log2_ratio = x264_log2(intra_cost + propagate_cost) - x264_log2(intra_cost); | |
1193 | + /* Allow the constant to be adjusted via qcompress, since the two | |
1194 | + * concepts are very similar. */ | |
1195 | + frames[last_nonb]->f_qp_offset[mb_index] -= 5.0 * (1.0 - h->param.rc.f_qcompress) * log2_ratio; | |
1196 | + } | |
1197 | + } | |
1198 | + } | |
1199 | +} | |
1200 | ||
1201 | static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, char *path, int threshold ) | |
1202 | { | |
1203 | @@ -393,14 +566,14 @@ static int x264_slicetype_path_cost( x264_t *h, x264_mb_analysis_t *a, x264_fram | |
1204 | /* Uses strings due to the fact that the speed of the control functions is | |
1205 | negligable compared to the cost of running slicetype_frame_cost, and because | |
1206 | it makes debugging easier. */ | |
1207 | -static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[MAX_LENGTH] ) | |
1208 | +static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int max_bframes, int buffer_size, char (*best_paths)[X264_LOOKAHEAD_MAX] ) | |
1209 | { | |
1210 | - char paths[X264_BFRAME_MAX+2][MAX_LENGTH] = {{0}}; | |
1211 | + char paths[X264_BFRAME_MAX+2][X264_LOOKAHEAD_MAX] = {{0}}; | |
1212 | int num_paths = X264_MIN(max_bframes+1, length); | |
1213 | int suffix_size, loc, path; | |
1214 | int best_cost = COST_MAX; | |
1215 | int best_path_index = 0; | |
1216 | - length = X264_MIN(length,MAX_LENGTH); | |
1217 | + length = X264_MIN(length,X264_LOOKAHEAD_MAX); | |
1218 | ||
1219 | /* Iterate over all currently possible paths and add suffixes to each one */ | |
1220 | for( suffix_size = 0; suffix_size < num_paths; suffix_size++ ) | |
1221 | @@ -426,15 +599,6 @@ static void x264_slicetype_path( x264_t *h, x264_mb_analysis_t *a, x264_frame_t | |
1222 | memcpy( best_paths[length], paths[best_path_index], length ); | |
1223 | } | |
1224 | ||
1225 | -static int x264_slicetype_path_search( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int length, int bframes, int buffer ) | |
1226 | -{ | |
1227 | - char best_paths[MAX_LENGTH][MAX_LENGTH] = {"","P"}; | |
1228 | - int n; | |
1229 | - for( n = 2; n < length-1; n++ ) | |
1230 | - x264_slicetype_path( h, a, frames, n, bframes, buffer, best_paths ); | |
1231 | - return strspn( best_paths[length-2], "B" ); | |
1232 | -} | |
1233 | - | |
1234 | static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, int p0, int p1 ) | |
1235 | { | |
1236 | x264_frame_t *frame = frames[p1]; | |
1237 | @@ -477,13 +641,13 @@ static int scenecut( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **frames, in | |
1238 | return res; | |
1239 | } | |
1240 | ||
1241 | -static void x264_slicetype_analyse( x264_t *h ) | |
1242 | +static void x264_slicetype_analyse( x264_t *h, int keyframe ) | |
1243 | { | |
1244 | x264_mb_analysis_t a; | |
1245 | - x264_frame_t *frames[X264_BFRAME_MAX*4+3] = { NULL, }; | |
1246 | + x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, }; | |
1247 | int num_frames; | |
1248 | int keyint_limit; | |
1249 | - int j; | |
1250 | + int i,j; | |
1251 | int i_mb_count = NUM_MBS; | |
1252 | int cost1p0, cost2p0, cost1b1, cost2p1; | |
1253 | int idr_frame_type; | |
1254 | @@ -497,96 +661,150 @@ static void x264_slicetype_analyse( x264_t *h ) | |
1255 | frames[j+1] = h->frames.next[j]; | |
1256 | keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->frames.i_last_idr - 1; | |
1257 | num_frames = X264_MIN( j, keyint_limit ); | |
1258 | - if( num_frames == 0 ) | |
1259 | + | |
1260 | + if( num_frames == 0 && (!j || !h->param.rc.b_mb_tree) ) | |
1261 | return; | |
1262 | ||
1263 | x264_lowres_context_init( h, &a ); | |
1264 | idr_frame_type = frames[1]->i_frame - h->frames.i_last_idr >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I; | |
1265 | ||
1266 | - if( num_frames == 1 ) | |
1267 | + if( num_frames == 1 && !h->param.rc.b_mb_tree ) | |
1268 | { | |
1269 | -no_b_frames: | |
1270 | frames[1]->i_type = X264_TYPE_P; | |
1271 | if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) ) | |
1272 | frames[1]->i_type = idr_frame_type; | |
1273 | return; | |
1274 | } | |
1275 | ||
1276 | - if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS ) | |
1277 | + /* This is important psy-wise: if we have a non-scenecut keyframe, | |
1278 | + * there will be significant visual artifacts if the frames just before | |
1279 | + * go down in quality due to being referenced less, despite it being | |
1280 | + * more RD-optimal. */ | |
1281 | + if( h->param.analyse.b_psy && h->param.rc.b_mb_tree ) | |
1282 | + num_frames = j; | |
1283 | + | |
1284 | + char best_paths[X264_LOOKAHEAD_MAX][X264_LOOKAHEAD_MAX] = {"","P"}; | |
1285 | + int n; | |
1286 | + int num_bframes = 0; | |
1287 | + int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe); | |
1288 | + int num_analysed_frames = num_frames; | |
1289 | + int reset_start; | |
1290 | + if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) ) | |
1291 | { | |
1292 | - int num_bframes; | |
1293 | - int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe); | |
1294 | - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) ) | |
1295 | + frames[1]->i_type = idr_frame_type; | |
1296 | + return; | |
1297 | + } | |
1298 | + | |
1299 | + if( h->param.i_bframe ) | |
1300 | + { | |
1301 | + if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS ) | |
1302 | { | |
1303 | - frames[1]->i_type = idr_frame_type; | |
1304 | - return; | |
1305 | + /* Perform the frametype analysis. */ | |
1306 | + for( n = 2; n < num_frames-1; n++ ) | |
1307 | + x264_slicetype_path( h, &a, frames, n, max_bframes, num_frames-max_bframes, best_paths ); | |
1308 | + num_bframes = strspn( best_paths[num_frames-2], "B" ); | |
1309 | + /* Load the results of the analysis into the frame types. */ | |
1310 | + for( j = 1; j < num_frames; j++ ) | |
1311 | + frames[j]->i_type = best_paths[num_frames-2][j-1] == 'B' ? X264_TYPE_B : X264_TYPE_P; | |
1312 | + frames[num_frames]->i_type = X264_TYPE_P; | |
1313 | } | |
1314 | - num_bframes = x264_slicetype_path_search( h, &a, frames, num_frames, max_bframes, num_frames-max_bframes ); | |
1315 | - assert(num_bframes < num_frames); | |
1316 | - | |
1317 | - for( j = 1; j < num_bframes+1; j++ ) | |
1318 | + else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST ) | |
1319 | { | |
1320 | - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) ) | |
1321 | + for( i = 0; i < num_frames-(2-!i); ) | |
1322 | { | |
1323 | - frames[j]->i_type = X264_TYPE_P; | |
1324 | - return; | |
1325 | - } | |
1326 | - frames[j]->i_type = X264_TYPE_B; | |
1327 | - } | |
1328 | - frames[num_bframes+1]->i_type = X264_TYPE_P; | |
1329 | - } | |
1330 | - else if( h->param.i_bframe_adaptive == X264_B_ADAPT_FAST ) | |
1331 | - { | |
1332 | - cost2p1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 2, 1 ); | |
1333 | - if( frames[2]->i_intra_mbs[2] > i_mb_count / 2 ) | |
1334 | - goto no_b_frames; | |
1335 | + cost2p1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+2, 1 ); | |
1336 | + if( frames[i+2]->i_intra_mbs[2] > i_mb_count / 2 ) | |
1337 | + { | |
1338 | + frames[i+1]->i_type = X264_TYPE_P; | |
1339 | + frames[i+2]->i_type = X264_TYPE_P; | |
1340 | + i += 2; | |
1341 | + continue; | |
1342 | + } | |
1343 | ||
1344 | - cost1b1 = x264_slicetype_frame_cost( h, &a, frames, 0, 2, 1, 0 ); | |
1345 | - cost1p0 = x264_slicetype_frame_cost( h, &a, frames, 0, 1, 1, 0 ); | |
1346 | - cost2p0 = x264_slicetype_frame_cost( h, &a, frames, 1, 2, 2, 0 ); | |
1347 | + cost1b1 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+2, i+1, 0 ); | |
1348 | + cost1p0 = x264_slicetype_frame_cost( h, &a, frames, i+0, i+1, i+1, 0 ); | |
1349 | + cost2p0 = x264_slicetype_frame_cost( h, &a, frames, i+1, i+2, i+2, 0 ); | |
1350 | ||
1351 | - if( cost1p0 + cost2p0 < cost1b1 + cost2p1 ) | |
1352 | - goto no_b_frames; | |
1353 | + if( cost1p0 + cost2p0 < cost1b1 + cost2p1 ) | |
1354 | + { | |
1355 | + frames[i+1]->i_type = X264_TYPE_P; | |
1356 | + frames[i+2]->i_type = X264_TYPE_P; | |
1357 | + i += 2; | |
1358 | + continue; | |
1359 | + } | |
1360 | ||
1361 | - // arbitrary and untuned | |
1362 | - #define INTER_THRESH 300 | |
1363 | - #define P_SENS_BIAS (50 - h->param.i_bframe_bias) | |
1364 | - frames[1]->i_type = X264_TYPE_B; | |
1365 | + // arbitrary and untuned | |
1366 | + #define INTER_THRESH 300 | |
1367 | + #define P_SENS_BIAS (50 - h->param.i_bframe_bias) | |
1368 | + frames[i+1]->i_type = X264_TYPE_B; | |
1369 | + frames[i+2]->i_type = X264_TYPE_P; | |
1370 | ||
1371 | - for( j = 2; j <= X264_MIN( h->param.i_bframe, num_frames-1 ); j++ ) | |
1372 | + for( j = i+2; j <= X264_MIN( h->param.i_bframe, num_frames-2 ); j++ ) | |
1373 | + { | |
1374 | + int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-i-1), INTER_THRESH/10); | |
1375 | + int pcost = x264_slicetype_frame_cost( h, &a, frames, i+0, j+1, j+1, 1 ); | |
1376 | + | |
1377 | + if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j-i+1] > i_mb_count/3 ) | |
1378 | + { | |
1379 | + frames[j]->i_type = X264_TYPE_P; | |
1380 | + break; | |
1381 | + } | |
1382 | + else | |
1383 | + frames[j]->i_type = X264_TYPE_B; | |
1384 | + } | |
1385 | + i = j; | |
1386 | + } | |
1387 | + frames[i+!i]->i_type = X264_TYPE_P; | |
1388 | + num_bframes = 0; | |
1389 | + while( num_bframes < num_frames && frames[num_bframes+1]->i_type == X264_TYPE_B ) | |
1390 | + num_bframes++; | |
1391 | + } | |
1392 | + else | |
1393 | { | |
1394 | - int pthresh = X264_MAX(INTER_THRESH - P_SENS_BIAS * (j-1), INTER_THRESH/10); | |
1395 | - int pcost = x264_slicetype_frame_cost( h, &a, frames, 0, j+1, j+1, 1 ); | |
1396 | + num_bframes = X264_MIN(num_frames-1, h->param.i_bframe); | |
1397 | + for( j = 1; j < num_frames; j++ ) | |
1398 | + frames[j]->i_type = (j%(num_bframes+1)) ? X264_TYPE_B : X264_TYPE_P; | |
1399 | + frames[num_frames]->i_type = X264_TYPE_P; | |
1400 | + } | |
1401 | ||
1402 | - if( pcost > pthresh*i_mb_count || frames[j+1]->i_intra_mbs[j+1] > i_mb_count/3 ) | |
1403 | + /* Check scenecut on the first minigop. */ | |
1404 | + for( j = 1; j < num_bframes+1; j++ ) | |
1405 | + if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) ) | |
1406 | { | |
1407 | frames[j]->i_type = X264_TYPE_P; | |
1408 | + num_analysed_frames = j; | |
1409 | break; | |
1410 | } | |
1411 | - else | |
1412 | - frames[j]->i_type = X264_TYPE_B; | |
1413 | - } | |
1414 | + | |
1415 | + reset_start = keyframe ? 1 : X264_MIN( num_bframes+2, num_analysed_frames+1 ); | |
1416 | } | |
1417 | else | |
1418 | { | |
1419 | - int max_bframes = X264_MIN(num_frames-1, h->param.i_bframe); | |
1420 | - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1 ) ) | |
1421 | - { | |
1422 | - frames[1]->i_type = idr_frame_type; | |
1423 | - return; | |
1424 | - } | |
1425 | + for( j = 1; j < num_frames; j++ ) | |
1426 | + frames[j]->i_type = X264_TYPE_P; | |
1427 | + reset_start = !keyframe + 1; | |
1428 | + } | |
1429 | ||
1430 | - for( j = 1; j < max_bframes+1; j++ ) | |
1431 | - { | |
1432 | - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, j, j+1 ) ) | |
1433 | + /* Perform the actual macroblock tree analysis. | |
1434 | + * Don't go farther than the lookahead parameter; this helps in short GOPs. */ | |
1435 | + if( h->param.rc.b_mb_tree ) | |
1436 | + x264_macroblock_tree( h, &a, frames, X264_MIN(num_analysed_frames, h->param.rc.i_lookahead), keyframe ); | |
1437 | + | |
1438 | + /* Enforce keyframe limit. */ | |
1439 | + if( h->param.i_bframe ) | |
1440 | + for( j = 0; j <= num_bframes; j++ ) | |
1441 | + if( j+1 > keyint_limit ) | |
1442 | { | |
1443 | - frames[j]->i_type = X264_TYPE_P; | |
1444 | - return; | |
1445 | + if( j ) | |
1446 | + frames[j]->i_type = X264_TYPE_P; | |
1447 | + frames[j+1]->i_type = idr_frame_type; | |
1448 | + reset_start = j+2; | |
1449 | + break; | |
1450 | } | |
1451 | - frames[j]->i_type = X264_TYPE_B; | |
1452 | - } | |
1453 | - frames[max_bframes+1]->i_type = X264_TYPE_P; | |
1454 | - } | |
1455 | + | |
1456 | + /* Restore frametypes for all frames that haven't actually been decided yet. */ | |
1457 | + for( j = reset_start; j <= num_frames; j++ ) | |
1458 | + frames[j]->i_type = X264_TYPE_AUTO; | |
1459 | } | |
1460 | ||
1461 | void x264_slicetype_decide( x264_t *h ) | |
1462 | @@ -606,8 +824,9 @@ void x264_slicetype_decide( x264_t *h ) | |
1463 | x264_ratecontrol_slice_type( h, h->frames.next[i]->i_frame ); | |
1464 | } | |
1465 | else if( (h->param.i_bframe && h->param.i_bframe_adaptive) | |
1466 | - || h->param.i_scenecut_threshold ) | |
1467 | - x264_slicetype_analyse( h ); | |
1468 | + || h->param.i_scenecut_threshold | |
1469 | + || h->param.rc.b_mb_tree ) | |
1470 | + x264_slicetype_analyse( h, 0 ); | |
1471 | ||
1472 | for( bframes = 0;; bframes++ ) | |
1473 | { | |
1474 | @@ -645,7 +864,9 @@ void x264_slicetype_decide( x264_t *h ) | |
1475 | frm->i_type = X264_TYPE_P; | |
1476 | } | |
1477 | ||
1478 | - if( frm->i_type == X264_TYPE_AUTO ) frm->i_type = X264_TYPE_B; | |
1479 | + if( frm->i_type == X264_TYPE_AUTO ) | |
1480 | + frm->i_type = X264_TYPE_B; | |
1481 | + | |
1482 | else if( !IS_X264_TYPE_B( frm->i_type ) ) break; | |
1483 | } | |
1484 | } | |
1485 | @@ -653,7 +874,7 @@ void x264_slicetype_decide( x264_t *h ) | |
1486 | int x264_rc_analyse_slice( x264_t *h ) | |
1487 | { | |
1488 | x264_mb_analysis_t a; | |
1489 | - x264_frame_t *frames[X264_BFRAME_MAX*4+2] = { NULL, }; | |
1490 | + x264_frame_t *frames[X264_LOOKAHEAD_MAX+2] = { NULL, }; | |
1491 | int p0=0, p1, b; | |
1492 | int cost; | |
1493 | ||
1494 | @@ -662,6 +883,12 @@ int x264_rc_analyse_slice( x264_t *h ) | |
1495 | if( IS_X264_TYPE_I(h->fenc->i_type) ) | |
1496 | { | |
1497 | p1 = b = 0; | |
1498 | + /* For MB-tree, we have to perform propagation analysis on I-frames too. */ | |
1499 | + if( h->param.rc.b_mb_tree ) | |
1500 | + { | |
1501 | + h->frames.last_nonb = h->fenc; | |
1502 | + x264_slicetype_analyse( h, 1 ); | |
1503 | + } | |
1504 | } | |
1505 | else if( X264_TYPE_P == h->fenc->i_type ) | |
1506 | { | |
1507 | @@ -680,11 +907,16 @@ int x264_rc_analyse_slice( x264_t *h ) | |
1508 | frames[p0] = h->fref0[0]; | |
1509 | frames[b] = h->fenc; | |
1510 | ||
1511 | - cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 ); | |
1512 | + if( h->param.rc.b_mb_tree ) | |
1513 | + cost = x264_slicetype_frame_cost_recalculate( h, &a, frames, p0, p1, b ); | |
1514 | + else | |
1515 | + { | |
1516 | + cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 ); | |
1517 | ||
1518 | - /* In AQ, use the weighted score instead. */ | |
1519 | - if( h->param.rc.i_aq_mode ) | |
1520 | - cost = frames[b]->i_cost_est[b-p0][p1-b]; | |
1521 | + /* In AQ, use the weighted score instead. */ | |
1522 | + if( h->param.rc.i_aq_mode ) | |
1523 | + cost = frames[b]->i_cost_est[b-p0][p1-b]; | |
1524 | + } | |
1525 | ||
1526 | h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b]; | |
1527 | h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b]; | |
1528 | diff --git a/x264.c b/x264.c | |
1529 | index c3b4f29..c75bfde 100644 | |
1530 | --- a/x264.c | |
1531 | +++ b/x264.c | |
1532 | @@ -168,9 +168,8 @@ static void Help( x264_param_t *defaults, int b_longhelp ) | |
1533 | H0( " - baseline,main,high\n" ); | |
1534 | H0( " --preset Use a preset to select encoding settings [medium]\n" ); | |
1535 | H0( " Overridden by user settings\n"); | |
1536 | - H1( " - ultrafast,veryfast,fast,medium\n" | |
1537 | - " - slow,slower,placebo\n" ); | |
1538 | - else H0( " - ultrafast,veryfast,fast,medium,slow,slower\n" ); | |
1539 | + H0( " - ultrafast,veryfast,faster,fast\n" | |
1540 | + " - medium,slow,slower,placebo\n" ); | |
1541 | H0( " --tune Tune the settings for a particular type of source\n" ); | |
1542 | H0( " Overridden by user settings\n"); | |
1543 | H1( " - film,animation,grain,psnr,ssim\n" | |
1544 | @@ -204,6 +203,7 @@ static void Help( x264_param_t *defaults, int b_longhelp ) | |
1545 | H0( " -q, --qp <integer> Set QP (0-51, 0=lossless)\n" ); | |
1546 | H0( " -B, --bitrate <integer> Set bitrate (kbit/s)\n" ); | |
1547 | H0( " --crf <float> Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant ); | |
1548 | + H0( " --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead ); | |
1549 | H0( " --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate ); | |
1550 | H0( " --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size ); | |
1551 | H1( " --vbv-init <float> Initial VBV buffer occupancy [%.1f]\n", defaults->rc.f_vbv_buffer_init ); | |
1552 | @@ -228,6 +228,7 @@ static void Help( x264_param_t *defaults, int b_longhelp ) | |
1553 | " - 2: Last pass, does not overwrite stats file\n" | |
1554 | " - 3: Nth pass, overwrites stats file\n" ); | |
1555 | H0( " --stats <string> Filename for 2 pass stats [\"%s\"]\n", defaults->rc.psz_stat_out ); | |
1556 | + H0( " --no-mbtree Disable mb-tree ratecontrol.\n"); | |
1557 | H0( " --qcomp <float> QP curve compression: 0.0 => CBR, 1.0 => CQP [%.2f]\n", defaults->rc.f_qcompress ); | |
1558 | H1( " --cplxblur <float> Reduce fluctuations in QP (before curve compression) [%.1f]\n", defaults->rc.f_complexity_blur ); | |
1559 | H1( " --qblur <float> Reduce fluctuations in QP (after curve compression) [%.1f]\n", defaults->rc.f_qblur ); | |
1560 | @@ -277,6 +278,8 @@ static void Help( x264_param_t *defaults, int b_longhelp ) | |
1561 | " #1: RD (requires subme>=6)\n" | |
1562 | " #2: Trellis (requires trellis, experimental)\n", | |
1563 | defaults->analyse.f_psy_rd, defaults->analyse.f_psy_trellis ); | |
1564 | + H1( " --no-psy Disable all visual optimizations that worsen\n" | |
1565 | + " both PSNR and SSIM.\n" ); | |
1566 | H0( " --no-mixed-refs Don't decide references on a per partition basis\n" ); | |
1567 | H1( " --no-chroma-me Ignore chroma in motion estimation\n" ); | |
1568 | H0( " --no-8x8dct Disable adaptive spatial transform size\n" ); | |
1569 | @@ -403,6 +406,7 @@ static struct option long_options[] = | |
1570 | { "qpmax", required_argument, NULL, 0 }, | |
1571 | { "qpstep", required_argument, NULL, 0 }, | |
1572 | { "crf", required_argument, NULL, 0 }, | |
1573 | + { "rc-lookahead",required_argument, NULL, 0 }, | |
1574 | { "ref", required_argument, NULL, 'r' }, | |
1575 | { "asm", required_argument, NULL, 0 }, | |
1576 | { "no-asm", no_argument, NULL, 0 }, | |
1577 | @@ -422,6 +426,7 @@ static struct option long_options[] = | |
1578 | { "mvrange-thread", required_argument, NULL, 0 }, | |
1579 | { "subme", required_argument, NULL, 'm' }, | |
1580 | { "psy-rd", required_argument, NULL, 0 }, | |
1581 | + { "no-psy", no_argument, NULL, 0 }, | |
1582 | { "mixed-refs", no_argument, NULL, 0 }, | |
1583 | { "no-mixed-refs", no_argument, NULL, 0 }, | |
1584 | { "no-chroma-me", no_argument, NULL, 0 }, | |
1585 | @@ -446,6 +451,8 @@ static struct option long_options[] = | |
1586 | { "pass", required_argument, NULL, 'p' }, | |
1587 | { "stats", required_argument, NULL, 0 }, | |
1588 | { "qcomp", required_argument, NULL, 0 }, | |
1589 | + { "mbtree", no_argument, NULL, 0 }, | |
1590 | + { "no-mbtree", no_argument, NULL, 0 }, | |
1591 | { "qblur", required_argument, NULL, 0 }, | |
1592 | { "cplxblur", required_argument, NULL, 0 }, | |
1593 | { "zones", required_argument, NULL, 0 }, | |
1594 | @@ -542,6 +549,8 @@ static int Parse( int argc, char **argv, | |
1595 | param->rc.i_aq_mode = 0; | |
1596 | param->analyse.b_mixed_references = 0; | |
1597 | param->analyse.i_trellis = 0; | |
1598 | + param->i_bframe_adaptive = X264_B_ADAPT_NONE; | |
1599 | + param->rc.b_mb_tree = 0; | |
1600 | } | |
1601 | else if( !strcasecmp( optarg, "veryfast" ) ) | |
1602 | { | |
1603 | @@ -551,12 +560,20 @@ static int Parse( int argc, char **argv, | |
1604 | param->i_frame_reference = 1; | |
1605 | param->analyse.b_mixed_references = 0; | |
1606 | param->analyse.i_trellis = 0; | |
1607 | + param->rc.b_mb_tree = 0; | |
1608 | } | |
1609 | - else if( !strcasecmp( optarg, "fast" ) ) | |
1610 | + else if( !strcasecmp( optarg, "faster" ) ) | |
1611 | { | |
1612 | param->analyse.b_mixed_references = 0; | |
1613 | param->i_frame_reference = 2; | |
1614 | param->analyse.i_subpel_refine = 4; | |
1615 | + param->rc.b_mb_tree = 0; | |
1616 | + } | |
1617 | + else if( !strcasecmp( optarg, "fast" ) ) | |
1618 | + { | |
1619 | + param->i_frame_reference = 2; | |
1620 | + param->analyse.i_subpel_refine = 6; | |
1621 | + param->rc.i_lookahead = 30; | |
1622 | } | |
1623 | else if( !strcasecmp( optarg, "medium" ) ) | |
1624 | { | |
1625 | @@ -569,6 +586,7 @@ static int Parse( int argc, char **argv, | |
1626 | param->i_frame_reference = 5; | |
1627 | param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; | |
1628 | param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; | |
1629 | + param->rc.i_lookahead = 50; | |
1630 | } | |
1631 | else if( !strcasecmp( optarg, "slower" ) ) | |
1632 | { | |
1633 | @@ -579,6 +597,7 @@ static int Parse( int argc, char **argv, | |
1634 | param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO; | |
1635 | param->analyse.inter |= X264_ANALYSE_PSUB8x8; | |
1636 | param->analyse.i_trellis = 2; | |
1637 | + param->rc.i_lookahead = 60; | |
1638 | } | |
1639 | else if( !strcasecmp( optarg, "placebo" ) ) | |
1640 | { | |
1641 | @@ -592,6 +611,7 @@ static int Parse( int argc, char **argv, | |
1642 | param->analyse.b_fast_pskip = 0; | |
1643 | param->analyse.i_trellis = 2; | |
1644 | param->i_bframe = 16; | |
1645 | + param->rc.i_lookahead = 60; | |
1646 | } | |
1647 | else | |
1648 | { | |
1649 | @@ -644,11 +664,13 @@ static int Parse( int argc, char **argv, | |
1650 | { | |
1651 | param->analyse.f_psy_rd = 0; | |
1652 | param->rc.i_aq_mode = X264_AQ_NONE; | |
1653 | + param->analyse.b_psy = 0; | |
1654 | } | |
1655 | else if( !strcasecmp( optarg, "ssim" ) ) | |
1656 | { | |
1657 | param->analyse.f_psy_rd = 0; | |
1658 | param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE; | |
1659 | + param->analyse.b_psy = 0; | |
1660 | } | |
1661 | else if( !strcasecmp( optarg, "fastdecode" ) ) | |
1662 | { | |
1663 | @@ -662,7 +684,6 @@ static int Parse( int argc, char **argv, | |
1664 | param->i_deblocking_filter_alphac0 = -1; | |
1665 | param->i_deblocking_filter_beta = -1; | |
1666 | param->analyse.f_psy_trellis = 0.2; | |
1667 | - param->rc.f_ip_factor = 2.1; | |
1668 | param->rc.f_aq_strength = 1.3; | |
1669 | if( param->analyse.inter & X264_ANALYSE_PSUB16x16 ) | |
1670 | param->analyse.inter |= X264_ANALYSE_PSUB8x8; | |
1671 | diff --git a/x264.h b/x264.h | |
1672 | index 2dfcc8d..4982b2e 100644 | |
1673 | --- a/x264.h | |
1674 | +++ b/x264.h | |
1675 | @@ -35,7 +35,7 @@ | |
1676 | ||
1677 | #include <stdarg.h> | |
1678 | ||
1679 | -#define X264_BUILD 68 | |
1680 | +#define X264_BUILD 69 | |
1681 | ||
1682 | /* x264_t: | |
1683 | * opaque handler for encoder */ | |
1684 | @@ -242,6 +242,7 @@ typedef struct x264_param_t | |
1685 | int i_noise_reduction; /* adaptive pseudo-deadzone */ | |
1686 | float f_psy_rd; /* Psy RD strength */ | |
1687 | float f_psy_trellis; /* Psy trellis strength */ | |
1688 | + int b_psy; /* Toggle all psy optimizations */ | |
1689 | ||
1690 | /* the deadzone size that will be used in luma quantization */ | |
1691 | int i_luma_deadzone[2]; /* {inter, intra} */ | |
1692 | @@ -271,6 +272,8 @@ typedef struct x264_param_t | |
1693 | ||
1694 | int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */ | |
1695 | float f_aq_strength; | |
1696 | + int b_mb_tree; /* Macroblock-tree ratecontrol. */ | |
1697 | + int i_lookahead; | |
1698 | ||
1699 | /* 2pass */ | |
1700 | int b_stat_write; /* Enable stat writing in psz_stat_out */ | |
1701 | -- | |
1702 | 1.6.1.2 | |
1703 | ||
1704 |