Advertisement
Guest User

Untitled

a guest
May 5th, 2017
580
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 119.63 KB | None | 0 0
  1. From 83d2ad004ccf51fb20a1c571e022586e40ca941c Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Fri, 29 Jan 2010 02:40:41 -0800
  4. Subject: [PATCH 01/14] Add ability to adjust ratecontrol parameters on the fly. encoder_reconfig and x264_picture_t->param can now be used to change ratecontrol parameters. This is extraordinarily useful in certain streaming situations where the encoder needs to adapt the bitrate to network circumstances.
  5.  
  6. What can be changed:
  7. 1) CRF can be adjusted if in CRF mode.
  8. 2) VBV maxrate and bufsize can be adjusted if in VBV mode.
  9. 3) Bitrate can be adjusted if in CBR mode.
  10. However, x264 cannot switch between modes and cannot change bitrate in ABR mode.
  11.  
  12. Also fix a bug where x264_picture_t->param reconfig method would not always be frame-exact.
  13.  
  14. Commit sponsored by SayMama video calling.
  15. ---
  16. encoder/encoder.c | 55 +++++++++++++++++++-
  17. encoder/ratecontrol.c | 137 +++++++++++++++++++++++-------------------------
  18. encoder/ratecontrol.h | 2 +
  19. x264.h | 7 ++-
  20. 4 files changed, 125 insertions(+), 76 deletions(-)
  21.  
  22. diff --git a/encoder/encoder.c b/encoder/encoder.c
  23. index d873cd0..8e9c118 100644
  24. --- a/encoder/encoder.c
  25. +++ b/encoder/encoder.c
  26. @@ -507,6 +507,39 @@ static int x264_validate_parameters( x264_t *h )
  27. }
  28. h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
  29. h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
  30. + if( h->param.rc.i_vbv_buffer_size )
  31. + {
  32. + if( h->param.rc.i_rc_method == X264_RC_CQP )
  33. + {
  34. + x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
  35. + h->param.rc.i_vbv_max_bitrate = 0;
  36. + h->param.rc.i_vbv_buffer_size = 0;
  37. + }
  38. + else if( h->param.rc.i_vbv_max_bitrate == 0 )
  39. + {
  40. + if( h->param.rc.i_rc_method == X264_RC_ABR )
  41. + {
  42. + x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
  43. + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
  44. + }
  45. + else
  46. + {
  47. + x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
  48. + h->param.rc.i_vbv_buffer_size = 0;
  49. + }
  50. + }
  51. + else if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
  52. + h->param.rc.i_vbv_max_bitrate > 0 )
  53. + {
  54. + x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
  55. + h->param.rc.i_vbv_max_bitrate = 0;
  56. + }
  57. + }
  58. + else if( h->param.rc.i_vbv_max_bitrate )
  59. + {
  60. + x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
  61. + h->param.rc.i_vbv_max_bitrate = 0;
  62. + }
  63.  
  64. int max_slices = (h->param.i_height+((16<<h->param.b_interlaced)-1))/(16<<h->param.b_interlaced);
  65. if( h->param.b_sliced_threads )
  66. @@ -1071,7 +1104,7 @@ fail:
  67. ****************************************************************************/
  68. int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
  69. {
  70. - h = h->thread[h->i_thread_phase];
  71. + h = h->thread[h->thread[0]->i_thread_phase];
  72. x264_set_aspect_ratio( h, param, 0 );
  73. #define COPY(var) h->param.var = param->var
  74. COPY( i_frame_reference ); // but never uses more refs than initially specified
  75. @@ -1110,11 +1143,29 @@ int x264_encoder_reconfig( x264_t *h, x264_param_t *param )
  76. COPY( i_slice_max_size );
  77. COPY( i_slice_max_mbs );
  78. COPY( i_slice_count );
  79. + /* VBV can't be turned on if it wasn't on to begin with */
  80. + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
  81. + {
  82. + COPY( rc.i_vbv_max_bitrate );
  83. + COPY( rc.i_vbv_buffer_size );
  84. + COPY( rc.i_bitrate );
  85. + }
  86. + COPY( rc.f_rf_constant );
  87. #undef COPY
  88.  
  89. mbcmp_init( h );
  90.  
  91. - return x264_validate_parameters( h );
  92. + int failure = x264_validate_parameters( h );
  93. +
  94. + /* Supported reconfiguration options (1-pass only):
  95. + * vbv-maxrate
  96. + * vbv-bufsize
  97. + * crf
  98. + * bitrate (CBR only) */
  99. + if( !failure )
  100. + x264_ratecontrol_init_reconfigurable( h, 0 );
  101. +
  102. + return failure;
  103. }
  104.  
  105. /****************************************************************************
  106. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  107. index 63b3be6..52196e7 100644
  108. --- a/encoder/ratecontrol.c
  109. +++ b/encoder/ratecontrol.c
  110. @@ -388,6 +388,53 @@ static char *x264_strcat_filename( char *input, char *suffix )
  111. return output;
  112. }
  113.  
  114. +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  115. +{
  116. + x264_ratecontrol_t *rc = h->rc;
  117. + if( !b_init && rc->b_2pass )
  118. + return;
  119. +
  120. + if( h->param.rc.i_vbv_max_bitrate > 0 && h->param.rc.i_vbv_buffer_size > 0 )
  121. + {
  122. + if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
  123. + {
  124. + h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
  125. + x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
  126. + h->param.rc.i_vbv_buffer_size );
  127. + }
  128. +
  129. + /* We don't support changing the ABR bitrate right now,
  130. + so if the stream starts as CBR, keep it CBR. */
  131. + if( rc->b_vbv_min_rate )
  132. + h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
  133. + rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
  134. + rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
  135. + rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
  136. + rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
  137. + * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
  138. + if( b_init )
  139. + {
  140. + if( h->param.rc.f_vbv_buffer_init > 1. )
  141. + h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
  142. + h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
  143. + rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
  144. + rc->b_vbv = 1;
  145. + rc->b_vbv_min_rate = !rc->b_2pass
  146. + && h->param.rc.i_rc_method == X264_RC_ABR
  147. + && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
  148. + }
  149. + }
  150. + if( h->param.rc.i_rc_method == X264_RC_CRF )
  151. + {
  152. + /* Arbitrary rescaling to make CRF somewhat similar to QP.
  153. + * Try to compensate for MB-tree's effects as well. */
  154. + double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  155. + double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
  156. + rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
  157. + / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
  158. + }
  159. +}
  160. +
  161. int x264_ratecontrol_new( x264_t *h )
  162. {
  163. x264_ratecontrol_t *rc;
  164. @@ -426,60 +473,10 @@ int x264_ratecontrol_new( x264_t *h )
  165. x264_log(h, X264_LOG_ERROR, "constant rate-factor is incompatible with 2pass.\n");
  166. return -1;
  167. }
  168. - if( h->param.rc.i_vbv_buffer_size )
  169. - {
  170. - if( h->param.rc.i_rc_method == X264_RC_CQP )
  171. - {
  172. - x264_log(h, X264_LOG_WARNING, "VBV is incompatible with constant QP, ignored.\n");
  173. - h->param.rc.i_vbv_max_bitrate = 0;
  174. - h->param.rc.i_vbv_buffer_size = 0;
  175. - }
  176. - else if( h->param.rc.i_vbv_max_bitrate == 0 )
  177. - {
  178. - if( h->param.rc.i_rc_method == X264_RC_ABR )
  179. - {
  180. - x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
  181. - h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
  182. - }
  183. - else
  184. - {
  185. - x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
  186. - h->param.rc.i_vbv_buffer_size = 0;
  187. - }
  188. - }
  189. - }
  190. - if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
  191. - h->param.rc.i_vbv_max_bitrate > 0)
  192. - x264_log(h, X264_LOG_WARNING, "max bitrate less than average bitrate, ignored.\n");
  193. - else if( h->param.rc.i_vbv_max_bitrate > 0 &&
  194. - h->param.rc.i_vbv_buffer_size > 0 )
  195. - {
  196. - if( h->param.rc.i_vbv_buffer_size < (int)(h->param.rc.i_vbv_max_bitrate / rc->fps) )
  197. - {
  198. - h->param.rc.i_vbv_buffer_size = h->param.rc.i_vbv_max_bitrate / rc->fps;
  199. - x264_log( h, X264_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
  200. - h->param.rc.i_vbv_buffer_size );
  201. - }
  202. - if( h->param.rc.f_vbv_buffer_init > 1. )
  203. - h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
  204. - rc->buffer_rate = h->param.rc.i_vbv_max_bitrate * 1000. / rc->fps;
  205. - rc->buffer_size = h->param.rc.i_vbv_buffer_size * 1000.;
  206. - rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size;
  207. - h->param.rc.f_vbv_buffer_init = X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size );
  208. - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
  209. - rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size
  210. - * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate);
  211. - rc->b_vbv = 1;
  212. - rc->b_vbv_min_rate = !rc->b_2pass
  213. - && h->param.rc.i_rc_method == X264_RC_ABR
  214. - && h->param.rc.i_vbv_max_bitrate <= h->param.rc.i_bitrate;
  215. - }
  216. - else if( h->param.rc.i_vbv_max_bitrate )
  217. - {
  218. - x264_log(h, X264_LOG_WARNING, "VBV maxrate specified, but no bufsize.\n");
  219. - h->param.rc.i_vbv_max_bitrate = 0;
  220. - }
  221. - if(rc->rate_tolerance < 0.01)
  222. +
  223. + x264_ratecontrol_init_reconfigurable( h, 1 );
  224. +
  225. + if( rc->rate_tolerance < 0.01 )
  226. {
  227. x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
  228. rc->rate_tolerance = 0.01;
  229. @@ -499,16 +496,6 @@ int x264_ratecontrol_new( x264_t *h )
  230. rc->last_non_b_pict_type = SLICE_TYPE_I;
  231. }
  232.  
  233. - if( h->param.rc.i_rc_method == X264_RC_CRF )
  234. - {
  235. - /* Arbitrary rescaling to make CRF somewhat similar to QP.
  236. - * Try to compensate for MB-tree's effects as well. */
  237. - double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  238. - double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
  239. - rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
  240. - / qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
  241. - }
  242. -
  243. rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
  244. rc->pb_offset = 6.0 * log(h->param.rc.f_pb_factor) / log(2.0);
  245. rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
  246. @@ -1577,15 +1564,15 @@ static void update_vbv( x264_t *h, int bits )
  247. if( rct->buffer_fill_final < 0 )
  248. x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
  249. rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
  250. - rct->buffer_fill_final += rct->buffer_rate;
  251. - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rct->buffer_size );
  252. + rct->buffer_fill_final += rcc->buffer_rate;
  253. + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
  254. }
  255.  
  256. // provisionally update VBV according to the planned size of all frames currently in progress
  257. static void update_vbv_plan( x264_t *h, int overhead )
  258. {
  259. x264_ratecontrol_t *rcc = h->rc;
  260. - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
  261. + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
  262. if( h->i_thread_frames > 1 )
  263. {
  264. int j = h->rc - h->thread[0]->rc;
  265. @@ -1603,6 +1590,8 @@ static void update_vbv_plan( x264_t *h, int overhead )
  266. rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
  267. }
  268. }
  269. + rcc->buffer_fill = X264_MIN( rcc->buffer_fill, rcc->buffer_size );
  270. + rcc->buffer_fill -= overhead;
  271. }
  272.  
  273. // apply VBV constraints and clip qscale to between lmin and lmax
  274. @@ -2027,8 +2016,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
  275. #define COPY(var) memcpy(&cur->rc->var, &prev->rc->var, sizeof(cur->rc->var))
  276. /* these vars are updated in x264_ratecontrol_start()
  277. * so copy them from the context that most recently started (prev)
  278. - * to the context that's about to start (cur).
  279. - */
  280. + * to the context that's about to start (cur). */
  281. COPY(accum_p_qp);
  282. COPY(accum_p_norm);
  283. COPY(last_satd);
  284. @@ -2040,6 +2028,14 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
  285. COPY(bframes);
  286. COPY(prev_zone);
  287. COPY(qpbuf_pos);
  288. + /* these vars can be updated by x264_ratecontrol_init_reconfigurable */
  289. + COPY(buffer_rate);
  290. + COPY(buffer_size);
  291. + COPY(single_frame_vbv);
  292. + COPY(cbr_decay);
  293. + COPY(b_vbv_min_rate);
  294. + COPY(rate_factor_constant);
  295. + COPY(bitrate);
  296. #undef COPY
  297. }
  298. if( cur != next )
  299. @@ -2047,8 +2043,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
  300. #define COPY(var) next->rc->var = cur->rc->var
  301. /* these vars are updated in x264_ratecontrol_end()
  302. * so copy them from the context that most recently ended (cur)
  303. - * to the context that's about to end (next)
  304. - */
  305. + * to the context that's about to end (next) */
  306. COPY(cplxr_sum);
  307. COPY(expected_bits_sum);
  308. COPY(wanted_bits_window);
  309. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  310. index 5a8d088..2767866 100644
  311. --- a/encoder/ratecontrol.h
  312. +++ b/encoder/ratecontrol.h
  313. @@ -27,6 +27,8 @@
  314. int x264_ratecontrol_new ( x264_t * );
  315. void x264_ratecontrol_delete( x264_t * );
  316.  
  317. +void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
  318. +
  319. void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
  320. void x264_adaptive_quant( x264_t * );
  321. int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame );
  322. diff --git a/x264.h b/x264.h
  323. index 2550864..e7d19b7 100644
  324. --- a/x264.h
  325. +++ b/x264.h
  326. @@ -35,7 +35,7 @@
  327.  
  328. #include <stdarg.h>
  329.  
  330. -#define X264_BUILD 84
  331. +#define X264_BUILD 85
  332.  
  333. /* x264_t:
  334. * opaque handler for encoder */
  335. @@ -480,11 +480,12 @@ typedef struct
  336. x264_t *x264_encoder_open( x264_param_t * );
  337.  
  338. /* x264_encoder_reconfig:
  339. - * analysis-related parameters from x264_param_t are copied.
  340. + * various parameters from x264_param_t are copied.
  341. * this takes effect immediately, on whichever frame is encoded next;
  342. * due to delay, this may not be the next frame passed to encoder_encode.
  343. * if the change should apply to some particular frame, use x264_picture_t->param instead.
  344. - * returns 0 on success, negative on parameter validation error. */
  345. + * returns 0 on success, negative on parameter validation error.
  346. + * not all parameters can be changed; see the actual function for a detailed breakdown. */
  347. int x264_encoder_reconfig( x264_t *, x264_param_t * );
  348. /* x264_encoder_parameters:
  349. * copies the current internal set of parameters to the pointer provided
  350. --
  351. 1.6.1.2
  352.  
  353.  
  354. From 3a4ddc546cd2a368f22ba0a26da093129d6f6772 Mon Sep 17 00:00:00 2001
  355. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  356. Date: Mon, 1 Feb 2010 13:04:47 -0800
  357. Subject: [PATCH 02/14] Slightly faster predictor_difference_mmxext
  358.  
  359. ---
  360. common/x86/util.h | 17 ++++++++++-------
  361. 1 files changed, 10 insertions(+), 7 deletions(-)
  362.  
  363. diff --git a/common/x86/util.h b/common/x86/util.h
  364. index efc700a..c8bcf4b 100644
  365. --- a/common/x86/util.h
  366. +++ b/common/x86/util.h
  367. @@ -45,8 +45,9 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
  368. #define x264_predictor_difference x264_predictor_difference_mmxext
  369. static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t i_mvc )
  370. {
  371. - int sum = 0;
  372. - uint16_t output[4];
  373. + int sum;
  374. + static const uint64_t pw_1 = 0x0001000100010001ULL;
  375. +
  376. asm(
  377. "pxor %%mm4, %%mm4 \n"
  378. "test $1, %1 \n"
  379. @@ -56,7 +57,7 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
  380. "psubw %%mm3, %%mm0 \n"
  381. "jmp 2f \n"
  382. "3: \n"
  383. - "sub $1, %1 \n"
  384. + "dec %1 \n"
  385. "1: \n"
  386. "movq -8(%2,%1,4), %%mm0 \n"
  387. "psubw -4(%2,%1,4), %%mm0 \n"
  388. @@ -67,11 +68,13 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
  389. "pmaxsw %%mm2, %%mm0 \n"
  390. "paddusw %%mm0, %%mm4 \n"
  391. "jg 1b \n"
  392. - "movq %%mm4, %0 \n"
  393. - :"=m"(output), "+r"(i_mvc)
  394. - :"r"(mvc), "m"(M64( mvc ))
  395. + "pmaddwd %4, %%mm4 \n"
  396. + "pshufw $14, %%mm4, %%mm0 \n"
  397. + "paddd %%mm0, %%mm4 \n"
  398. + "movd %%mm4, %0 \n"
  399. + :"=r"(sum), "+r"(i_mvc)
  400. + :"r"(mvc), "m"(M64( mvc )), "m"(pw_1)
  401. );
  402. - sum += output[0] + output[1] + output[2] + output[3];
  403. return sum;
  404. }
  405. #define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext
  406. --
  407. 1.6.1.2
  408.  
  409.  
  410. From 8d417bd03fcc4f3a8058b2db9063b0d967600536 Mon Sep 17 00:00:00 2001
  411. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  412. Date: Tue, 2 Feb 2010 03:15:18 -0800
  413. Subject: [PATCH 03/14] Improve bidir search, fix some artifacts in fades
  414. Modify analysis to allow bidir to use different motion vectors than L0/L1.
  415. Always try the <0,0,0,0> motion vector for bidir.
  416. Eliminates almost all errant motion vectors in fades.
  417. Slightly improves PSNR as well (~0.015 dB).
  418.  
  419. ---
  420. encoder/analyse.c | 50 ++++++++++++++++++++++++++++++++++++++------------
  421. 1 files changed, 38 insertions(+), 12 deletions(-)
  422.  
  423. diff --git a/encoder/analyse.c b/encoder/analyse.c
  424. index 666596b..1fb2206 100644
  425. --- a/encoder/analyse.c
  426. +++ b/encoder/analyse.c
  427. @@ -40,6 +40,7 @@ typedef struct
  428. int i_ref;
  429. int i_rd16x16;
  430. x264_me_t me16x16;
  431. + x264_me_t bi16x16; /* for b16x16 BI mode, since MVs can differ from l0/l1 */
  432.  
  433. /* 8x8 */
  434. int i_cost8x8;
  435. @@ -1722,20 +1723,45 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  436. a->l1.me16x16.i_ref = a->l1.i_ref;
  437.  
  438. /* get cost of BI mode */
  439. + int ref_costs = REF_COST( 0, a->l0.i_ref ) + REF_COST( 1, a->l1.i_ref );
  440. + h->mc.memcpy_aligned( &a->l0.bi16x16, &a->l0.me16x16, sizeof(x264_me_t) );
  441. + h->mc.memcpy_aligned( &a->l1.bi16x16, &a->l1.me16x16, sizeof(x264_me_t) );
  442. src0 = h->mc.get_ref( pix0, &stride0,
  443. h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.i_stride[0],
  444. - a->l0.me16x16.mv[0], a->l0.me16x16.mv[1], 16, 16, weight_none );
  445. + a->l0.bi16x16.mv[0], a->l0.bi16x16.mv[1], 16, 16, weight_none );
  446. src1 = h->mc.get_ref( pix1, &stride1,
  447. h->mb.pic.p_fref[1][a->l1.i_ref], h->mb.pic.i_stride[0],
  448. - a->l1.me16x16.mv[0], a->l1.me16x16.mv[1], 16, 16, weight_none );
  449. + a->l1.bi16x16.mv[0], a->l1.bi16x16.mv[1], 16, 16, weight_none );
  450.  
  451. h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
  452.  
  453. a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
  454. - + REF_COST( 0, a->l0.i_ref )
  455. - + REF_COST( 1, a->l1.i_ref )
  456. - + a->l0.me16x16.cost_mv
  457. - + a->l1.me16x16.cost_mv;
  458. + + ref_costs
  459. + + a->l0.bi16x16.cost_mv
  460. + + a->l1.bi16x16.cost_mv;
  461. +
  462. +
  463. + /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
  464. + if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
  465. + {
  466. + int l0_mv_cost = a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[0]]
  467. + + a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[1]];
  468. + int l1_mv_cost = a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[0]]
  469. + + a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[1]];
  470. + h->mc.avg[PIXEL_16x16]( pix0, 16, h->mb.pic.p_fref[0][a->l0.i_ref][0], h->mb.pic.i_stride[0],
  471. + h->mb.pic.p_fref[1][a->l1.i_ref][0], h->mb.pic.i_stride[0],
  472. + h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] );
  473. + int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
  474. + + ref_costs + l0_mv_cost + l1_mv_cost;
  475. + if( cost00 < a->i_cost16x16bi )
  476. + {
  477. + M32( a->l0.bi16x16.mv ) = 0;
  478. + M32( a->l1.bi16x16.mv ) = 0;
  479. + a->l0.bi16x16.cost_mv = l0_mv_cost;
  480. + a->l1.bi16x16.cost_mv = l1_mv_cost;
  481. + a->i_cost16x16bi = cost00;
  482. + }
  483. + }
  484.  
  485. /* mb type cost */
  486. a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
  487. @@ -2205,7 +2231,7 @@ static void x264_refine_bidir( x264_t *h, x264_mb_analysis_t *a )
  488. {
  489. case D_16x16:
  490. if( h->mb.i_type == B_BI_BI )
  491. - x264_me_refine_bidir_satd( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight );
  492. + x264_me_refine_bidir_satd( h, &a->l0.bi16x16, &a->l1.bi16x16, i_biweight );
  493. break;
  494. case D_16x8:
  495. for( i=0; i<2; i++ )
  496. @@ -2819,8 +2845,8 @@ intra_analysis:
  497. }
  498. else if( i_type == B_BI_BI )
  499. {
  500. - x264_me_refine_qpel( h, &analysis.l0.me16x16 );
  501. - x264_me_refine_qpel( h, &analysis.l1.me16x16 );
  502. + x264_me_refine_qpel( h, &analysis.l0.bi16x16 );
  503. + x264_me_refine_qpel( h, &analysis.l1.bi16x16 );
  504. }
  505. }
  506. else if( i_partition == D_16x8 )
  507. @@ -2938,7 +2964,7 @@ intra_analysis:
  508. x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );
  509. }
  510. else if( i_type == B_BI_BI )
  511. - x264_me_refine_bidir_rd( h, &analysis.l0.me16x16, &analysis.l1.me16x16, i_biweight, 0, analysis.i_lambda2 );
  512. + x264_me_refine_bidir_rd( h, &analysis.l0.bi16x16, &analysis.l1.bi16x16, i_biweight, 0, analysis.i_lambda2 );
  513. }
  514. else if( i_partition == D_16x8 )
  515. {
  516. @@ -3121,10 +3147,10 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
  517. break;
  518. case B_BI_BI:
  519. x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
  520. - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
  521. + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.bi16x16.mv );
  522.  
  523. x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
  524. - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
  525. + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.bi16x16.mv );
  526. break;
  527. }
  528. break;
  529. --
  530. 1.6.1.2
  531.  
  532.  
  533. From 65f2eb0783b325bddee87b171b904bc54e5ceacc Mon Sep 17 00:00:00 2001
  534. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  535. Date: Wed, 3 Feb 2010 14:22:05 -0800
  536. Subject: [PATCH 04/14] Faster CABAC MB header writing
  537. Reorganize the header writing to merge mb type and mb mode info (mv, pred, etc.).
  538. Reduces redundant branches and better splits the code between frame types (for better code cache usage).
  539. Also slightly simplify qp delta calculation.
  540. Also make CAVLC and CABAC a bit more consistent in structure and function names.
  541.  
  542. ---
  543. encoder/cabac.c | 573 ++++++++++++++++++++++++++-----------------------------
  544. encoder/cavlc.c | 118 ++++++------
  545. 2 files changed, 334 insertions(+), 357 deletions(-)
  546.  
  547. diff --git a/encoder/cabac.c b/encoder/cabac.c
  548. index 271f527..6ff2aed 100644
  549. --- a/encoder/cabac.c
  550. +++ b/encoder/cabac.c
  551. @@ -29,151 +29,6 @@
  552. #define RDO_SKIP_BS 0
  553. #endif
  554.  
  555. -static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
  556. - int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
  557. -{
  558. - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  559. - {
  560. - x264_cabac_encode_decision_noup( cb, ctx0, 0 );
  561. - }
  562. -#if !RDO_SKIP_BS
  563. - else if( i_mb_type == I_PCM )
  564. - {
  565. - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  566. - x264_cabac_encode_flush( h, cb );
  567. - }
  568. -#endif
  569. - else
  570. - {
  571. - int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
  572. -
  573. - x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  574. - x264_cabac_encode_terminal( cb );
  575. -
  576. - x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
  577. - if( h->mb.i_cbp_chroma == 0 )
  578. - x264_cabac_encode_decision_noup( cb, ctx2, 0 );
  579. - else
  580. - {
  581. - x264_cabac_encode_decision( cb, ctx2, 1 );
  582. - x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
  583. - }
  584. - x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
  585. - x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
  586. - }
  587. -}
  588. -
  589. -static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
  590. -{
  591. - const int i_mb_type = h->mb.i_type;
  592. -
  593. - if( h->sh.b_mbaff &&
  594. - (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
  595. - {
  596. - x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
  597. - }
  598. -
  599. - if( h->sh.i_type == SLICE_TYPE_I )
  600. - {
  601. - int ctx = 0;
  602. - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
  603. - ctx++;
  604. - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
  605. - ctx++;
  606. -
  607. - x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
  608. - }
  609. - else if( h->sh.i_type == SLICE_TYPE_P )
  610. - {
  611. - /* prefix: 14, suffix: 17 */
  612. - if( i_mb_type == P_L0 )
  613. - {
  614. - x264_cabac_encode_decision_noup( cb, 14, 0 );
  615. - x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
  616. - x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
  617. - }
  618. - else if( i_mb_type == P_8x8 )
  619. - {
  620. - x264_cabac_encode_decision_noup( cb, 14, 0 );
  621. - x264_cabac_encode_decision_noup( cb, 15, 0 );
  622. - x264_cabac_encode_decision_noup( cb, 16, 1 );
  623. - }
  624. - else /* intra */
  625. - {
  626. - /* prefix */
  627. - x264_cabac_encode_decision_noup( cb, 14, 1 );
  628. -
  629. - /* suffix */
  630. - x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
  631. - }
  632. - }
  633. - else //if( h->sh.i_type == SLICE_TYPE_B )
  634. - {
  635. - int ctx = 0;
  636. - if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
  637. - ctx++;
  638. - if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
  639. - ctx++;
  640. -
  641. - if( i_mb_type == B_DIRECT )
  642. - {
  643. - x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
  644. - return;
  645. - }
  646. - x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
  647. -
  648. - if( i_mb_type == B_8x8 )
  649. - {
  650. - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  651. - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  652. - x264_cabac_encode_decision( cb, 27+5, 1 );
  653. - x264_cabac_encode_decision( cb, 27+5, 1 );
  654. - x264_cabac_encode_decision_noup( cb, 27+5, 1 );
  655. - }
  656. - else if( IS_INTRA( i_mb_type ) )
  657. - {
  658. - /* prefix */
  659. - x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  660. - x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  661. - x264_cabac_encode_decision( cb, 27+5, 1 );
  662. - x264_cabac_encode_decision( cb, 27+5, 0 );
  663. - x264_cabac_encode_decision( cb, 27+5, 1 );
  664. -
  665. - /* suffix */
  666. - x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
  667. - }
  668. - else
  669. - {
  670. - static const uint8_t i_mb_bits[9*3] =
  671. - {
  672. - 0x31, 0x29, 0x4, /* L0 L0 */
  673. - 0x35, 0x2d, 0, /* L0 L1 */
  674. - 0x43, 0x63, 0, /* L0 BI */
  675. - 0x3d, 0x2f, 0, /* L1 L0 */
  676. - 0x39, 0x25, 0x6, /* L1 L1 */
  677. - 0x53, 0x73, 0, /* L1 BI */
  678. - 0x4b, 0x6b, 0, /* BI L0 */
  679. - 0x5b, 0x7b, 0, /* BI L1 */
  680. - 0x47, 0x67, 0x21 /* BI BI */
  681. - };
  682. -
  683. - const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
  684. - int bits = i_mb_bits[idx];
  685. -
  686. - x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
  687. - x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
  688. - if( bits != 1 )
  689. - {
  690. - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  691. - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  692. - x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  693. - if( bits != 1 )
  694. - x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
  695. - }
  696. - }
  697. - }
  698. -}
  699. -
  700. static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
  701. {
  702. if( i_pred == i_mode )
  703. @@ -209,6 +64,12 @@ static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
  704. }
  705. }
  706.  
  707. +static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
  708. +{
  709. + int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
  710. + x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
  711. +}
  712. +
  713. static void x264_cabac_mb_cbp_luma( x264_t *h, x264_cabac_t *cb )
  714. {
  715. int cbp = h->mb.i_cbp_luma;
  716. @@ -244,7 +105,6 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
  717. static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  718. {
  719. int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  720. - int ctx;
  721.  
  722. /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
  723. if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
  724. @@ -257,7 +117,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  725.  
  726. /* Since, per the above, empty-CBP I16x16 blocks never have delta quants,
  727. * we don't have to check for them. */
  728. - ctx = h->mb.i_last_dqp && h->mb.cbp[h->mb.i_mb_prev_xy];
  729. + int ctx = !!h->mb.i_last_dqp;
  730.  
  731. if( i_dqp != 0 )
  732. {
  733. @@ -321,12 +181,6 @@ static inline void x264_cabac_mb_sub_b_partition( x264_cabac_t *cb, int i_sub )
  734. x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
  735. }
  736.  
  737. -static inline void x264_cabac_mb_transform_size( x264_t *h, x264_cabac_t *cb )
  738. -{
  739. - int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
  740. - x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
  741. -}
  742. -
  743. static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
  744. {
  745. const int i8 = x264_scan8[idx];
  746. @@ -463,6 +317,267 @@ static inline void x264_cabac_mb8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
  747. }
  748. }
  749.  
  750. +static void x264_cabac_mb_header_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
  751. + int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
  752. +{
  753. + if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  754. + {
  755. + int i, di = h->mb.b_transform_8x8 ? 4 : 1;
  756. + x264_cabac_encode_decision_noup( cb, ctx0, 0 );
  757. +
  758. + if( h->pps->b_transform_8x8_mode )
  759. + x264_cabac_mb_transform_size( h, cb );
  760. +
  761. + for( i = 0; i < 16; i += di )
  762. + {
  763. + const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
  764. + const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
  765. + x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
  766. + }
  767. + }
  768. +#if !RDO_SKIP_BS
  769. + else if( i_mb_type == I_PCM )
  770. + {
  771. + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  772. + x264_cabac_encode_flush( h, cb );
  773. + return;
  774. + }
  775. +#endif
  776. + else
  777. + {
  778. + int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
  779. +
  780. + x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  781. + x264_cabac_encode_terminal( cb );
  782. +
  783. + x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
  784. + if( h->mb.i_cbp_chroma == 0 )
  785. + x264_cabac_encode_decision_noup( cb, ctx2, 0 );
  786. + else
  787. + {
  788. + x264_cabac_encode_decision( cb, ctx2, 1 );
  789. + x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
  790. + }
  791. + x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
  792. + x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
  793. + }
  794. + x264_cabac_mb_intra_chroma_pred_mode( h, cb );
  795. +}
  796. +
  797. +static inline void x264_cabac_mb_header( x264_t *h, x264_cabac_t *cb )
  798. +{
  799. + const int i_mb_type = h->mb.i_type;
  800. + int i_list, i;
  801. +
  802. + if( h->sh.b_mbaff &&
  803. + (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
  804. + {
  805. + x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
  806. + }
  807. +
  808. + if( h->sh.i_type == SLICE_TYPE_I )
  809. + {
  810. + int ctx = 0;
  811. + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
  812. + ctx++;
  813. + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
  814. + ctx++;
  815. +
  816. + x264_cabac_mb_header_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
  817. + }
  818. + else if( h->sh.i_type == SLICE_TYPE_P )
  819. + {
  820. + /* prefix: 14, suffix: 17 */
  821. + if( i_mb_type == P_L0 )
  822. + {
  823. + x264_cabac_encode_decision_noup( cb, 14, 0 );
  824. + if( h->mb.i_partition == D_16x16 )
  825. + {
  826. + x264_cabac_encode_decision_noup( cb, 15, 0 );
  827. + x264_cabac_encode_decision_noup( cb, 16, 0 );
  828. + if( h->mb.pic.i_fref[0] > 1 )
  829. + x264_cabac_mb_ref( h, cb, 0, 0 );
  830. + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
  831. + }
  832. + else if( h->mb.i_partition == D_16x8 )
  833. + {
  834. + x264_cabac_encode_decision_noup( cb, 15, 1 );
  835. + x264_cabac_encode_decision_noup( cb, 17, 1 );
  836. + if( h->mb.pic.i_fref[0] > 1 )
  837. + {
  838. + x264_cabac_mb_ref( h, cb, 0, 0 );
  839. + x264_cabac_mb_ref( h, cb, 0, 8 );
  840. + }
  841. + x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
  842. + x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
  843. + }
  844. + else //if( h->mb.i_partition == D_8x16 )
  845. + {
  846. + x264_cabac_encode_decision_noup( cb, 15, 1 );
  847. + x264_cabac_encode_decision_noup( cb, 17, 0 );
  848. + if( h->mb.pic.i_fref[0] > 1 )
  849. + {
  850. + x264_cabac_mb_ref( h, cb, 0, 0 );
  851. + x264_cabac_mb_ref( h, cb, 0, 4 );
  852. + }
  853. + x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
  854. + x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
  855. + }
  856. + }
  857. + else if( i_mb_type == P_8x8 )
  858. + {
  859. + x264_cabac_encode_decision_noup( cb, 14, 0 );
  860. + x264_cabac_encode_decision_noup( cb, 15, 0 );
  861. + x264_cabac_encode_decision_noup( cb, 16, 1 );
  862. +
  863. + /* sub mb type */
  864. + for( i = 0; i < 4; i++ )
  865. + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
  866. +
  867. + /* ref 0 */
  868. + if( h->mb.pic.i_fref[0] > 1 )
  869. + {
  870. + x264_cabac_mb_ref( h, cb, 0, 0 );
  871. + x264_cabac_mb_ref( h, cb, 0, 4 );
  872. + x264_cabac_mb_ref( h, cb, 0, 8 );
  873. + x264_cabac_mb_ref( h, cb, 0, 12 );
  874. + }
  875. +
  876. + for( i = 0; i < 4; i++ )
  877. + x264_cabac_mb8x8_mvd( h, cb, i );
  878. + }
  879. + else /* intra */
  880. + {
  881. + /* prefix */
  882. + x264_cabac_encode_decision_noup( cb, 14, 1 );
  883. +
  884. + /* suffix */
  885. + x264_cabac_mb_header_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
  886. + }
  887. + }
  888. + else //if( h->sh.i_type == SLICE_TYPE_B )
  889. + {
  890. + int ctx = 0;
  891. + if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
  892. + ctx++;
  893. + if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
  894. + ctx++;
  895. +
  896. + if( i_mb_type == B_DIRECT )
  897. + {
  898. + x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
  899. + return;
  900. + }
  901. + x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
  902. +
  903. + if( i_mb_type == B_8x8 )
  904. + {
  905. + x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  906. + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  907. + x264_cabac_encode_decision ( cb, 27+5, 1 );
  908. + x264_cabac_encode_decision ( cb, 27+5, 1 );
  909. + x264_cabac_encode_decision_noup( cb, 27+5, 1 );
  910. +
  911. + /* sub mb type */
  912. + for( i = 0; i < 4; i++ )
  913. + x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
  914. +
  915. + /* ref */
  916. + if( h->mb.pic.i_fref[0] > 1 )
  917. + for( i = 0; i < 4; i++ )
  918. + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  919. + x264_cabac_mb_ref( h, cb, 0, 4*i );
  920. +
  921. + if( h->mb.pic.i_fref[1] > 1 )
  922. + for( i = 0; i < 4; i++ )
  923. + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  924. + x264_cabac_mb_ref( h, cb, 1, 4*i );
  925. +
  926. + for( i = 0; i < 4; i++ )
  927. + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  928. + x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
  929. +
  930. + for( i = 0; i < 4; i++ )
  931. + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  932. + x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
  933. + }
  934. + else if( IS_INTRA( i_mb_type ) )
  935. + {
  936. + /* prefix */
  937. + x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  938. + x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  939. + x264_cabac_encode_decision ( cb, 27+5, 1 );
  940. + x264_cabac_encode_decision ( cb, 27+5, 0 );
  941. + x264_cabac_encode_decision ( cb, 27+5, 1 );
  942. +
  943. + /* suffix */
  944. + x264_cabac_mb_header_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
  945. + }
  946. + else
  947. + {
  948. + static const uint8_t i_mb_bits[9*3] =
  949. + {
  950. + 0x31, 0x29, 0x4, /* L0 L0 */
  951. + 0x35, 0x2d, 0, /* L0 L1 */
  952. + 0x43, 0x63, 0, /* L0 BI */
  953. + 0x3d, 0x2f, 0, /* L1 L0 */
  954. + 0x39, 0x25, 0x6, /* L1 L1 */
  955. + 0x53, 0x73, 0, /* L1 BI */
  956. + 0x4b, 0x6b, 0, /* BI L0 */
  957. + 0x5b, 0x7b, 0, /* BI L1 */
  958. + 0x47, 0x67, 0x21 /* BI BI */
  959. + };
  960. +
  961. + const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
  962. + int bits = i_mb_bits[idx];
  963. +
  964. + x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
  965. + x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
  966. + if( bits != 1 )
  967. + {
  968. + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  969. + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  970. + x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  971. + if( bits != 1 )
  972. + x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
  973. + }
  974. +
  975. + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
  976. + if( h->mb.pic.i_fref[0] > 1 )
  977. + {
  978. + if( b_list[0][0] )
  979. + x264_cabac_mb_ref( h, cb, 0, 0 );
  980. + if( b_list[0][1] && h->mb.i_partition != D_16x16 )
  981. + x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
  982. + }
  983. + if( h->mb.pic.i_fref[1] > 1 )
  984. + {
  985. + if( b_list[1][0] )
  986. + x264_cabac_mb_ref( h, cb, 1, 0 );
  987. + if( b_list[1][1] && h->mb.i_partition != D_16x16 )
  988. + x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
  989. + }
  990. + for( i_list = 0; i_list < 2; i_list++ )
  991. + {
  992. + if( h->mb.i_partition == D_16x16 )
  993. + {
  994. + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
  995. + }
  996. + else if( h->mb.i_partition == D_16x8 )
  997. + {
  998. + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
  999. + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
  1000. + }
  1001. + else //if( h->mb.i_partition == D_8x16 )
  1002. + {
  1003. + if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
  1004. + if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
  1005. + }
  1006. + }
  1007. + }
  1008. + }
  1009. +}
  1010. +
  1011. /* i_ctxBlockCat: 0-> DC 16x16 i_idx = 0
  1012. * 1-> AC 16x16 i_idx = luma4x4idx
  1013. * 2-> Luma4x4 i_idx = luma4x4idx
  1014. @@ -752,7 +867,6 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
  1015. void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1016. {
  1017. const int i_mb_type = h->mb.i_type;
  1018. - int i_list;
  1019. int i;
  1020.  
  1021. #if !RDO_SKIP_BS
  1022. @@ -760,15 +874,14 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1023. int i_mb_pos_tex;
  1024. #endif
  1025.  
  1026. - /* Write the MB type */
  1027. - x264_cabac_mb_type( h, cb );
  1028. + x264_cabac_mb_header( h, cb );
  1029.  
  1030. #if !RDO_SKIP_BS
  1031. + i_mb_pos_tex = x264_cabac_pos( cb );
  1032. + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  1033. +
  1034. if( i_mb_type == I_PCM )
  1035. {
  1036. - i_mb_pos_tex = x264_cabac_pos( cb );
  1037. - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  1038. -
  1039. memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
  1040. cb->p += 256;
  1041. for( i = 0; i < 8; i++ )
  1042. @@ -793,140 +906,6 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1043. }
  1044. #endif
  1045.  
  1046. - if( IS_INTRA( i_mb_type ) )
  1047. - {
  1048. - if( h->pps->b_transform_8x8_mode && i_mb_type != I_16x16 )
  1049. - x264_cabac_mb_transform_size( h, cb );
  1050. -
  1051. - if( i_mb_type != I_16x16 )
  1052. - {
  1053. - int di = h->mb.b_transform_8x8 ? 4 : 1;
  1054. - for( i = 0; i < 16; i += di )
  1055. - {
  1056. - const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
  1057. - const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
  1058. - x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
  1059. - }
  1060. - }
  1061. -
  1062. - x264_cabac_mb_intra_chroma_pred_mode( h, cb );
  1063. - }
  1064. - else if( i_mb_type == P_L0 )
  1065. - {
  1066. - if( h->mb.i_partition == D_16x16 )
  1067. - {
  1068. - if( h->mb.pic.i_fref[0] > 1 )
  1069. - {
  1070. - x264_cabac_mb_ref( h, cb, 0, 0 );
  1071. - }
  1072. - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 4 );
  1073. - }
  1074. - else if( h->mb.i_partition == D_16x8 )
  1075. - {
  1076. - if( h->mb.pic.i_fref[0] > 1 )
  1077. - {
  1078. - x264_cabac_mb_ref( h, cb, 0, 0 );
  1079. - x264_cabac_mb_ref( h, cb, 0, 8 );
  1080. - }
  1081. - x264_cabac_mb_mvd( h, cb, 0, 0, 4, 2 );
  1082. - x264_cabac_mb_mvd( h, cb, 0, 8, 4, 2 );
  1083. - }
  1084. - else //if( h->mb.i_partition == D_8x16 )
  1085. - {
  1086. - if( h->mb.pic.i_fref[0] > 1 )
  1087. - {
  1088. - x264_cabac_mb_ref( h, cb, 0, 0 );
  1089. - x264_cabac_mb_ref( h, cb, 0, 4 );
  1090. - }
  1091. - x264_cabac_mb_mvd( h, cb, 0, 0, 2, 4 );
  1092. - x264_cabac_mb_mvd( h, cb, 0, 4, 2, 4 );
  1093. - }
  1094. - }
  1095. - else if( i_mb_type == P_8x8 )
  1096. - {
  1097. - /* sub mb type */
  1098. - for( i = 0; i < 4; i++ )
  1099. - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i] );
  1100. -
  1101. - /* ref 0 */
  1102. - if( h->mb.pic.i_fref[0] > 1 )
  1103. - {
  1104. - x264_cabac_mb_ref( h, cb, 0, 0 );
  1105. - x264_cabac_mb_ref( h, cb, 0, 4 );
  1106. - x264_cabac_mb_ref( h, cb, 0, 8 );
  1107. - x264_cabac_mb_ref( h, cb, 0, 12 );
  1108. - }
  1109. -
  1110. - for( i = 0; i < 4; i++ )
  1111. - x264_cabac_mb8x8_mvd( h, cb, i );
  1112. - }
  1113. - else if( i_mb_type == B_8x8 )
  1114. - {
  1115. - /* sub mb type */
  1116. - for( i = 0; i < 4; i++ )
  1117. - x264_cabac_mb_sub_b_partition( cb, h->mb.i_sub_partition[i] );
  1118. -
  1119. - /* ref */
  1120. - if( h->mb.pic.i_fref[0] > 1 )
  1121. - for( i = 0; i < 4; i++ )
  1122. - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  1123. - x264_cabac_mb_ref( h, cb, 0, 4*i );
  1124. -
  1125. - if( h->mb.pic.i_fref[1] > 1 )
  1126. - for( i = 0; i < 4; i++ )
  1127. - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  1128. - x264_cabac_mb_ref( h, cb, 1, 4*i );
  1129. -
  1130. - for( i = 0; i < 4; i++ )
  1131. - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  1132. - x264_cabac_mb_mvd( h, cb, 0, 4*i, 2, 2 );
  1133. -
  1134. - for( i = 0; i < 4; i++ )
  1135. - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  1136. - x264_cabac_mb_mvd( h, cb, 1, 4*i, 2, 2 );
  1137. - }
  1138. - else if( i_mb_type != B_DIRECT )
  1139. - {
  1140. - /* All B mode */
  1141. - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
  1142. - if( h->mb.pic.i_fref[0] > 1 )
  1143. - {
  1144. - if( b_list[0][0] )
  1145. - x264_cabac_mb_ref( h, cb, 0, 0 );
  1146. - if( b_list[0][1] && h->mb.i_partition != D_16x16 )
  1147. - x264_cabac_mb_ref( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
  1148. - }
  1149. - if( h->mb.pic.i_fref[1] > 1 )
  1150. - {
  1151. - if( b_list[1][0] )
  1152. - x264_cabac_mb_ref( h, cb, 1, 0 );
  1153. - if( b_list[1][1] && h->mb.i_partition != D_16x16 )
  1154. - x264_cabac_mb_ref( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
  1155. - }
  1156. - for( i_list = 0; i_list < 2; i_list++ )
  1157. - {
  1158. - if( h->mb.i_partition == D_16x16 )
  1159. - {
  1160. - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 4 );
  1161. - }
  1162. - else if( h->mb.i_partition == D_16x8 )
  1163. - {
  1164. - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 4, 2 );
  1165. - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 8, 4, 2 );
  1166. - }
  1167. - else //if( h->mb.i_partition == D_8x16 )
  1168. - {
  1169. - if( b_list[i_list][0] ) x264_cabac_mb_mvd( h, cb, i_list, 0, 2, 4 );
  1170. - if( b_list[i_list][1] ) x264_cabac_mb_mvd( h, cb, i_list, 4, 2, 4 );
  1171. - }
  1172. - }
  1173. - }
  1174. -
  1175. -#if !RDO_SKIP_BS
  1176. - i_mb_pos_tex = x264_cabac_pos( cb );
  1177. - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  1178. -#endif
  1179. -
  1180. if( i_mb_type != I_16x16 )
  1181. {
  1182. x264_cabac_mb_cbp_luma( h, cb );
  1183. @@ -934,11 +913,9 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1184. }
  1185.  
  1186. if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
  1187. - {
  1188. x264_cabac_mb_transform_size( h, cb );
  1189. - }
  1190.  
  1191. - if( h->mb.i_cbp_luma > 0 || h->mb.i_cbp_chroma > 0 || i_mb_type == I_16x16 )
  1192. + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
  1193. {
  1194. const int b_intra = IS_INTRA( i_mb_type );
  1195. x264_cabac_mb_qp_delta( h, cb );
  1196. @@ -950,7 +927,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1197. block_residual_write_cabac_cbf( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 1 );
  1198.  
  1199. /* AC Luma */
  1200. - if( h->mb.i_cbp_luma != 0 )
  1201. + if( h->mb.i_cbp_luma )
  1202. for( i = 0; i < 16; i++ )
  1203. block_residual_write_cabac_cbf( h, cb, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 1 );
  1204. }
  1205. @@ -967,7 +944,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1206. block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
  1207. }
  1208.  
  1209. - if( h->mb.i_cbp_chroma&0x03 ) /* Chroma DC residual present */
  1210. + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
  1211. {
  1212. block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
  1213. block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
  1214. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  1215. index c65c9bd..d18408b 100644
  1216. --- a/encoder/cavlc.c
  1217. +++ b/encoder/cavlc.c
  1218. @@ -203,7 +203,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
  1219. *nnz = block_residual_write_cavlc(h,cat,l,nC);\
  1220. }
  1221.  
  1222. -static void cavlc_qp_delta( x264_t *h )
  1223. +static void x264_cavlc_mb_qp_delta( x264_t *h )
  1224. {
  1225. bs_t *s = &h->out.bs;
  1226. int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  1227. @@ -228,7 +228,7 @@ static void cavlc_qp_delta( x264_t *h )
  1228. bs_write_se( s, i_dqp );
  1229. }
  1230.  
  1231. -static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
  1232. +static void x264_cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
  1233. {
  1234. bs_t *s = &h->out.bs;
  1235. ALIGNED_4( int16_t mvp[2] );
  1236. @@ -237,26 +237,26 @@ static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
  1237. bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
  1238. }
  1239.  
  1240. -static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
  1241. +static inline void x264_cavlc_mb8x8_mvd( x264_t *h, int i )
  1242. {
  1243. switch( h->mb.i_sub_partition[i] )
  1244. {
  1245. case D_L0_8x8:
  1246. - cavlc_mb_mvd( h, 0, 4*i, 2 );
  1247. + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
  1248. break;
  1249. case D_L0_8x4:
  1250. - cavlc_mb_mvd( h, 0, 4*i+0, 2 );
  1251. - cavlc_mb_mvd( h, 0, 4*i+2, 2 );
  1252. + x264_cavlc_mb_mvd( h, 0, 4*i+0, 2 );
  1253. + x264_cavlc_mb_mvd( h, 0, 4*i+2, 2 );
  1254. break;
  1255. case D_L0_4x8:
  1256. - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
  1257. - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
  1258. + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
  1259. + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
  1260. break;
  1261. case D_L0_4x4:
  1262. - cavlc_mb_mvd( h, 0, 4*i+0, 1 );
  1263. - cavlc_mb_mvd( h, 0, 4*i+1, 1 );
  1264. - cavlc_mb_mvd( h, 0, 4*i+2, 1 );
  1265. - cavlc_mb_mvd( h, 0, 4*i+3, 1 );
  1266. + x264_cavlc_mb_mvd( h, 0, 4*i+0, 1 );
  1267. + x264_cavlc_mb_mvd( h, 0, 4*i+1, 1 );
  1268. + x264_cavlc_mb_mvd( h, 0, 4*i+2, 1 );
  1269. + x264_cavlc_mb_mvd( h, 0, 4*i+3, 1 );
  1270. break;
  1271. }
  1272. }
  1273. @@ -372,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1274.  
  1275. if( h->mb.pic.i_fref[0] > 1 )
  1276. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  1277. - cavlc_mb_mvd( h, 0, 0, 4 );
  1278. + x264_cavlc_mb_mvd( h, 0, 0, 4 );
  1279. }
  1280. else if( h->mb.i_partition == D_16x8 )
  1281. {
  1282. @@ -382,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1283. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  1284. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
  1285. }
  1286. - cavlc_mb_mvd( h, 0, 0, 4 );
  1287. - cavlc_mb_mvd( h, 0, 8, 4 );
  1288. + x264_cavlc_mb_mvd( h, 0, 0, 4 );
  1289. + x264_cavlc_mb_mvd( h, 0, 8, 4 );
  1290. }
  1291. else if( h->mb.i_partition == D_8x16 )
  1292. {
  1293. @@ -393,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1294. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  1295. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
  1296. }
  1297. - cavlc_mb_mvd( h, 0, 0, 2 );
  1298. - cavlc_mb_mvd( h, 0, 4, 2 );
  1299. + x264_cavlc_mb_mvd( h, 0, 0, 2 );
  1300. + x264_cavlc_mb_mvd( h, 0, 4, 2 );
  1301. }
  1302. }
  1303. else if( i_mb_type == P_8x8 )
  1304. @@ -429,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1305. }
  1306.  
  1307. for( i = 0; i < 4; i++ )
  1308. - cavlc_mb8x8_mvd( h, i );
  1309. + x264_cavlc_mb8x8_mvd( h, i );
  1310. }
  1311. else if( i_mb_type == B_8x8 )
  1312. {
  1313. @@ -452,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1314. /* mvd */
  1315. for( i = 0; i < 4; i++ )
  1316. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  1317. - cavlc_mb_mvd( h, 0, 4*i, 2 );
  1318. + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
  1319. for( i = 0; i < 4; i++ )
  1320. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  1321. - cavlc_mb_mvd( h, 1, 4*i, 2 );
  1322. + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
  1323. }
  1324. else if( i_mb_type != B_DIRECT )
  1325. {
  1326. @@ -470,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1327. {
  1328. if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
  1329. if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
  1330. - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
  1331. - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
  1332. + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
  1333. + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
  1334. }
  1335. else
  1336. {
  1337. @@ -481,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1338. if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
  1339. if( h->mb.i_partition == D_16x8 )
  1340. {
  1341. - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
  1342. - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
  1343. - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
  1344. - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
  1345. + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
  1346. + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
  1347. + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
  1348. + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
  1349. }
  1350. else //if( h->mb.i_partition == D_8x16 )
  1351. {
  1352. - if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
  1353. - if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
  1354. - if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
  1355. - if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
  1356. + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
  1357. + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
  1358. + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
  1359. + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
  1360. }
  1361. }
  1362. }
  1363. @@ -514,31 +514,31 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1364. bs_write1( s, h->mb.b_transform_8x8 );
  1365.  
  1366. /* write residual */
  1367. - if( i_mb_type == I_16x16 )
  1368. + if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
  1369. {
  1370. - cavlc_qp_delta( h );
  1371. + x264_cavlc_mb_qp_delta( h );
  1372.  
  1373. - /* DC Luma */
  1374. - block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
  1375. + if( i_mb_type == I_16x16 )
  1376. + {
  1377. + /* DC Luma */
  1378. + block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
  1379.  
  1380. - /* AC Luma */
  1381. - if( h->mb.i_cbp_luma )
  1382. - for( i = 0; i < 16; i++ )
  1383. - block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
  1384. - }
  1385. - else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
  1386. - {
  1387. - cavlc_qp_delta( h );
  1388. - x264_macroblock_luma_write_cavlc( h, 0, 3 );
  1389. - }
  1390. - if( h->mb.i_cbp_chroma )
  1391. - {
  1392. - /* Chroma DC residual present */
  1393. - block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
  1394. - block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
  1395. - if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
  1396. - for( i = 16; i < 24; i++ )
  1397. - block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
  1398. + /* AC Luma */
  1399. + if( h->mb.i_cbp_luma )
  1400. + for( i = 0; i < 16; i++ )
  1401. + block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
  1402. + }
  1403. + else
  1404. + x264_macroblock_luma_write_cavlc( h, 0, 3 );
  1405. +
  1406. + if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
  1407. + {
  1408. + block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
  1409. + block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
  1410. + if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
  1411. + for( i = 16; i < 24; i++ )
  1412. + block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
  1413. + }
  1414. }
  1415.  
  1416. #if !RDO_SKIP_BS
  1417. @@ -563,22 +563,22 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
  1418.  
  1419. if( i_mb_type == P_8x8 )
  1420. {
  1421. - cavlc_mb8x8_mvd( h, i8 );
  1422. + x264_cavlc_mb8x8_mvd( h, i8 );
  1423. bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
  1424. }
  1425. else if( i_mb_type == P_L0 )
  1426. - cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  1427. + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  1428. else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
  1429. {
  1430. - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  1431. - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
  1432. + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  1433. + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
  1434. }
  1435. else //if( i_mb_type == B_8x8 )
  1436. {
  1437. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  1438. - cavlc_mb_mvd( h, 0, 4*i8, 2 );
  1439. + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
  1440. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  1441. - cavlc_mb_mvd( h, 1, 4*i8, 2 );
  1442. + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
  1443. }
  1444.  
  1445. for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
  1446. @@ -596,7 +596,7 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
  1447. {
  1448. int b_8x4 = i_pixel == PIXEL_8x4;
  1449. h->out.bs.i_bits_encoded = 0;
  1450. - cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
  1451. + x264_cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
  1452. block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
  1453. if( i_pixel != PIXEL_4x4 )
  1454. {
  1455. --
  1456. 1.6.1.2
  1457.  
  1458.  
  1459. From c8db72d3e9af6f5850a2e94904657910a77c5103 Mon Sep 17 00:00:00 2001
  1460. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1461. Date: Wed, 3 Feb 2010 18:19:29 -0800
  1462. Subject: [PATCH 05/14] Simplify decimate checks in macroblock_encode
  1463. Also fix a misleading comment.
  1464.  
  1465. ---
  1466. common/common.h | 1 +
  1467. encoder/analyse.c | 1 +
  1468. encoder/macroblock.c | 12 +++++-------
  1469. 3 files changed, 7 insertions(+), 7 deletions(-)
  1470.  
  1471. diff --git a/common/common.h b/common/common.h
  1472. index 950f48f..8b1b05a 100644
  1473. --- a/common/common.h
  1474. +++ b/common/common.h
  1475. @@ -484,6 +484,7 @@ struct x264_t
  1476. int b_chroma_me;
  1477. int b_trellis;
  1478. int b_noise_reduction;
  1479. + int b_dct_decimate;
  1480. int i_psy_rd; /* Psy RD strength--fixed point value*/
  1481. int i_psy_trellis; /* Psy trellis strength--fixed point value*/
  1482.  
  1483. diff --git a/encoder/analyse.c b/encoder/analyse.c
  1484. index 1fb2206..b8710dc 100644
  1485. --- a/encoder/analyse.c
  1486. +++ b/encoder/analyse.c
  1487. @@ -364,6 +364,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  1488. h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
  1489. h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
  1490. && h->mb.i_subpel_refine >= 5;
  1491. + h->mb.b_dct_decimate = h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I;
  1492.  
  1493. h->mb.b_transform_8x8 = 0;
  1494. h->mb.b_noise_reduction = 0;
  1495. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  1496. index e4edb8a..fa7942d 100644
  1497. --- a/encoder/macroblock.c
  1498. +++ b/encoder/macroblock.c
  1499. @@ -208,8 +208,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  1500. ALIGNED_ARRAY_16( int16_t, dct_dc4x4,[16] );
  1501.  
  1502. int i, nz;
  1503. - int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
  1504. - int decimate_score = b_decimate ? 0 : 9;
  1505. + int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
  1506.  
  1507. if( h->mb.b_lossless )
  1508. {
  1509. @@ -342,7 +341,7 @@ static inline int x264_mb_optimize_chroma_dc( x264_t *h, int b_inter, int i_qp,
  1510. void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  1511. {
  1512. int i, ch, nz, nz_dc;
  1513. - int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
  1514. + int b_decimate = b_inter && h->mb.b_dct_decimate;
  1515. ALIGNED_ARRAY_16( int16_t, dct2x2,[4] );
  1516. h->mb.i_cbp_chroma = 0;
  1517.  
  1518. @@ -607,7 +606,7 @@ void x264_macroblock_encode( x264_t *h )
  1519. {
  1520. int i_cbp_dc = 0;
  1521. int i_qp = h->mb.i_qp;
  1522. - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
  1523. + int b_decimate = h->mb.b_dct_decimate;
  1524. int b_force_no_skip = 0;
  1525. int i,idx,nz;
  1526. h->mb.i_cbp_luma = 0;
  1527. @@ -914,8 +913,7 @@ void x264_macroblock_encode( x264_t *h )
  1528.  
  1529. /*****************************************************************************
  1530. * x264_macroblock_probe_skip:
  1531. - * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
  1532. - * the previous QP
  1533. + * Check if the current MB could be encoded as a [PB]_SKIP
  1534. *****************************************************************************/
  1535. int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
  1536. {
  1537. @@ -1052,7 +1050,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  1538. int i_qp = h->mb.i_qp;
  1539. uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
  1540. uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
  1541. - int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
  1542. + int b_decimate = h->mb.b_dct_decimate;
  1543. int nnz8x8 = 0;
  1544. int ch, nz;
  1545.  
  1546. --
  1547. 1.6.1.2
  1548.  
  1549.  
  1550. From 69cba07228fce5004ad526aac68887e43fcfcacc Mon Sep 17 00:00:00 2001
  1551. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1552. Date: Wed, 3 Feb 2010 18:36:44 -0800
  1553. Subject: [PATCH 06/14] Fix subpel iteration counts with B-frame analysis and subme 6/8
  1554. Since subme 6 means "like subme 5, except RD on P-frames", B-frame analysis
  1555. shouldn't use the RD subpel counts at subme 6. The same applies to subme 8.
  1556. Slightly faster (and very marginally worse) compression at subme 6 and 8.
  1557.  
  1558. ---
  1559. encoder/analyse.c | 2 ++
  1560. 1 files changed, 2 insertions(+), 0 deletions(-)
  1561.  
  1562. diff --git a/encoder/analyse.c b/encoder/analyse.c
  1563. index b8710dc..35d39d5 100644
  1564. --- a/encoder/analyse.c
  1565. +++ b/encoder/analyse.c
  1566. @@ -362,6 +362,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  1567.  
  1568. h->mb.i_me_method = h->param.analyse.i_me_method;
  1569. h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
  1570. + if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
  1571. + h->mb.i_subpel_refine--;
  1572. h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
  1573. && h->mb.i_subpel_refine >= 5;
  1574. h->mb.b_dct_decimate = h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I;
  1575. --
  1576. 1.6.1.2
  1577.  
  1578.  
  1579. From 6561e6ff5091f773facb9b1f3bf57bb037fe0267 Mon Sep 17 00:00:00 2001
  1580. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1581. Date: Wed, 3 Feb 2010 20:01:16 -0800
  1582. Subject: [PATCH 07/14] Smarter QPRD
  1583. Catch some cases in which RD checks can be avoided; reduces QPRD RD calls by 10-20%.
  1584.  
  1585. ---
  1586. encoder/analyse.c | 42 ++++++++++++++++++++++++++++++++++++++----
  1587. 1 files changed, 38 insertions(+), 4 deletions(-)
  1588.  
  1589. diff --git a/encoder/analyse.c b/encoder/analyse.c
  1590. index 35d39d5..e30b849 100644
  1591. --- a/encoder/analyse.c
  1592. +++ b/encoder/analyse.c
  1593. @@ -2306,9 +2306,10 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  1594. int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
  1595. int last_qp_tried = 0;
  1596. origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
  1597. + int origcbp = h->mb.cbp[h->mb.i_mb_xy];
  1598.  
  1599. /* If CBP is already zero, don't raise the quantizer any higher. */
  1600. - for( direction = h->mb.cbp[h->mb.i_mb_xy] ? 1 : -1; direction >= -1; direction-=2 )
  1601. + for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
  1602. {
  1603. /* Without psy-RD, require monotonicity when moving quant away from previous
  1604. * macroblock's quant; allow 1 failure when moving quant towards previous quant.
  1605. @@ -2323,14 +2324,47 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  1606. h->mb.i_qp = orig_qp;
  1607. failures = 0;
  1608. prevcost = origcost;
  1609. +
  1610. + /* If the current QP results in an empty CBP, it's highly likely that lower QPs
  1611. + * (up to a point) will too. So, jump down to where the threshold will kick in
  1612. + * and check the QP there. If the CBP is still empty, skip the main loop.
  1613. + * If it isn't empty, we would have ended up having to check this QP anyways,
  1614. + * so as long as we store it for later lookup, we lose nothing. */
  1615. + int already_checked_qp = -1;
  1616. + int already_checked_cost = COST_MAX;
  1617. + if( direction == -1 )
  1618. + {
  1619. + if( !origcbp )
  1620. + {
  1621. + h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
  1622. + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
  1623. + already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
  1624. + if( !h->mb.cbp[h->mb.i_mb_xy] )
  1625. + {
  1626. + /* If our empty-CBP block is lower QP than the last QP,
  1627. + * the last QP cannot possibly have a CBP either. */
  1628. + if( h->mb.i_last_qp > h->mb.i_qp )
  1629. + last_qp_tried = 1;
  1630. + break;
  1631. + }
  1632. + already_checked_qp = h->mb.i_qp;
  1633. + h->mb.i_qp = orig_qp;
  1634. + }
  1635. + }
  1636. +
  1637. h->mb.i_qp += direction;
  1638. while( h->mb.i_qp >= h->param.rc.i_qp_min && h->mb.i_qp <= h->param.rc.i_qp_max )
  1639. {
  1640. if( h->mb.i_last_qp == h->mb.i_qp )
  1641. last_qp_tried = 1;
  1642. - h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
  1643. - cost = x264_rd_cost_mb( h, a->i_lambda2 );
  1644. - COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
  1645. + if( h->mb.i_qp == already_checked_qp )
  1646. + cost = already_checked_cost;
  1647. + else
  1648. + {
  1649. + h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
  1650. + cost = x264_rd_cost_mb( h, a->i_lambda2 );
  1651. + COPY2_IF_LT( bcost, cost, bqp, h->mb.i_qp );
  1652. + }
  1653.  
  1654. /* We can't assume that the costs are monotonic over QPs.
  1655. * Tie case-as-failure seems to give better results. */
  1656. --
  1657. 1.6.1.2
  1658.  
  1659.  
  1660. From 44499e11c37c2eda2438e7d346c24f9c66008363 Mon Sep 17 00:00:00 2001
  1661. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1662. Date: Wed, 3 Feb 2010 20:27:57 -0800
  1663. Subject: [PATCH 08/14] Fix 2-pass ratecontrol continuation in case of missing statsfile
  1664. Continuation didn't work properly if MB-tree was enabled.
  1665.  
  1666. ---
  1667. encoder/ratecontrol.c | 1 +
  1668. 1 files changed, 1 insertions(+), 0 deletions(-)
  1669.  
  1670. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1671. index 52196e7..e314ba2 100644
  1672. --- a/encoder/ratecontrol.c
  1673. +++ b/encoder/ratecontrol.c
  1674. @@ -1280,6 +1280,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
  1675. h->thread[i]->param.rc.b_stat_read = 0;
  1676. h->thread[i]->param.i_bframe_adaptive = 0;
  1677. h->thread[i]->param.i_scenecut_threshold = 0;
  1678. + h->thread[i]->param.rc.b_mb_tree = 0;
  1679. if( h->thread[i]->param.i_bframe > 1 )
  1680. h->thread[i]->param.i_bframe = 1;
  1681. }
  1682. --
  1683. 1.6.1.2
  1684.  
  1685.  
  1686. From 95fa057ad69e497b7adf0391ef8e63b0c3d24d17 Mon Sep 17 00:00:00 2001
  1687. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1688. Date: Fri, 5 Feb 2010 16:15:23 -0800
  1689. Subject: [PATCH 09/14] Various CABAC/CAVLC cleanups/speedups
  1690. Make some if/else chains into switch statements.
  1691. Store CBP data in x264_t and only move it to frame storage later.
  1692. This saves a wasted cache line and some unnecessary dereferences in RDO.
  1693.  
  1694. ---
  1695. common/common.h | 1 +
  1696. common/macroblock.c | 3 +-
  1697. encoder/analyse.c | 8 +-
  1698. encoder/cabac.c | 40 +++---
  1699. encoder/cavlc.c | 365 ++++++++++++++++++++++++++------------------------
  1700. encoder/macroblock.c | 19 +--
  1701. 6 files changed, 219 insertions(+), 217 deletions(-)
  1702.  
  1703. diff --git a/common/common.h b/common/common.h
  1704. index 8b1b05a..d4a8dd9 100644
  1705. --- a/common/common.h
  1706. +++ b/common/common.h
  1707. @@ -542,6 +542,7 @@ struct x264_t
  1708. ALIGNED_4( uint8_t i_sub_partition[4] );
  1709. int b_transform_8x8;
  1710.  
  1711. + int i_cbp_combined;
  1712. int i_cbp_luma;
  1713. int i_cbp_chroma;
  1714.  
  1715. diff --git a/common/macroblock.c b/common/macroblock.c
  1716. index 10f09ac..d86f3af 100644
  1717. --- a/common/macroblock.c
  1718. +++ b/common/macroblock.c
  1719. @@ -1343,11 +1343,12 @@ void x264_macroblock_cache_save( x264_t *h )
  1720. M16( &non_zero_count[16+2*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+2*2]-1] ) >> 8;
  1721. M16( &non_zero_count[16+3*2] ) = M32( &h->mb.cache.non_zero_count[x264_scan8[16+3*2]-1] ) >> 8;
  1722.  
  1723. - if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
  1724. + if( h->mb.i_type != I_16x16 && !h->mb.i_cbp_combined )
  1725. h->mb.i_qp = h->mb.i_last_qp;
  1726. h->mb.qp[i_mb_xy] = h->mb.i_qp;
  1727. h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
  1728. h->mb.i_last_qp = h->mb.i_qp;
  1729. + h->mb.cbp[i_mb_xy] = h->mb.i_cbp_combined;
  1730. }
  1731.  
  1732. if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
  1733. diff --git a/encoder/analyse.c b/encoder/analyse.c
  1734. index e30b849..dba85b4 100644
  1735. --- a/encoder/analyse.c
  1736. +++ b/encoder/analyse.c
  1737. @@ -1198,7 +1198,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  1738. h->mb.i_partition = D_16x16;
  1739. x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
  1740. a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
  1741. - if( !(h->mb.i_cbp_luma|h->mb.i_cbp_chroma) )
  1742. + if( !h->mb.i_cbp_combined )
  1743. h->mb.i_type = P_SKIP;
  1744. }
  1745. }
  1746. @@ -2306,7 +2306,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  1747. int orig_qp = h->mb.i_qp, bqp = h->mb.i_qp;
  1748. int last_qp_tried = 0;
  1749. origcost = bcost = x264_rd_cost_mb( h, a->i_lambda2 );
  1750. - int origcbp = h->mb.cbp[h->mb.i_mb_xy];
  1751. + int origcbp = h->mb.i_cbp_combined;
  1752.  
  1753. /* If CBP is already zero, don't raise the quantizer any higher. */
  1754. for( direction = origcbp ? 1 : -1; direction >= -1; direction-=2 )
  1755. @@ -2339,7 +2339,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  1756. h->mb.i_qp = X264_MAX( h->mb.i_qp - threshold - 1, h->param.rc.i_qp_min );
  1757. h->mb.i_chroma_qp = h->chroma_qp_table[h->mb.i_qp];
  1758. already_checked_cost = x264_rd_cost_mb( h, a->i_lambda2 );
  1759. - if( !h->mb.cbp[h->mb.i_mb_xy] )
  1760. + if( !h->mb.i_cbp_combined )
  1761. {
  1762. /* If our empty-CBP block is lower QP than the last QP,
  1763. * the last QP cannot possibly have a CBP either. */
  1764. @@ -2376,7 +2376,7 @@ static inline void x264_mb_analyse_qp_rd( x264_t *h, x264_mb_analysis_t *a )
  1765.  
  1766. if( failures > threshold )
  1767. break;
  1768. - if( direction == 1 && !h->mb.cbp[h->mb.i_mb_xy] )
  1769. + if( direction == 1 && !h->mb.i_cbp_combined )
  1770. break;
  1771. h->mb.i_qp += direction;
  1772. }
  1773. diff --git a/encoder/cabac.c b/encoder/cabac.c
  1774. index 6ff2aed..6c14722 100644
  1775. --- a/encoder/cabac.c
  1776. +++ b/encoder/cabac.c
  1777. @@ -107,7 +107,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  1778. int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  1779.  
  1780. /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
  1781. - if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
  1782. + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
  1783. {
  1784. #if !RDO_SKIP_BS
  1785. h->mb.i_qp = h->mb.i_last_qp;
  1786. @@ -915,7 +915,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  1787. if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
  1788. x264_cabac_mb_transform_size( h, cb );
  1789.  
  1790. - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
  1791. + if( h->mb.i_cbp_combined || i_mb_type == I_16x16 )
  1792. {
  1793. const int b_intra = IS_INTRA( i_mb_type );
  1794. x264_cabac_mb_qp_delta( h, cb );
  1795. @@ -973,24 +973,24 @@ static void x264_partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int
  1796. int b_8x16 = h->mb.i_partition == D_8x16;
  1797. int j;
  1798.  
  1799. - if( i_mb_type == P_8x8 )
  1800. + switch( i_mb_type )
  1801. {
  1802. - x264_cabac_mb8x8_mvd( h, cb, i8 );
  1803. - x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
  1804. - }
  1805. - else if( i_mb_type == P_L0 )
  1806. - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1807. - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
  1808. - {
  1809. - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1810. - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1811. - }
  1812. - else //if( i_mb_type == B_8x8 )
  1813. - {
  1814. - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  1815. - x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
  1816. - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  1817. - x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
  1818. + case P_L0:
  1819. + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1820. + break;
  1821. + case P_8x8:
  1822. + x264_cabac_mb8x8_mvd( h, cb, i8 );
  1823. + x264_cabac_mb_sub_p_partition( cb, h->mb.i_sub_partition[i8] );
  1824. + break;
  1825. + case B_8x8:
  1826. + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  1827. + x264_cabac_mb_mvd( h, cb, 0, 4*i8, 2, 2 );
  1828. + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  1829. + x264_cabac_mb_mvd( h, cb, 1, 4*i8, 2, 2 );
  1830. + break;
  1831. + default: /* Rest of the B types */
  1832. + if( x264_mb_type_list_table[i_mb_type][0][!!i8] ) x264_cabac_mb_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1833. + if( x264_mb_type_list_table[i_mb_type][1][!!i8] ) x264_cabac_mb_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1834. }
  1835.  
  1836. for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
  1837. @@ -1019,9 +1019,7 @@ static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, i
  1838. int b_8x4 = i_pixel == PIXEL_8x4;
  1839. block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 0 );
  1840. if( i_pixel == PIXEL_4x4 )
  1841. - {
  1842. x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
  1843. - }
  1844. else
  1845. {
  1846. x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
  1847. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  1848. index d18408b..45b55fe 100644
  1849. --- a/encoder/cavlc.c
  1850. +++ b/encoder/cavlc.c
  1851. @@ -209,8 +209,7 @@ static void x264_cavlc_mb_qp_delta( x264_t *h )
  1852. int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  1853.  
  1854. /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
  1855. - if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
  1856. - && !h->mb.cache.non_zero_count[x264_scan8[24]] )
  1857. + if( h->mb.i_type == I_16x16 && !h->mb.i_cbp_combined )
  1858. {
  1859. #if !RDO_SKIP_BS
  1860. h->mb.i_qp = h->mb.i_last_qp;
  1861. @@ -302,201 +301,209 @@ void x264_macroblock_write_cavlc( x264_t *h )
  1862. bs_write1( s, h->mb.b_interlaced );
  1863. }
  1864.  
  1865. -#if !RDO_SKIP_BS
  1866. - if( i_mb_type == I_PCM )
  1867. - {
  1868. - uint8_t *p_start = s->p_start;
  1869. - bs_write_ue( s, i_mb_i_offset + 25 );
  1870. - i_mb_pos_tex = bs_pos( s );
  1871. - h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  1872. -
  1873. - bs_align_0( s );
  1874. -
  1875. - memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
  1876. - s->p += 256;
  1877. - for( i = 0; i < 8; i++ )
  1878. - memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
  1879. - s->p += 64;
  1880. - for( i = 0; i < 8; i++ )
  1881. - memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
  1882. - s->p += 64;
  1883. -
  1884. - bs_init( s, s->p, s->p_end - s->p );
  1885. - s->p_start = p_start;
  1886. -
  1887. - /* if PCM is chosen, we need to store reconstructed frame data */
  1888. - h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
  1889. - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
  1890. - h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
  1891. -
  1892. - h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
  1893. - return;
  1894. - }
  1895. -#endif
  1896. -
  1897. /* Write:
  1898. - type
  1899. - prediction
  1900. - mv */
  1901. - if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  1902. + switch( i_mb_type )
  1903. {
  1904. - int di = i_mb_type == I_8x8 ? 4 : 1;
  1905. - bs_write_ue( s, i_mb_i_offset + 0 );
  1906. - if( h->pps->b_transform_8x8_mode )
  1907. - bs_write1( s, h->mb.b_transform_8x8 );
  1908. -
  1909. - /* Prediction: Luma */
  1910. - for( i = 0; i < 16; i += di )
  1911. + case I_4x4:
  1912. + case I_8x8:
  1913. {
  1914. - int i_pred = x264_mb_predict_intra4x4_mode( h, i );
  1915. - int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
  1916. + int di = i_mb_type == I_8x8 ? 4 : 1;
  1917. + bs_write_ue( s, i_mb_i_offset + 0 );
  1918. + if( h->pps->b_transform_8x8_mode )
  1919. + bs_write1( s, h->mb.b_transform_8x8 );
  1920.  
  1921. - if( i_pred == i_mode )
  1922. - bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
  1923. - else
  1924. - bs_write( s, 4, i_mode - (i_mode > i_pred) );
  1925. + /* Prediction: Luma */
  1926. + for( i = 0; i < 16; i += di )
  1927. + {
  1928. + int i_pred = x264_mb_predict_intra4x4_mode( h, i );
  1929. + int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
  1930. +
  1931. + if( i_pred == i_mode )
  1932. + bs_write1( s, 1 ); /* b_prev_intra4x4_pred_mode */
  1933. + else
  1934. + bs_write( s, 4, i_mode - (i_mode > i_pred) );
  1935. + }
  1936. + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
  1937. + break;
  1938. + case I_16x16:
  1939. + bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
  1940. + h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
  1941. + bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
  1942. + break;
  1943. }
  1944. - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
  1945. - }
  1946. - else if( i_mb_type == I_16x16 )
  1947. - {
  1948. - bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
  1949. - h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
  1950. - bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
  1951. - }
  1952. - else if( i_mb_type == P_L0 )
  1953. - {
  1954. - if( h->mb.i_partition == D_16x16 )
  1955. +#if !RDO_SKIP_BS
  1956. + case I_PCM:
  1957. {
  1958. - bs_write1( s, 1 );
  1959. -
  1960. - if( h->mb.pic.i_fref[0] > 1 )
  1961. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  1962. - x264_cavlc_mb_mvd( h, 0, 0, 4 );
  1963. + uint8_t *p_start = s->p_start;
  1964. + bs_write_ue( s, i_mb_i_offset + 25 );
  1965. + i_mb_pos_tex = bs_pos( s );
  1966. + h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  1967. +
  1968. + bs_align_0( s );
  1969. +
  1970. + memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
  1971. + s->p += 256;
  1972. + for( i = 0; i < 8; i++ )
  1973. + memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
  1974. + s->p += 64;
  1975. + for( i = 0; i < 8; i++ )
  1976. + memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
  1977. + s->p += 64;
  1978. +
  1979. + bs_init( s, s->p, s->p_end - s->p );
  1980. + s->p_start = p_start;
  1981. +
  1982. + /* if PCM is chosen, we need to store reconstructed frame data */
  1983. + h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
  1984. + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
  1985. + h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
  1986. +
  1987. + h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
  1988. + return;
  1989. }
  1990. - else if( h->mb.i_partition == D_16x8 )
  1991. +#endif
  1992. + case P_L0:
  1993. {
  1994. - bs_write_ue( s, 1 );
  1995. - if( h->mb.pic.i_fref[0] > 1 )
  1996. + if( h->mb.i_partition == D_16x16 )
  1997. {
  1998. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  1999. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
  2000. + bs_write1( s, 1 );
  2001. +
  2002. + if( h->mb.pic.i_fref[0] > 1 )
  2003. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  2004. + x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2005. }
  2006. - x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2007. - x264_cavlc_mb_mvd( h, 0, 8, 4 );
  2008. + else if( h->mb.i_partition == D_16x8 )
  2009. + {
  2010. + bs_write_ue( s, 1 );
  2011. + if( h->mb.pic.i_fref[0] > 1 )
  2012. + {
  2013. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  2014. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
  2015. + }
  2016. + x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2017. + x264_cavlc_mb_mvd( h, 0, 8, 4 );
  2018. + }
  2019. + else if( h->mb.i_partition == D_8x16 )
  2020. + {
  2021. + bs_write_ue( s, 2 );
  2022. + if( h->mb.pic.i_fref[0] > 1 )
  2023. + {
  2024. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  2025. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
  2026. + }
  2027. + x264_cavlc_mb_mvd( h, 0, 0, 2 );
  2028. + x264_cavlc_mb_mvd( h, 0, 4, 2 );
  2029. + }
  2030. + break;
  2031. }
  2032. - else if( h->mb.i_partition == D_8x16 )
  2033. + case P_8x8:
  2034. {
  2035. - bs_write_ue( s, 2 );
  2036. - if( h->mb.pic.i_fref[0] > 1 )
  2037. + int b_sub_ref;
  2038. + if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
  2039. + h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
  2040. + {
  2041. + bs_write_ue( s, 4 );
  2042. + b_sub_ref = 0;
  2043. + }
  2044. + else
  2045. + {
  2046. + bs_write_ue( s, 3 );
  2047. + b_sub_ref = 1;
  2048. + }
  2049. +
  2050. + /* sub mb type */
  2051. + if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
  2052. + for( i = 0; i < 4; i++ )
  2053. + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
  2054. + else
  2055. + bs_write( s, 4, 0xf );
  2056. +
  2057. + /* ref0 */
  2058. + if( b_sub_ref )
  2059. {
  2060. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  2061. bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
  2062. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
  2063. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
  2064. }
  2065. - x264_cavlc_mb_mvd( h, 0, 0, 2 );
  2066. - x264_cavlc_mb_mvd( h, 0, 4, 2 );
  2067. - }
  2068. - }
  2069. - else if( i_mb_type == P_8x8 )
  2070. - {
  2071. - int b_sub_ref;
  2072. - if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
  2073. - h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
  2074. - {
  2075. - bs_write_ue( s, 4 );
  2076. - b_sub_ref = 0;
  2077. - }
  2078. - else
  2079. - {
  2080. - bs_write_ue( s, 3 );
  2081. - b_sub_ref = 1;
  2082. - }
  2083.  
  2084. - /* sub mb type */
  2085. - if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
  2086. for( i = 0; i < 4; i++ )
  2087. - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i] ] );
  2088. - else
  2089. - bs_write( s, 4, 0xf );
  2090. -
  2091. - /* ref0 */
  2092. - if( b_sub_ref )
  2093. - {
  2094. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
  2095. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
  2096. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
  2097. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
  2098. + x264_cavlc_mb8x8_mvd( h, i );
  2099. + break;
  2100. }
  2101. + case B_8x8:
  2102. + {
  2103. + bs_write_ue( s, 22 );
  2104.  
  2105. - for( i = 0; i < 4; i++ )
  2106. - x264_cavlc_mb8x8_mvd( h, i );
  2107. - }
  2108. - else if( i_mb_type == B_8x8 )
  2109. - {
  2110. - bs_write_ue( s, 22 );
  2111. -
  2112. - /* sub mb type */
  2113. - for( i = 0; i < 4; i++ )
  2114. - bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
  2115. + /* sub mb type */
  2116. + for( i = 0; i < 4; i++ )
  2117. + bs_write_ue( s, sub_mb_type_b_to_golomb[ h->mb.i_sub_partition[i] ] );
  2118.  
  2119. - /* ref */
  2120. - if( h->mb.pic.i_fref[0] > 1 )
  2121. + /* ref */
  2122. + if( h->mb.pic.i_fref[0] > 1 )
  2123. + for( i = 0; i < 4; i++ )
  2124. + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  2125. + bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
  2126. + if( h->mb.pic.i_fref[1] > 1 )
  2127. + for( i = 0; i < 4; i++ )
  2128. + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  2129. + bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
  2130. +
  2131. + /* mvd */
  2132. for( i = 0; i < 4; i++ )
  2133. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  2134. - bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
  2135. - if( h->mb.pic.i_fref[1] > 1 )
  2136. + x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
  2137. for( i = 0; i < 4; i++ )
  2138. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  2139. - bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );
  2140. -
  2141. - /* mvd */
  2142. - for( i = 0; i < 4; i++ )
  2143. - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  2144. - x264_cavlc_mb_mvd( h, 0, 4*i, 2 );
  2145. - for( i = 0; i < 4; i++ )
  2146. - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  2147. - x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
  2148. - }
  2149. - else if( i_mb_type != B_DIRECT )
  2150. - {
  2151. - /* All B mode */
  2152. - /* Motion Vector */
  2153. - const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
  2154. - const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
  2155. - const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
  2156. -
  2157. - bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
  2158. - if( h->mb.i_partition == D_16x16 )
  2159. + x264_cavlc_mb_mvd( h, 1, 4*i, 2 );
  2160. + break;
  2161. + }
  2162. + case B_DIRECT:
  2163. {
  2164. - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
  2165. - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
  2166. - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2167. - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
  2168. + bs_write1( s, 1 );
  2169. + break;
  2170. }
  2171. - else
  2172. + default: /* Rest of the B types */
  2173. {
  2174. - if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
  2175. - if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
  2176. - if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
  2177. - if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
  2178. - if( h->mb.i_partition == D_16x8 )
  2179. + const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
  2180. + const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
  2181. + const int i_ref1_max = h->mb.pic.i_fref[1] - 1;
  2182. +
  2183. + bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
  2184. + if( h->mb.i_partition == D_16x16 )
  2185. {
  2186. + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
  2187. + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
  2188. if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2189. - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
  2190. if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
  2191. - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
  2192. }
  2193. - else //if( h->mb.i_partition == D_8x16 )
  2194. + else
  2195. {
  2196. - if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
  2197. - if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
  2198. - if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
  2199. - if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
  2200. + if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
  2201. + if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
  2202. + if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
  2203. + if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
  2204. + if( h->mb.i_partition == D_16x8 )
  2205. + {
  2206. + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 4 );
  2207. + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 8, 4 );
  2208. + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 4 );
  2209. + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 8, 4 );
  2210. + }
  2211. + else //if( h->mb.i_partition == D_8x16 )
  2212. + {
  2213. + if( b_list[0][0] ) x264_cavlc_mb_mvd( h, 0, 0, 2 );
  2214. + if( b_list[0][1] ) x264_cavlc_mb_mvd( h, 0, 4, 2 );
  2215. + if( b_list[1][0] ) x264_cavlc_mb_mvd( h, 1, 0, 2 );
  2216. + if( b_list[1][1] ) x264_cavlc_mb_mvd( h, 1, 4, 2 );
  2217. + }
  2218. }
  2219. + break;
  2220. }
  2221. }
  2222. - else //if( i_mb_type == B_DIRECT )
  2223. - bs_write1( s, 1 );
  2224.  
  2225. #if !RDO_SKIP_BS
  2226. i_mb_pos_tex = bs_pos( s );
  2227. @@ -505,16 +512,16 @@ void x264_macroblock_write_cavlc( x264_t *h )
  2228.  
  2229. /* Coded block patern */
  2230. if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  2231. - bs_write_ue( s, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
  2232. + bs_write_ue( s, intra4x4_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
  2233. else if( i_mb_type != I_16x16 )
  2234. - bs_write_ue( s, inter_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
  2235. + bs_write_ue( s, inter_cbp_to_golomb[h->mb.i_cbp_combined&0x3f] );
  2236.  
  2237. /* transform size 8x8 flag */
  2238. if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
  2239. bs_write1( s, h->mb.b_transform_8x8 );
  2240.  
  2241. /* write residual */
  2242. - if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma || i_mb_type == I_16x16 )
  2243. + if( h->mb.i_cbp_combined&0x3f || i_mb_type == I_16x16 )
  2244. {
  2245. x264_cavlc_mb_qp_delta( h );
  2246.  
  2247. @@ -561,24 +568,24 @@ static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
  2248. int b_8x16 = h->mb.i_partition == D_8x16;
  2249. int j;
  2250.  
  2251. - if( i_mb_type == P_8x8 )
  2252. - {
  2253. - x264_cavlc_mb8x8_mvd( h, i8 );
  2254. - bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
  2255. - }
  2256. - else if( i_mb_type == P_L0 )
  2257. - x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  2258. - else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
  2259. + switch( i_mb_type )
  2260. {
  2261. - if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  2262. - if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
  2263. - }
  2264. - else //if( i_mb_type == B_8x8 )
  2265. - {
  2266. - if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  2267. - x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
  2268. - if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  2269. - x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
  2270. + case P_L0:
  2271. + x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  2272. + break;
  2273. + case P_8x8:
  2274. + x264_cavlc_mb8x8_mvd( h, i8 );
  2275. + bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
  2276. + break;
  2277. + case B_8x8:
  2278. + if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  2279. + x264_cavlc_mb_mvd( h, 0, 4*i8, 2 );
  2280. + if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  2281. + x264_cavlc_mb_mvd( h, 1, 4*i8, 2 );
  2282. + break;
  2283. + default: /* Rest of the B types */
  2284. + if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) x264_cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
  2285. + if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) x264_cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
  2286. }
  2287.  
  2288. for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
  2289. @@ -618,6 +625,8 @@ static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
  2290. static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
  2291. {
  2292. h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
  2293. + /* We can't use h->mb.i_cbp_combined here because it's only calculated at the end of
  2294. + * x264_macroblock_encode(), which hasn't been called at this point. */
  2295. bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
  2296. x264_macroblock_luma_write_cavlc( h, i8, i8 );
  2297. return h->out.bs.i_bits_encoded;
  2298. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  2299. index fa7942d..f5f6267 100644
  2300. --- a/encoder/macroblock.c
  2301. +++ b/encoder/macroblock.c
  2302. @@ -488,7 +488,7 @@ static void x264_macroblock_encode_skip( x264_t *h )
  2303. h->mb.i_cbp_chroma = 0x00;
  2304. memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
  2305. /* store cbp */
  2306. - h->mb.cbp[h->mb.i_mb_xy] = 0;
  2307. + h->mb.i_cbp_combined = 0;
  2308. }
  2309.  
  2310. /*****************************************************************************
  2311. @@ -604,7 +604,6 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode )
  2312. *****************************************************************************/
  2313. void x264_macroblock_encode( x264_t *h )
  2314. {
  2315. - int i_cbp_dc = 0;
  2316. int i_qp = h->mb.i_qp;
  2317. int b_decimate = h->mb.b_dct_decimate;
  2318. int b_force_no_skip = 0;
  2319. @@ -880,34 +879,28 @@ void x264_macroblock_encode( x264_t *h )
  2320. /* encode the 8x8 blocks */
  2321. x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
  2322.  
  2323. - if( h->param.b_cabac )
  2324. - {
  2325. - i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
  2326. + int i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
  2327. | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
  2328. | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
  2329. - }
  2330.  
  2331. /* store cbp */
  2332. - h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
  2333. + h->mb.i_cbp_combined = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
  2334.  
  2335. /* Check for P_SKIP
  2336. * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
  2337. * (if multiple mv give same result)*/
  2338. if( !b_force_no_skip )
  2339. {
  2340. - if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
  2341. - !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
  2342. - M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
  2343. + if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 && !h->mb.i_cbp_combined
  2344. + && M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
  2345. && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
  2346. {
  2347. h->mb.i_type = P_SKIP;
  2348. }
  2349.  
  2350. /* Check for B_SKIP */
  2351. - if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
  2352. - {
  2353. + if( h->mb.i_type == B_DIRECT && !h->mb.i_cbp_combined )
  2354. h->mb.i_type = B_SKIP;
  2355. - }
  2356. }
  2357. }
  2358.  
  2359. --
  2360. 1.6.1.2
  2361.  
  2362.  
  2363. From c20ab8386e321ef2f5b159055a902b43b40913ec Mon Sep 17 00:00:00 2001
  2364. From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
  2365. Date: Mon, 8 Feb 2010 01:48:38 -0800
  2366. Subject: [PATCH 10/14] Write PASP atom in mp4 muxing
  2367. Adds container-level aspect ratio support.
  2368.  
  2369. ---
  2370. output/mp4.c | 3 ++-
  2371. 1 files changed, 2 insertions(+), 1 deletions(-)
  2372.  
  2373. diff --git a/output/mp4.c b/output/mp4.c
  2374. index e3ad9c6..b817c82 100644
  2375. --- a/output/mp4.c
  2376. +++ b/output/mp4.c
  2377. @@ -121,7 +121,7 @@ static int close_file( hnd_t handle, int64_t largest_pts, int64_t second_largest
  2378. if( mdhd_duration != total_duration )
  2379. {
  2380. uint64_t last_dts = gf_isom_get_sample_dts( p_mp4->p_file, p_mp4->i_track, p_mp4->i_numframe );
  2381. - uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
  2382. + uint32_t last_duration = (uint32_t)( mdhd_duration > last_dts ? mdhd_duration - last_dts : (largest_pts - second_largest_pts) * p_mp4->i_time_inc );
  2383. gf_isom_set_last_sample_duration( p_mp4->p_file, p_mp4->i_track, last_duration );
  2384. total_duration = gf_isom_get_media_duration( p_mp4->p_file, p_mp4->i_track );
  2385. }
  2386. @@ -212,6 +212,7 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
  2387. dw *= sar ;
  2388. else
  2389. dh /= sar;
  2390. + gf_isom_set_pixel_aspect_ratio( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_param->vui.i_sar_width, p_param->vui.i_sar_height );
  2391. gf_isom_set_track_layout_info( p_mp4->p_file, p_mp4->i_track, dw, dh, 0, 0, 0 );
  2392. }
  2393.  
  2394. --
  2395. 1.6.1.2
  2396.  
  2397.  
  2398. From 83ac6d576d252c91d96006cf73b57748e7bac537 Mon Sep 17 00:00:00 2001
  2399. From: Henrik Gramner <hengar-6@student.ltu.se>
  2400. Date: Mon, 8 Feb 2010 15:53:52 -0800
  2401. Subject: [PATCH 11/14] Faster 2x2 chroma DC dequant
  2402.  
  2403. ---
  2404. doc/standards.txt | 1 +
  2405. encoder/macroblock.c | 24 +++++++++---------------
  2406. 2 files changed, 10 insertions(+), 15 deletions(-)
  2407.  
  2408. diff --git a/doc/standards.txt b/doc/standards.txt
  2409. index db9a691..7474d8f 100644
  2410. --- a/doc/standards.txt
  2411. +++ b/doc/standards.txt
  2412. @@ -4,6 +4,7 @@ checkasm is written in gcc, with no attempt at compatibility with anything else.
  2413. We make the following additional assumptions which are true of real systems but not guaranteed by C99:
  2414. * Two's complement.
  2415. * Signed right-shifts are sign-extended.
  2416. +* int is 32-bit or larger.
  2417.  
  2418. x86-specific assumptions:
  2419. * The stack is 16-byte aligned. We align it on entry to libx264 and on entry to any thread, but the compiler must preserve alignment after that.
  2420. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  2421. index f5f6267..3d859de 100644
  2422. --- a/encoder/macroblock.c
  2423. +++ b/encoder/macroblock.c
  2424. @@ -42,30 +42,24 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
  2425. int d1 = dct[2] + dct[3]; \
  2426. int d2 = dct[0] - dct[1]; \
  2427. int d3 = dct[2] - dct[3]; \
  2428. - int dmf = dequant_mf[i_qp%6][0]; \
  2429. - int qbits = i_qp/6 - 5; \
  2430. - if( qbits > 0 ) \
  2431. - { \
  2432. - dmf <<= qbits; \
  2433. - qbits = 0; \
  2434. - }
  2435. + int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
  2436.  
  2437. static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
  2438. {
  2439. IDCT_DEQUANT_START
  2440. - dct4x4[0][0] = (d0 + d1) * dmf >> -qbits;
  2441. - dct4x4[1][0] = (d0 - d1) * dmf >> -qbits;
  2442. - dct4x4[2][0] = (d2 + d3) * dmf >> -qbits;
  2443. - dct4x4[3][0] = (d2 - d3) * dmf >> -qbits;
  2444. + dct4x4[0][0] = (d0 + d1) * dmf >> 5;
  2445. + dct4x4[1][0] = (d0 - d1) * dmf >> 5;
  2446. + dct4x4[2][0] = (d2 + d3) * dmf >> 5;
  2447. + dct4x4[3][0] = (d2 - d3) * dmf >> 5;
  2448. }
  2449.  
  2450. static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
  2451. {
  2452. IDCT_DEQUANT_START
  2453. - out[0] = (d0 + d1) * dmf >> -qbits;
  2454. - out[1] = (d0 - d1) * dmf >> -qbits;
  2455. - out[2] = (d2 + d3) * dmf >> -qbits;
  2456. - out[3] = (d2 - d3) * dmf >> -qbits;
  2457. + out[0] = (d0 + d1) * dmf >> 5;
  2458. + out[1] = (d0 - d1) * dmf >> 5;
  2459. + out[2] = (d2 + d3) * dmf >> 5;
  2460. + out[3] = (d2 - d3) * dmf >> 5;
  2461. }
  2462.  
  2463. static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
  2464. --
  2465. 1.6.1.2
  2466.  
  2467.  
  2468. From 7b8fd33d747b99a338ab04ed923c3cf83ad3e134 Mon Sep 17 00:00:00 2001
  2469. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  2470. Date: Tue, 9 Feb 2010 15:08:31 -0800
  2471. Subject: [PATCH 12/14] Make psy-(rd|trellis) use more precision in userdata SEI
  2472.  
  2473. ---
  2474. common/common.c | 2 +-
  2475. 1 files changed, 1 insertions(+), 1 deletions(-)
  2476.  
  2477. diff --git a/common/common.c b/common/common.c
  2478. index 6d1d7f0..aaccdf2 100644
  2479. --- a/common/common.c
  2480. +++ b/common/common.c
  2481. @@ -886,7 +886,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  2482. s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
  2483. s += sprintf( s, " psy=%d", p->analyse.b_psy );
  2484. if( p->analyse.b_psy )
  2485. - s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  2486. + s += sprintf( s, " psy_rd=%.2f:%.2f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
  2487. s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
  2488. s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
  2489. s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
  2490. --
  2491. 1.6.1.2
  2492.  
  2493.  
  2494. From 75480be89f05681f5b7f3b66a46057771f17e2a8 Mon Sep 17 00:00:00 2001
  2495. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  2496. Date: Wed, 10 Feb 2010 12:12:29 -0800
  2497. Subject: [PATCH 13/14] Overhaul sliced-threads VBV
  2498. Make predictors thread-local and allow each thread to poll the others to get their predicted sizes.
  2499. Many, many other tweaks to improve quality with small VBV and sliced threads.
  2500.  
  2501. ---
  2502. encoder/encoder.c | 4 +-
  2503. encoder/ratecontrol.c | 145 ++++++++++++++++++++++++++++++-------------------
  2504. encoder/slicetype.c | 4 +-
  2505. 3 files changed, 93 insertions(+), 60 deletions(-)
  2506.  
  2507. diff --git a/encoder/encoder.c b/encoder/encoder.c
  2508. index 8e9c118..b3da30e 100644
  2509. --- a/encoder/encoder.c
  2510. +++ b/encoder/encoder.c
  2511. @@ -2061,6 +2061,8 @@ static int x264_threaded_slices_write( x264_t *h )
  2512. for( i = 0; i <= h->sps->i_mb_height; i++ )
  2513. x264_fdec_filter_row( h, i );
  2514.  
  2515. + x264_threads_merge_ratecontrol( h );
  2516. +
  2517. for( i = 1; i < h->param.i_threads; i++ )
  2518. {
  2519. x264_t *t = h->thread[i];
  2520. @@ -2076,8 +2078,6 @@ static int x264_threaded_slices_write( x264_t *h )
  2521. ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];
  2522. }
  2523.  
  2524. - x264_threads_merge_ratecontrol( h );
  2525. -
  2526. return 0;
  2527. }
  2528.  
  2529. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  2530. index e314ba2..57439c2 100644
  2531. --- a/encoder/ratecontrol.c
  2532. +++ b/encoder/ratecontrol.c
  2533. @@ -137,6 +137,7 @@ struct x264_ratecontrol_t
  2534. double frame_size_estimated;
  2535. double frame_size_planned;
  2536. double slice_size_planned;
  2537. + double max_frame_error;
  2538. predictor_t (*row_pred)[2];
  2539. predictor_t row_preds[5][2];
  2540. predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
  2541. @@ -505,17 +506,21 @@ int x264_ratecontrol_new( x264_t *h )
  2542.  
  2543. rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
  2544. rc->last_qscale = qp2qscale(26);
  2545. - CHECKED_MALLOC( rc->pred, 5*sizeof(predictor_t) );
  2546. + int num_preds = h->param.b_sliced_threads * h->param.i_threads + 1;
  2547. + CHECKED_MALLOC( rc->pred, 5 * sizeof(predictor_t) * num_preds );
  2548. CHECKED_MALLOC( rc->pred_b_from_p, sizeof(predictor_t) );
  2549. for( i = 0; i < 5; i++ )
  2550. {
  2551. rc->last_qscale_for[i] = qp2qscale( ABR_INIT_QP );
  2552. rc->lmin[i] = qp2qscale( h->param.rc.i_qp_min );
  2553. rc->lmax[i] = qp2qscale( h->param.rc.i_qp_max );
  2554. - rc->pred[i].coeff= 2.0;
  2555. - rc->pred[i].count= 1.0;
  2556. - rc->pred[i].decay= 0.5;
  2557. - rc->pred[i].offset= 0.0;
  2558. + for( j = 0; j < num_preds; j++ )
  2559. + {
  2560. + rc->pred[i+j*5].coeff= 2.0;
  2561. + rc->pred[i+j*5].count= 1.0;
  2562. + rc->pred[i+j*5].decay= 0.5;
  2563. + rc->pred[i+j*5].offset= 0.0;
  2564. + }
  2565. for( j = 0; j < 2; j++ )
  2566. {
  2567. rc->row_preds[i][j].coeff= .25;
  2568. @@ -988,18 +993,12 @@ void x264_ratecontrol_delete( x264_t *h )
  2569.  
  2570. void x264_ratecontrol_set_estimated_size( x264_t *h, int bits )
  2571. {
  2572. - x264_pthread_mutex_lock( &h->fenc->mutex );
  2573. h->rc->frame_size_estimated = bits;
  2574. - x264_pthread_mutex_unlock( &h->fenc->mutex );
  2575. }
  2576.  
  2577. -int x264_ratecontrol_get_estimated_size( x264_t const *h)
  2578. +int x264_ratecontrol_get_estimated_size( x264_t const *h )
  2579. {
  2580. - int size;
  2581. - x264_pthread_mutex_lock( &h->fenc->mutex );
  2582. - size = h->rc->frame_size_estimated;
  2583. - x264_pthread_mutex_unlock( &h->fenc->mutex );
  2584. - return size;
  2585. + return h->rc->frame_size_estimated;
  2586. }
  2587.  
  2588. static void accum_p_qp_update( x264_t *h, float qp )
  2589. @@ -1173,6 +1172,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  2590. /* tweak quality based on difference from predicted size */
  2591. if( y < h->i_threadslice_end-1 )
  2592. {
  2593. + int i;
  2594. int prev_row_qp = h->fdec->i_row_qp[y];
  2595. int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max );
  2596. int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
  2597. @@ -1186,19 +1186,23 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  2598.  
  2599. float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
  2600. float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
  2601. - float size_of_other_slices = rc->frame_size_planned - slice_size_planned;
  2602. + float size_of_other_slices = 0;
  2603. + if( h->param.b_sliced_threads )
  2604. + {
  2605. + for( i = 0; i < h->param.i_threads; i++ )
  2606. + if( h != h->thread[i] )
  2607. + size_of_other_slices += x264_ratecontrol_get_estimated_size( h->thread[i] );
  2608. + }
  2609. + else
  2610. + rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->sps->i_mb_width) );
  2611. +
  2612. /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
  2613. float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
  2614. - float max_frame_error = X264_MAX( 0.05, 1.0 / h->sps->i_mb_height );
  2615. - int b1 = predict_row_size_sum( h, y, rc->qpm );
  2616. -
  2617. - /* Assume that if this slice has become larger than expected,
  2618. - * the other slices will have gotten equally larger. */
  2619. - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
  2620. + int b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  2621.  
  2622. /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */
  2623. /* area at the top of the frame was measured inaccurately. */
  2624. - if( row_bits_so_far(h,y) < 0.05 * (rc->frame_size_planned-size_of_other_slices) )
  2625. + if( row_bits_so_far( h, y ) < 0.05 * slice_size_planned )
  2626. return;
  2627.  
  2628. if( h->sh.i_type != SLICE_TYPE_I )
  2629. @@ -1213,8 +1217,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  2630. (b1 > rc->frame_size_planned && rc->qpm < rc->qp_novbv)) )
  2631. {
  2632. rc->qpm ++;
  2633. - b1 = predict_row_size_sum( h, y, rc->qpm );
  2634. - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
  2635. + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  2636. }
  2637.  
  2638. while( rc->qpm > i_qp_min
  2639. @@ -1223,20 +1226,18 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
  2640. || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) )
  2641. {
  2642. rc->qpm --;
  2643. - b1 = predict_row_size_sum( h, y, rc->qpm );
  2644. - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
  2645. + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  2646. }
  2647.  
  2648. /* avoid VBV underflow */
  2649. while( (rc->qpm < h->param.rc.i_qp_max)
  2650. - && (rc->buffer_fill - b1 < rc->buffer_rate * max_frame_error) )
  2651. + && (rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) )
  2652. {
  2653. rc->qpm ++;
  2654. - b1 = predict_row_size_sum( h, y, rc->qpm );
  2655. - b1 += X264_MAX( size_of_other_slices * b1 / slice_size_planned, size_of_other_slices );
  2656. + b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
  2657. }
  2658.  
  2659. - x264_ratecontrol_set_estimated_size(h, b1);
  2660. + x264_ratecontrol_set_estimated_size( h, predict_row_size_sum( h, y, rc->qpm ) );
  2661. }
  2662.  
  2663. /* loses the fractional part of the frame-wise qp */
  2664. @@ -1958,56 +1959,88 @@ static float rate_estimate_qscale( x264_t *h )
  2665. }
  2666. }
  2667.  
  2668. +void x264_threads_normalize_predictors( x264_t *h )
  2669. +{
  2670. + int i;
  2671. + double totalsize = 0;
  2672. + for( i = 0; i < h->param.i_threads; i++ )
  2673. + totalsize += h->thread[i]->rc->slice_size_planned;
  2674. + double factor = h->rc->frame_size_planned / totalsize;
  2675. + for( i = 0; i < h->param.i_threads; i++ )
  2676. + h->thread[i]->rc->slice_size_planned *= factor;
  2677. +}
  2678. +
  2679. void x264_threads_distribute_ratecontrol( x264_t *h )
  2680. {
  2681. - int i, row, totalsize = 0;
  2682. - if( h->rc->b_vbv )
  2683. - for( row = 0; row < h->sps->i_mb_height; row++ )
  2684. - totalsize += h->fdec->i_row_satd[row];
  2685. + int i, row;
  2686. + x264_ratecontrol_t *rc = h->rc;
  2687. +
  2688. + /* Initialize row predictors */
  2689. + if( h->i_frame == 0 )
  2690. + for( i = 0; i < h->param.i_threads; i++ )
  2691. + {
  2692. + x264_ratecontrol_t *t = h->thread[i]->rc;
  2693. + memcpy( t->row_preds, rc->row_preds, sizeof(rc->row_preds) );
  2694. + }
  2695. +
  2696. for( i = 0; i < h->param.i_threads; i++ )
  2697. {
  2698. x264_t *t = h->thread[i];
  2699. - x264_ratecontrol_t *rc = h->rc;
  2700. - memcpy( t->rc, rc, sizeof(x264_ratecontrol_t) );
  2701. + memcpy( t->rc, rc, offsetof(x264_ratecontrol_t, row_pred) );
  2702. + t->rc->row_pred = &t->rc->row_preds[h->sh.i_type];
  2703. /* Calculate the planned slice size. */
  2704. - if( h->rc->b_vbv && rc->frame_size_planned )
  2705. + if( rc->b_vbv && rc->frame_size_planned )
  2706. {
  2707. int size = 0;
  2708. for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
  2709. size += h->fdec->i_row_satd[row];
  2710. - t->rc->slice_size_planned = size * rc->frame_size_planned / totalsize;
  2711. + t->rc->slice_size_planned = predict_size( &rc->pred[h->sh.i_type + (i+1)*5], rc->qpm, size );
  2712. }
  2713. else
  2714. t->rc->slice_size_planned = 0;
  2715. }
  2716. + if( rc->b_vbv && rc->frame_size_planned )
  2717. + {
  2718. + x264_threads_normalize_predictors( h );
  2719. +
  2720. + if( rc->single_frame_vbv )
  2721. + {
  2722. + /* Compensate for our max frame error threshold: give more bits (proportionally) to smaller slices. */
  2723. + for( i = 0; i < h->param.i_threads; i++ )
  2724. + {
  2725. + x264_t *t = h->thread[i];
  2726. + t->rc->max_frame_error = X264_MAX( 0.05, 1.0 / (t->i_threadslice_end - t->i_threadslice_start) );
  2727. + t->rc->slice_size_planned += 2 * t->rc->max_frame_error * rc->frame_size_planned;
  2728. + }
  2729. + x264_threads_normalize_predictors( h );
  2730. + }
  2731. +
  2732. + for( i = 0; i < h->param.i_threads; i++ )
  2733. + h->thread[i]->rc->frame_size_estimated = h->thread[i]->rc->slice_size_planned;
  2734. + }
  2735. }
  2736.  
  2737. void x264_threads_merge_ratecontrol( x264_t *h )
  2738. {
  2739. - int i, j, k;
  2740. + int i, row;
  2741. x264_ratecontrol_t *rc = h->rc;
  2742. x264_emms();
  2743.  
  2744. - for( i = 1; i < h->param.i_threads; i++ )
  2745. + for( i = 0; i < h->param.i_threads; i++ )
  2746. {
  2747. - x264_ratecontrol_t *t = h->thread[i]->rc;
  2748. - rc->qpa_rc += t->qpa_rc;
  2749. - rc->qpa_aq += t->qpa_aq;
  2750. - for( j = 0; j < 5; j++ )
  2751. - for( k = 0; k < 2; k++ )
  2752. - {
  2753. - rc->row_preds[j][k].coeff += t->row_preds[j][k].coeff;
  2754. - rc->row_preds[j][k].offset += t->row_preds[j][k].offset;
  2755. - rc->row_preds[j][k].count += t->row_preds[j][k].count;
  2756. - }
  2757. + x264_t *t = h->thread[i];
  2758. + x264_ratecontrol_t *rct = h->thread[i]->rc;
  2759. + int size = 0;
  2760. + for( row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
  2761. + size += h->fdec->i_row_satd[row];
  2762. + int bits = t->stat.frame.i_mv_bits + t->stat.frame.i_tex_bits + t->stat.frame.i_misc_bits;
  2763. + int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->sps->i_mb_width;
  2764. + update_predictor( &rc->pred[h->sh.i_type+5*i], qp2qscale(rct->qpa_rc/mb_count), size, bits );
  2765. + if( !i )
  2766. + continue;
  2767. + rc->qpa_rc += rct->qpa_rc;
  2768. + rc->qpa_aq += rct->qpa_aq;
  2769. }
  2770. - for( j = 0; j < 5; j++ )
  2771. - for( k = 0; k < 2; k++ )
  2772. - {
  2773. - rc->row_preds[j][k].coeff /= h->param.i_threads;
  2774. - rc->row_preds[j][k].offset /= h->param.i_threads;
  2775. - rc->row_preds[j][k].count /= h->param.i_threads;
  2776. - }
  2777. }
  2778.  
  2779. void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next )
  2780. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  2781. index 057f6a6..bb2ed64 100644
  2782. --- a/encoder/slicetype.c
  2783. +++ b/encoder/slicetype.c
  2784. @@ -1394,10 +1394,10 @@ int x264_rc_analyse_slice( x264_t *h )
  2785. int mb_xy = y * h->mb.i_mb_stride;
  2786. for( x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
  2787. {
  2788. - int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor) >> 8;
  2789. + int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
  2790. int inter_cost = h->fenc->lowres_costs[b-p0][p1-b][mb_xy];
  2791. int diff = intra_cost - inter_cost;
  2792. - h->fdec->i_row_satd[y] += diff;
  2793. + h->fdec->i_row_satd[y] += (diff * frames[b]->i_inv_qscale_factor[mb_xy] + 128) >> 8;
  2794. cost += diff;
  2795. }
  2796. }
  2797. --
  2798. 1.6.1.2
  2799.  
  2800.  
  2801. From 293ae5edfae553a5c00ebb854b579994f7010a9a Mon Sep 17 00:00:00 2001
  2802. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  2803. Date: Wed, 10 Feb 2010 13:44:28 -0800
  2804. Subject: [PATCH 14/14] Allow longer keyints with intra refresh
  2805. If a long keyint is specified (longer than macroblock width-1), the refresh will simply not occur all the time.
  2806. In other words, a refresh will take place, and then x264 will wait until keyint is over to start another refresh.
  2807.  
  2808. ---
  2809. encoder/encoder.c | 9 ++++-----
  2810. 1 files changed, 4 insertions(+), 5 deletions(-)
  2811.  
  2812. diff --git a/encoder/encoder.c b/encoder/encoder.c
  2813. index b3da30e..64f41fb 100644
  2814. --- a/encoder/encoder.c
  2815. +++ b/encoder/encoder.c
  2816. @@ -599,8 +599,6 @@ static int x264_validate_parameters( x264_t *h )
  2817. x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
  2818. h->param.i_frame_reference = 1;
  2819. }
  2820. - if( h->param.b_intra_refresh )
  2821. - h->param.i_keyint_max = X264_MIN( h->param.i_keyint_max, (h->param.i_width+15)/16 - 1 );
  2822. h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
  2823. h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
  2824. {
  2825. @@ -2306,12 +2304,12 @@ int x264_encoder_encode( x264_t *h,
  2826. if( h->param.b_intra_refresh && h->fenc->i_type == X264_TYPE_P )
  2827. {
  2828. int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
  2829. - float increment = ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max;
  2830. + float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
  2831. if( IS_X264_TYPE_I( h->fref0[0]->i_type ) )
  2832. h->fdec->f_pir_position = 0;
  2833. else
  2834. {
  2835. - if( h->fref0[0]->i_pir_end_col == h->sps->i_mb_width - 1 )
  2836. + if( h->fdec->f_pir_position >= h->param.i_keyint_max )
  2837. {
  2838. h->fdec->f_pir_position = 0;
  2839. h->fenc->b_keyframe = 1;
  2840. @@ -2357,8 +2355,9 @@ int x264_encoder_encode( x264_t *h,
  2841.  
  2842. if( h->fenc->i_type != X264_TYPE_IDR )
  2843. {
  2844. + int time_to_recovery = X264_MIN( h->sps->i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
  2845. x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
  2846. - x264_sei_recovery_point_write( h, &h->out.bs, h->param.i_keyint_max );
  2847. + x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
  2848. x264_nal_end( h );
  2849. overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
  2850. }
  2851. --
  2852. 1.6.1.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement