Advertisement
Guest User

Untitled

a guest
Jun 11th, 2017
534
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 37.17 KB | None | 0 0
  1. From 31e458faf03c8c673c1d7e8c9f6dd40c193d211f Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Tue, 17 Aug 2010 14:38:41 -0700
  4. Subject: [PATCH 1/5] Fix 2 bugs with slice-max-size
  5.  Macroblock re-encoding didn't restore mv/tex bit counters (slightly inaccurate 2-pass).
  6.  Bitstream buffer check didn't work correctly (insanely large frames could break encoding).
  7.  
  8. ---
  9. encoder/encoder.c |   13 ++++++++++---
  10.  1 files changed, 10 insertions(+), 3 deletions(-)
  11.  
  12. diff --git a/encoder/encoder.c b/encoder/encoder.c
  13. index 6fd8838..52b6fb5 100644
  14. --- a/encoder/encoder.c
  15. +++ b/encoder/encoder.c
  16. @@ -1827,6 +1827,8 @@ static int x264_slice_write( x264_t *h )
  17.      bs_t bs_bak;
  18.      x264_cabac_t cabac_bak;
  19.      uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
  20. +    int mv_bits_bak = 0;
  21. +    int tex_bits_bak = 0;
  22.      /* Assume no more than 3 bytes of NALU escaping.
  23.       * NALUs other than the first use a 3-byte startcode. */
  24.      int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
  25. @@ -1873,8 +1875,14 @@ static int x264_slice_write( x264_t *h )
  26.      while( (mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width) <= h->sh.i_last_mb )
  27.      {
  28.          int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
  29. +
  30. +        if( x264_bitstream_check_buffer( h ) )
  31. +            return -1;
  32. +
  33.          if( h->param.i_slice_max_size > 0 )
  34.          {
  35. +            mv_bits_bak = h->stat.frame.i_mv_bits;
  36. +            tex_bits_bak = h->stat.frame.i_tex_bits;
  37.              /* We don't need the contexts because flushing the CABAC encoder has no context
  38.               * dependency and macroblocks are only re-encoded in the case where a slice is
  39.               * ended (and thus the content of all contexts are thrown away). */
  40. @@ -1903,9 +1911,6 @@ static int x264_slice_write( x264_t *h )
  41.          /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
  42.          x264_macroblock_encode( h );
  43.  
  44. -        if( x264_bitstream_check_buffer( h ) )
  45. -            return -1;
  46. -
  47.          if( h->param.b_cabac )
  48.          {
  49.              if( mb_xy > h->sh.i_first_mb && !(h->sh.b_mbaff && (i_mb_y&1)) )
  50. @@ -1943,6 +1948,8 @@ static int x264_slice_write( x264_t *h )
  51.          {
  52.              if( mb_xy != h->sh.i_first_mb )
  53.              {
  54. +                h->stat.frame.i_mv_bits = mv_bits_bak;
  55. +                h->stat.frame.i_tex_bits = tex_bits_bak;
  56.                  if( h->param.b_cabac )
  57.                  {
  58.                      memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
  59. --
  60. 1.7.1
  61.  
  62.  
  63. From 08b1e36390cdc7758102af31dc4f7f4e05ec0055 Mon Sep 17 00:00:00 2001
  64. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  65. Date: Mon, 23 Aug 2010 18:59:35 -0400
  66. Subject: [PATCH 2/5] Add missing emms to x264_nal_encode
  67.  Only matters for applications using the low-latency callback feature.
  68.  
  69. ---
  70. common/bitstream.c |    1 +
  71.  1 files changed, 1 insertions(+), 0 deletions(-)
  72.  
  73. diff --git a/common/bitstream.c b/common/bitstream.c
  74. index ad8c16e..c760544 100644
  75. --- a/common/bitstream.c
  76. +++ b/common/bitstream.c
  77. @@ -79,6 +79,7 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
  78.  
  79.      nal->i_payload = size+4;
  80.      nal->p_payload = orig_dst;
  81. +    x264_emms();
  82.  }
  83.  
  84.  void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
  85. --
  86. 1.7.1
  87.  
  88.  
  89. From c74f9c99a9af959c137f33a20d17bd55b3deb11d Mon Sep 17 00:00:00 2001
  90. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  91. Date: Mon, 16 Aug 2010 17:47:11 -0700
  92. Subject: [PATCH 3/5] Simplify addressing logic for interlaced-related arrays
  93.  In progressive mode, just make [0] and [1] point to the same place.
  94.  
  95. ---
  96. common/deblock.c    |    4 ++--
  97.  common/macroblock.c |    6 ++++--
  98.  encoder/encoder.c   |    2 +-
  99.  3 files changed, 7 insertions(+), 5 deletions(-)
  100.  
  101. diff --git a/common/deblock.c b/common/deblock.c
  102. index 0b61248..2d4f230 100644
  103. --- a/common/deblock.c
  104. +++ b/common/deblock.c
  105. @@ -316,7 +316,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
  106.          int mb_xy = h->mb.i_mb_xy;
  107.          int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
  108.          int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
  109. -        uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&b_interlaced][mb_x];
  110. +        uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
  111.  
  112.          pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
  113.          pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
  114. @@ -403,7 +403,7 @@ void x264_macroblock_deblock( x264_t *h )
  115.      if( qp <= qp_thresh || h->mb.i_type == P_SKIP )
  116.          return;
  117.  
  118. -    uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&h->sh.b_mbaff][h->mb.i_mb_x];
  119. +    uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
  120.      if( IS_INTRA( h->mb.i_type ) )
  121.          memset( bs, 3, 2*4*4*sizeof(uint8_t) );
  122.      else
  123. diff --git a/common/macroblock.c b/common/macroblock.c
  124. index 5899b15..7347645 100644
  125. --- a/common/macroblock.c
  126. +++ b/common/macroblock.c
  127. @@ -320,8 +320,10 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  128.                  /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
  129.                  CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
  130.                  h->intra_border_backup[i][j] += 16;
  131. +                h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
  132.              }
  133.              CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
  134. +            h->deblock_strength[1] = h->deblock_strength[i];
  135.          }
  136.  
  137.      /* Allocate scratch buffer */
  138. @@ -493,7 +495,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
  139.                       ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  140.                       : 16 * mb_x + w * mb_y * i_stride;
  141.      pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
  142. -    pixel *intra_fdec = &h->intra_border_backup[mb_y & h->sh.b_mbaff][i][mb_x*16];
  143. +    pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  144.      int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
  145.      x264_frame_t **fref[2] = { h->fref0, h->fref1 };
  146.      if( b_interlaced )
  147. @@ -1086,7 +1088,7 @@ static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int mb_x, int mb
  148.      int i_pix_offset = b_interlaced
  149.                       ? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
  150.                       : 16 * mb_x + w * mb_y * i_stride;
  151. -    pixel *intra_fdec = &h->intra_border_backup[mb_y & h->sh.b_mbaff][i][mb_x*16];
  152. +    pixel *intra_fdec = &h->intra_border_backup[mb_y&1][i][mb_x*16];
  153.      if( i )
  154.      {
  155.          h->mc.store_interleave_8x8x2( &h->fdec->plane[1][i_pix_offset], i_stride2, h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2] );
  156. diff --git a/encoder/encoder.c b/encoder/encoder.c
  157. index 52b6fb5..0b65d51 100644
  158. --- a/encoder/encoder.c
  159. +++ b/encoder/encoder.c
  160. @@ -2038,7 +2038,7 @@ static int x264_slice_write( x264_t *h )
  161.          if( b_deblock )
  162.          {
  163.              int mvy_limit = 4 >> h->sh.b_mbaff;
  164. -            uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&h->sh.b_mbaff][h->mb.i_mb_x];
  165. +            uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
  166.              x264_macroblock_cache_load_deblock( h );
  167.              if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
  168.                  memset( bs, 3, 2*4*4*sizeof(uint8_t) );
  169. --
  170. 1.7.1
  171.  
  172.  
  173. From 636d85b07cab192f796485969bc5e7a5538b8372 Mon Sep 17 00:00:00 2001
  174. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  175. Date: Sat, 21 Aug 2010 16:51:39 -0500
  176. Subject: [PATCH 4/5] Add global #define for maximum reference count
  177.  This should make it easier to play around with reference frame counts that exceed the spec maximum.
  178.  
  179. ---
  180. common/common.h     |   35 ++++++++++++++++++-----------------
  181.  common/frame.h      |    6 +++---
  182.  common/macroblock.c |   10 +++++-----
  183.  encoder/encoder.c   |   16 ++++++++--------
  184.  encoder/set.c       |    2 +-
  185.  5 files changed, 35 insertions(+), 34 deletions(-)
  186.  
  187. diff --git a/common/common.h b/common/common.h
  188. index 72fc1d8..670fd12 100644
  189. --- a/common/common.h
  190. +++ b/common/common.h
  191. @@ -51,6 +51,7 @@ do {\
  192.  } while( 0 )
  193.  
  194.  #define X264_BFRAME_MAX 16
  195. +#define X264_REF_MAX 16
  196.  #define X264_THREAD_MAX 128
  197.  #define X264_PCM_COST (384*BIT_DEPTH+16)
  198.  #define X264_LOOKAHEAD_MAX 250
  199. @@ -340,10 +341,10 @@ typedef struct
  200.      {
  201.          int idc;
  202.          int arg;
  203. -    } ref_pic_list_order[2][16];
  204. +    } ref_pic_list_order[2][X264_REF_MAX];
  205.  
  206.      /* P-frame weighting */
  207. -    x264_weight_t weight[32][3];
  208. +    x264_weight_t weight[X264_REF_MAX*2][3];
  209.  
  210.      int i_mmco_remove_from_end;
  211.      int i_mmco_command_count;
  212. @@ -351,7 +352,7 @@ typedef struct
  213.      {
  214.          int i_difference_of_pic_nums;
  215.          int i_poc;
  216. -    } mmco[16];
  217. +    } mmco[X264_REF_MAX];
  218.  
  219.      int i_cabac_init_idc;
  220.  
  221. @@ -479,7 +480,7 @@ struct x264_t
  222.          x264_frame_t **blank_unused;
  223.  
  224.          /* frames used for reference + sentinels */
  225. -        x264_frame_t *reference[16+2];
  226. +        x264_frame_t *reference[X264_REF_MAX+2];
  227.  
  228.          int i_last_keyframe;       /* Frame number of the last keyframe */
  229.          int i_last_idr;            /* Frame number of the last IDR (not RP)*/
  230. @@ -511,9 +512,9 @@ struct x264_t
  231.  
  232.      /* references lists */
  233.      int             i_ref0;
  234. -    x264_frame_t    *fref0[16+3];     /* ref list 0 */
  235. +    x264_frame_t    *fref0[X264_REF_MAX+3];     /* ref list 0 */
  236.      int             i_ref1;
  237. -    x264_frame_t    *fref1[16+3];     /* ref list 1 */
  238. +    x264_frame_t    *fref1[X264_REF_MAX+3];     /* ref list 1 */
  239.      int             b_ref_reorder[2];
  240.  
  241.      /* hrd */
  242. @@ -605,14 +606,14 @@ struct x264_t
  243.          int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
  244.          uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
  245.          int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
  246. -        int16_t (*mvr[2][32])[2];           /* 16x16 mv for each possible ref */
  247. +        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
  248.          int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
  249.          int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
  250.          uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
  251.                                               * NOTE: this will fail on resolutions above 2^16 MBs... */
  252.  
  253.           /* buffer for weighted versions of the reference frames */
  254. -        pixel *p_weight_buf[16];
  255. +        pixel *p_weight_buf[X264_REF_MAX];
  256.  
  257.          /* current value */
  258.          int     i_type;
  259. @@ -675,9 +676,9 @@ struct x264_t
  260.  
  261.              /* pointer over mb of the references */
  262.              int i_fref[2];
  263. -            pixel *p_fref[2][32][4+1]; /* last: yN, yH, yV, yHV, uv */
  264. -            pixel *p_fref_w[32];  /* weighted fullpel luma */
  265. -            uint16_t *p_integral[2][16];
  266. +            pixel *p_fref[2][X264_REF_MAX*2][4+1]; /* last: yN, yH, yV, yHV, uv */
  267. +            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
  268. +            uint16_t *p_integral[2][X264_REF_MAX];
  269.  
  270.              /* fref stride */
  271.              int     i_stride[3];
  272. @@ -732,15 +733,15 @@ struct x264_t
  273.          int     i_chroma_lambda2_offset;
  274.  
  275.          /* B_direct and weighted prediction */
  276. -        int16_t dist_scale_factor_buf[2][32][4];
  277. +        int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
  278.          int16_t (*dist_scale_factor)[4];
  279. -        int8_t bipred_weight_buf[2][32][4];
  280. +        int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
  281.          int8_t (*bipred_weight)[4];
  282.          /* maps fref1[0]'s ref indices into the current list0 */
  283.  #define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
  284. -        int8_t  map_col_to_list0[18];
  285. +        int8_t  map_col_to_list0[X264_REF_MAX+2];
  286.          int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
  287. -        int8_t deblock_ref_table[32+2];
  288. +        int8_t deblock_ref_table[X264_REF_MAX*2+2];
  289.  #define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
  290.      } mb;
  291.  
  292. @@ -765,7 +766,7 @@ struct x264_t
  293.              int i_mb_count_p;
  294.              int i_mb_count_skip;
  295.              int i_mb_count_8x8dct[2];
  296. -            int i_mb_count_ref[2][32];
  297. +            int i_mb_count_ref[2][X264_REF_MAX*2];
  298.              int i_mb_partition[17];
  299.              int i_mb_cbp[6];
  300.              int i_mb_pred_mode[4][13];
  301. @@ -794,7 +795,7 @@ struct x264_t
  302.          int64_t i_mb_count[5][19];
  303.          int64_t i_mb_partition[2][17];
  304.          int64_t i_mb_count_8x8dct[2];
  305. -        int64_t i_mb_count_ref[2][2][32];
  306. +        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
  307.          int64_t i_mb_cbp[6];
  308.          int64_t i_mb_pred_mode[4][13];
  309.          /* */
  310. diff --git a/common/frame.h b/common/frame.h
  311. index fcc28d7..3e0a3f5 100644
  312. --- a/common/frame.h
  313. +++ b/common/frame.h
  314. @@ -75,8 +75,8 @@ typedef struct x264_frame
  315.      pixel *buffer[4];
  316.      pixel *buffer_lowres[4];
  317.  
  318. -    x264_weight_t weight[16][3]; /* [ref_index][plane] */
  319. -    pixel *weighted[16]; /* plane[0] weighted of the reference frames */
  320. +    x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
  321. +    pixel *weighted[X264_REF_MAX]; /* plane[0] weighted of the reference frames */
  322.      int b_duplicate;
  323.      struct x264_frame *orig;
  324.  
  325. @@ -97,7 +97,7 @@ typedef struct x264_frame
  326.      int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
  327.      int8_t  *ref[2];
  328.      int     i_ref[2];
  329. -    int     ref_poc[2][16];
  330. +    int     ref_poc[2][X264_REF_MAX];
  331.      int16_t inv_ref_poc[2]; // inverse values of ref0 poc to avoid divisions in temporal MV prediction
  332.  
  333.      /* for adaptive B-frame decision.
  334. diff --git a/common/macroblock.c b/common/macroblock.c
  335. index 7347645..6efd7e6 100644
  336. --- a/common/macroblock.c
  337. +++ b/common/macroblock.c
  338. @@ -233,11 +233,11 @@ int x264_macroblock_cache_allocate( x264_t *h )
  339.  
  340.      for( int i = 0; i < 2; i++ )
  341.      {
  342. -        int i_refs = X264_MIN(16, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  343. +        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  344.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  345. -            i_refs = X264_MIN(16, i_refs + 2); //smart weights add two duplicate frames
  346. +            i_refs = X264_MIN(X264_REF_MAX, i_refs + 2); //smart weights add two duplicate frames
  347.          else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  348. -            i_refs = X264_MIN(16, i_refs + 1); //blind weights add one duplicate frame
  349. +            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1); //blind weights add one duplicate frame
  350.  
  351.          for( int j = !i; j < i_refs; j++ )
  352.          {
  353. @@ -289,10 +289,10 @@ fail:
  354.  void x264_macroblock_cache_free( x264_t *h )
  355.  {
  356.      for( int i = 0; i < 2; i++ )
  357. -        for( int j = !i; j < 32; j++ )
  358. +        for( int j = !i; j < X264_REF_MAX*2; j++ )
  359.              if( h->mb.mvr[i][j] )
  360.                  x264_free( h->mb.mvr[i][j]-1 );
  361. -    for( int i = 0; i < 16; i++ )
  362. +    for( int i = 0; i < X264_REF_MAX; i++ )
  363.          x264_free( h->mb.p_weight_buf[i] );
  364.  
  365.      if( h->param.b_cabac )
  366. diff --git a/encoder/encoder.c b/encoder/encoder.c
  367. index 0b65d51..f6d9965 100644
  368. --- a/encoder/encoder.c
  369. +++ b/encoder/encoder.c
  370. @@ -571,8 +571,8 @@ static int x264_validate_parameters( x264_t *h )
  371.              h->param.i_slice_count = 0;
  372.      }
  373.  
  374. -    h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
  375. -    h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
  376. +    h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
  377. +    h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
  378.      h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
  379.      if( h->param.i_scenecut_threshold < 0 )
  380.          h->param.i_scenecut_threshold = 0;
  381. @@ -1005,7 +1005,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  382.  
  383.      CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
  384.      /* Allocate room for max refs plus a few extra just in case. */
  385. -    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + 20) * sizeof(x264_frame_t *) );
  386. +    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + X264_REF_MAX + 4) * sizeof(x264_frame_t *) );
  387.      CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
  388.                          + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
  389.      if( h->param.analyse.i_weighted_pred > 0 )
  390. @@ -1434,9 +1434,9 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t
  391.  
  392.      /* shift the frames to make space for the dupe. */
  393.      h->b_ref_reorder[0] = 1;
  394. -    if( h->i_ref0 < 16 )
  395. +    if( h->i_ref0 < X264_REF_MAX )
  396.          ++h->i_ref0;
  397. -    h->fref0[15] = NULL;
  398. +    h->fref0[X264_REF_MAX-1] = NULL;
  399.      x264_frame_unshift( &h->fref0[j], newframe );
  400.  
  401.      return j;
  402. @@ -1616,7 +1616,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  403.          h->mb.ref_blind_dupe = idx;
  404.      }
  405.  
  406. -    assert( h->i_ref0 + h->i_ref1 <= 16 );
  407. +    assert( h->i_ref0 + h->i_ref1 <= X264_REF_MAX );
  408.      h->mb.pic.i_fref[0] = h->i_ref0;
  409.      h->mb.pic.i_fref[1] = h->i_ref1;
  410.  }
  411. @@ -2801,7 +2801,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  412.              h->stat.i_mb_pred_mode[i][j] += h->stat.frame.i_mb_pred_mode[i][j];
  413.      if( h->sh.i_type != SLICE_TYPE_I )
  414.          for( int i_list = 0; i_list < 2; i_list++ )
  415. -            for( int i = 0; i < 32; i++ )
  416. +            for( int i = 0; i < X264_REF_MAX*2; i++ )
  417.                  h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];
  418.      if( h->sh.i_type == SLICE_TYPE_P )
  419.      {
  420. @@ -3169,7 +3169,7 @@ void    x264_encoder_close  ( x264_t *h )
  421.                  char *p = buf;
  422.                  int64_t i_den = 0;
  423.                  int i_max = 0;
  424. -                for( int i = 0; i < 32; i++ )
  425. +                for( int i = 0; i < X264_REF_MAX*2; i++ )
  426.                      if( h->stat.i_mb_count_ref[i_slice][i_list][i] )
  427.                      {
  428.                          i_den += h->stat.i_mb_count_ref[i_slice][i_list][i];
  429. diff --git a/encoder/set.c b/encoder/set.c
  430. index a520b8a..2c93618 100644
  431. --- a/encoder/set.c
  432. +++ b/encoder/set.c
  433. @@ -125,7 +125,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  434.      /* extra slot with pyramid so that we don't have to override the
  435.       * order of forgetting old pictures */
  436.      sps->vui.i_max_dec_frame_buffering =
  437. -    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  438. +    sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  439.                              param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
  440.      sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
  441.  
  442. --
  443. 1.7.1
  444.  
  445.  
  446. From b72c556de4a733600b07a61a818120fdd1228fc3 Mon Sep 17 00:00:00 2001
  447. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  448. Date: Sat, 21 Aug 2010 00:15:53 -0700
  449. Subject: [PATCH 5/5] CAVLC "trellis"
  450.  ~3-10% improved compression with CAVLC.
  451.  --trellis is now a valid option with CAVLC.
  452.  Perhaps more importantly, this means psy-trellis now works with CAVLC.
  453.  
  454. This isn't a real trellis; it's actually just a simplified QNS.
  455. But it takes enough shortcuts that it's still roughly as fast as a trellis; just not quite optimal.
  456. Thus the name is a bit of a misnomer, but we're reusing the option name because it does the same thing.
  457. A real trellis would be better, but CAVLC is much harder to trellis than CABAC.
  458. I'm not aware of any published polynomial-time solutions that are significantly close to optimal.
  459. ---
  460. encoder/cavlc.c      |    6 +-
  461.  encoder/encoder.c    |    2 -
  462.  encoder/macroblock.c |    2 +-
  463.  encoder/rdo.c        |  268 ++++++++++++++++++++++++++++++++++++++++++++++---
  464.  x264.c               |    2 +-
  465.  5 files changed, 256 insertions(+), 24 deletions(-)
  466.  
  467. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  468. index 6f0b60f..2f7cde9 100644
  469. --- a/encoder/cavlc.c
  470. +++ b/encoder/cavlc.c
  471. @@ -95,7 +95,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
  472.              {
  473.  #if RDO_SKIP_BS
  474.                  /* Weight highly against overflows. */
  475. -                s->i_bits_encoded += 1000000;
  476. +                s->i_bits_encoded += 2000;
  477.  #else
  478.                  x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile\n", i_level_code );
  479.                  /* clip level, preserving sign */
  480. @@ -113,7 +113,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
  481.      return i_suffix_length;
  482.  }
  483.  
  484. -static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC )
  485. +static int block_residual_write_cavlc_internal( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC )
  486.  {
  487.      bs_t *s = &h->out.bs;
  488.      static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
  489. @@ -199,7 +199,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
  490.      if( !*nnz )\
  491.          bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
  492.      else\
  493. -        *nnz = block_residual_write_cavlc(h,cat,l,nC);\
  494. +        *nnz = block_residual_write_cavlc_internal(h,cat,l,nC);\
  495.  }
  496.  
  497.  static void cavlc_qp_delta( x264_t *h )
  498. diff --git a/encoder/encoder.c b/encoder/encoder.c
  499. index f6d9965..f5fe2c5 100644
  500. --- a/encoder/encoder.c
  501. +++ b/encoder/encoder.c
  502. @@ -683,8 +683,6 @@ static int x264_validate_parameters( x264_t *h )
  503.          h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
  504.      }
  505.      h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
  506. -    if( !h->param.b_cabac )
  507. -        h->param.analyse.i_trellis = 0;
  508.      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
  509.      if( !h->param.analyse.b_psy )
  510.      {
  511. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  512. index 99cb433..4297cfb 100644
  513. --- a/encoder/macroblock.c
  514. +++ b/encoder/macroblock.c
  515. @@ -739,7 +739,7 @@ void x264_macroblock_encode( x264_t *h )
  516.          else if( h->mb.b_transform_8x8 )
  517.          {
  518.              ALIGNED_ARRAY_16( dctcoef, dct8x8,[4],[64] );
  519. -            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
  520. +            b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC
  521.              h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
  522.              h->nr_count[1] += h->mb.b_noise_reduction * 4;
  523.  
  524. diff --git a/encoder/rdo.c b/encoder/rdo.c
  525. index d4e6b0c..c935a9e 100644
  526. --- a/encoder/rdo.c
  527. +++ b/encoder/rdo.c
  528. @@ -410,10 +410,12 @@ typedef struct {
  529.  // comparable to the input. so unquant is the direct inverse of quant,
  530.  // and uses the dct scaling factors, not the idct ones.
  531.  
  532. -static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  533. -                                 const uint16_t *quant_mf, const int *unquant_mf,
  534. -                                 const int *coef_weight, const uint8_t *zigzag,
  535. -                                 int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx )
  536. +static ALWAYS_INLINE
  537. +int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  538. +                         const uint16_t *quant_mf, const int *unquant_mf,
  539. +                         const int *coef_weight, const uint8_t *zigzag,
  540. +                         int i_ctxBlockCat, int i_lambda2, int b_ac,
  541. +                         int dc, int i_coefs, int idx )
  542.  {
  543.      int abs_coefs[64], signs[64];
  544.      trellis_node_t nodes[2][8];
  545. @@ -629,35 +631,267 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  546.      return 1;
  547.  }
  548.  
  549. +/* FIXME: This is a gigantic hack.  See below.
  550. + *
  551. + * CAVLC is much more difficult to trellis than CABAC.
  552. + *
  553. + * CABAC has only three states to track: significance map, last, and the
  554. + * level state machine.
  555. + * CAVLC, by comparison, has five: coeff_token (trailing + total),
  556. + * total_zeroes, zero_run, and the level state machine.
  557. + *
  558. + * I know of no paper that has managed to design a close-to-optimal trellis
  559. + * that covers all five of these and isn't exponential-time.  As a result, this
  560. + * "trellis" isn't: it's just a QNS search.  Patches welcome for something better.
  561. + * It's actually surprisingly fast, albeit not quite optimal.  It's pretty close
  562. + * though; since CAVLC only has 2^16 possible rounding modes (assuming only two
  563. + * roundings as options), a bruteforce search is feasible.  Testing shows
  564. + * that this QNS is reasonably close to optimal in terms of compression.
  565. + *
  566. + * TODO:
  567. + *  Don't bother changing large coefficients when it wouldn't affect bit cost
  568. + *  (e.g. only affecting bypassed suffix bits).
  569. + *  Don't re-run all parts of CAVLC bit cost calculation when not necessary.
  570. + *  e.g. when changing a coefficient from one non-zero value to another in
  571. + *  such a way that trailing ones and suffix length isn't affected. */
  572. +static ALWAYS_INLINE
  573. +int quant_trellis_cavlc( x264_t *h, dctcoef *dct,
  574. +                         const uint16_t *quant_mf, const int *unquant_mf,
  575. +                         const int *coef_weight, const uint8_t *zigzag,
  576. +                         int i_ctxBlockCat, int i_lambda2, int b_ac,
  577. +                         int dc, int i_coefs, int idx, int b_8x8 )
  578. +{
  579. +    ALIGNED_16( dctcoef quant_coefs[2][16] );
  580. +    ALIGNED_16( dctcoef coefs[16] ) = {0};
  581. +    int delta_distortion[16];
  582. +    int64_t score = 1ULL<<62;
  583. +    int i, j;
  584. +    const int f = 1<<15;
  585. +    int nC = i_ctxBlockCat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, i_ctxBlockCat == DCT_LUMA_DC ? 0 : idx )];
  586. +
  587. +    /* Code for handling 8x8dct -> 4x4dct CAVLC munging.  Input/output use a different
  588. +     * step/start/end than internal processing. */
  589. +    int step = 1;
  590. +    int start = b_ac;
  591. +    int end = i_coefs - 1;
  592. +    if( b_8x8 )
  593. +    {
  594. +        start = idx&3;
  595. +        end = 60 + start;
  596. +        step = 4;
  597. +    }
  598. +
  599. +    i_lambda2 <<= LAMBDA_BITS;
  600. +
  601. +    /* Find last non-zero coefficient. */
  602. +    for( i = end; i >= start; i -= step )
  603. +        if( (unsigned)(dct[zigzag[i]] * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) + f-1) >= 2*f )
  604. +            break;
  605. +
  606. +    if( i < start )
  607. +        goto zeroblock;
  608. +
  609. +    /* Prepare for QNS search: calculate distortion caused by each DCT coefficient
  610. +     * rounding to be searched.
  611. +     *
  612. +     * We only search two roundings (nearest and nearest-1) like in CABAC trellis,
  613. +     * so we just store the difference in distortion between them. */
  614. +    int i_last_nnz = b_8x8 ? i >> 2 : i;
  615. +    int64_t distortion_cost = 0;
  616. +    int coef_mask = 0;
  617. +    int round_mask = 0;
  618. +    for( i = b_ac, j = start; i <= i_last_nnz; i++, j += step )
  619. +    {
  620. +        int coef = dct[zigzag[j]];
  621. +        int abs_coef = abs(coef);
  622. +        int sign = coef < 0 ? -1 : 1;
  623. +        int nearest_quant = ( f + abs_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[j]]) ) >> 16;
  624. +        quant_coefs[1][i] = quant_coefs[0][i] = sign * nearest_quant;
  625. +        coefs[i] = quant_coefs[1][i];
  626. +        if( nearest_quant )
  627. +        {
  628. +            /* We initialize the trellis with a deadzone halfway between nearest rounding
  629. +             * and always-round-down.  This gives much better results than initializing to either
  630. +             * extreme.
  631. +             * FIXME: should we initialize to the deadzones used by deadzone quant? */
  632. +            int deadzone_quant = ( f/2 + abs_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[j]]) ) >> 16;
  633. +            int unquant1 = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[j]]) * (nearest_quant-0) + 128) >> 8);
  634. +            int unquant0 = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[j]]) * (nearest_quant-1) + 128) >> 8);
  635. +            int d1 = abs_coef - unquant1;
  636. +            int d0 = abs_coef - unquant0;
  637. +            delta_distortion[i] = (d0*d0 - d1*d1) * (dc?256:coef_weight[j]);
  638. +
  639. +            /* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
  640. +            if( h->mb.i_psy_trellis && j && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
  641. +            {
  642. +                int orig_coef = b_8x8 ? h->mb.pic.fenc_dct8[idx>>2][zigzag[j]] : h->mb.pic.fenc_dct4[idx][zigzag[j]];
  643. +                int predicted_coef = orig_coef - coef;
  644. +                int psy_weight = b_8x8 ? x264_dct8_weight_tab[zigzag[j]] : x264_dct4_weight_tab[zigzag[j]];
  645. +                int psy_value0 = h->mb.i_psy_trellis * abs(predicted_coef + unquant0 * sign);
  646. +                int psy_value1 = h->mb.i_psy_trellis * abs(predicted_coef + unquant1 * sign);
  647. +                delta_distortion[i] += (psy_value0 - psy_value1) * psy_weight;
  648. +            }
  649. +
  650. +            quant_coefs[0][i] = sign * (nearest_quant-1);
  651. +            if( deadzone_quant != nearest_quant )
  652. +            {
  653. +                coefs[i] = quant_coefs[0][i];
  654. +                distortion_cost += delta_distortion[i];
  655. +            }
  656. +            else
  657. +                round_mask |= 1 << i;
  658. +        }
  659. +        else
  660. +            delta_distortion[i] = 0;
  661. +        coef_mask |= (!!coefs[i]) << i;
  662. +    }
  663. +
  664. +    /* Calculate the cost of the starting state. */
  665. +    h->out.bs.i_bits_encoded = 0;
  666. +    if( !coef_mask )
  667. +        bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
  668. +    else
  669. +        block_residual_write_cavlc_internal( h, i_ctxBlockCat, coefs + b_ac, nC );
  670. +    score = distortion_cost + ((int64_t)h->out.bs.i_bits_encoded * i_lambda2);
  671. +
  672. +    /* QNS loop: pick the change that improves RD the most, apply it, repeat.
  673. +     * coef_mask and round_mask are used to simplify tracking of nonzeroness
  674. +     * and rounding modes chosen. */
  675. +    while( 1 )
  676. +    {
  677. +        int64_t iter_score = score;
  678. +        int iter_distortion_delta = 0;
  679. +        int iter_coef = -1;
  680. +        int iter_mask = coef_mask;
  681. +        int iter_round = round_mask;
  682. +        for( i = b_ac; i <= i_last_nnz; i++ )
  683. +        {
  684. +            if( !delta_distortion[i] )
  685. +                continue;
  686. +
  687. +            /* Set up all the variables for this iteration. */
  688. +            int cur_round = round_mask ^ (1 << i);
  689. +            int round_change = (cur_round >> i)&1;
  690. +            int old_coef = coefs[i];
  691. +            int new_coef = quant_coefs[round_change][i];
  692. +            int cur_mask = (coef_mask&~(1 << i))|(!!new_coef << i);
  693. +            int cur_distortion_delta = delta_distortion[i] * (round_change ? -1 : 1);
  694. +            int64_t cur_score = cur_distortion_delta + distortion_cost;
  695. +            coefs[i] = new_coef;
  696. +
  697. +            /* Count up bits. */
  698. +            h->out.bs.i_bits_encoded = 0;
  699. +            if( !cur_mask )
  700. +                bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
  701. +            else
  702. +                block_residual_write_cavlc_internal( h, i_ctxBlockCat, coefs + b_ac, nC );
  703. +            cur_score += (int64_t)h->out.bs.i_bits_encoded * i_lambda2;
  704. +
  705. +            coefs[i] = old_coef;
  706. +            if( cur_score < iter_score )
  707. +            {
  708. +                iter_score = cur_score;
  709. +                iter_coef = i;
  710. +                iter_mask = cur_mask;
  711. +                iter_round = cur_round;
  712. +                iter_distortion_delta = cur_distortion_delta;
  713. +            }
  714. +        }
  715. +        if( iter_coef >= 0 )
  716. +        {
  717. +            score = iter_score;
  718. +            coef_mask = iter_mask;
  719. +            round_mask = iter_round;
  720. +            distortion_cost += iter_distortion_delta;
  721. +            coefs[iter_coef] = quant_coefs[((round_mask >> iter_coef)&1)][iter_coef];
  722. +            /* Don't try adjusting coefficients we've already adjusted.
  723. +             * Testing suggests this doesn't hurt results -- and sometimes actually helps. */
  724. +            delta_distortion[iter_coef] = 0;
  725. +        }
  726. +        else
  727. +            break;
  728. +    }
  729. +
  730. +    if( coef_mask )
  731. +    {
  732. +        for( i = b_ac, j = start; i <= i_last_nnz; i++, j += step )
  733. +            dct[zigzag[j]] = coefs[i];
  734. +        for( ; j <= end; j += step )
  735. +            dct[zigzag[j]] = 0;
  736. +        return 1;
  737. +    }
  738. +
  739. +zeroblock:
  740. +    if( !dc )
  741. +    {
  742. +        if( b_8x8 )
  743. +            for( i = start; i <= end; i+=step )
  744. +                dct[zigzag[i]] = 0;
  745. +        else
  746. +            memset( dct, 0, 16*sizeof(dctcoef) );
  747. +    }
  748. +    return 0;
  749. +}
  750. +
  751.  const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
  752.  
  753.  int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  754.                             int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma )
  755.  {
  756. -    return quant_trellis_cabac( h, dct,
  757. +    if( h->param.b_cabac ) /* Dispatch on entropy coder: CABAC trellis here, CAVLC trellis below. */
  758. +        return quant_trellis_cabac( h, dct,
  759. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  760. +            NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
  761. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
  762. +    /* CAVLC path: same tables and flags as the CABAC call plus one trailing 0 (presumably b_8x8 -- signature not visible here, confirm). */
  763. +    return quant_trellis_cavlc( h, dct,
  764.          h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  765.          NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
  766. -        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
  767. +        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0, 0 );
  768.  }
  769.  
  770.  int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  771.                              int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx )
  772.  {
  773.      int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC);
  774. -    return quant_trellis_cabac( h, dct,
  775. -        h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  776. -        x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  777. -        x264_zigzag_scan4[h->mb.b_interlaced],
  778. -        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
  779. +    if( h->param.b_cabac ) /* Dispatch on entropy coder: CABAC trellis here, CAVLC trellis below. */
  780. +        return quant_trellis_cabac( h, dct,
  781. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  782. +            x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  783. +            x264_zigzag_scan4[h->mb.b_interlaced],
  784. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
  785. +    /* CAVLC path: identical arguments plus one trailing 0 (presumably b_8x8: a plain 4x4 block -- confirm against the signature). */
  786. +    return quant_trellis_cavlc( h, dct,
  787. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  788. +            x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  789. +            x264_zigzag_scan4[h->mb.b_interlaced],
  790. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx, 0 );
  791.  }
  792.  
  793.  int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  794.                              int i_qp, int b_intra, int idx )
  795.  {
  796. -    return quant_trellis_cabac( h, dct,
  797. -        h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  798. -        x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  799. -        x264_zigzag_scan8[h->mb.b_interlaced],
  800. -        DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
  801. -}
  802. +    if( h->param.b_cabac ) /* CABAC handles the 8x8 block in a single trellis call. */
  803. +    {
  804. +        return quant_trellis_cabac( h, dct,
  805. +            h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  806. +            x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  807. +            x264_zigzag_scan8[h->mb.b_interlaced],
  808. +            DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
  809. +    }
  810.  
  811. +    /* CAVLC path: the 8x8 block is processed as 4 separate 4x4 blocks (sub-block i is idx*4+i, coded as DCT_LUMA_4x4 with 16 coefs). */
  812. +    int nzaccum = 0; /* OR of the per-sub-block return values. */
  813. +    for( int i = 0; i < 4; i++ )
  814. +    {
  815. +        int nz = quant_trellis_cavlc( h, dct,
  816. +            h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  817. +            x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  818. +            x264_zigzag_scan8[h->mb.b_interlaced],
  819. +            DCT_LUMA_4x4, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 16, idx*4+i, 1 );
  820. +        /* Store this sub-block's result in the non_zero_count cache for later calls (presumably read back through x264_mb_predict_non_zero_code -- confirm). */
  821. +        h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz;
  822. +        nzaccum |= nz; /* Nonzero as soon as any sub-block keeps a coefficient. */
  823. +    }
  824. +    return nzaccum;
  825. +}
  826. diff --git a/x264.c b/x264.c
  827. index 9c3ce5e..7d98518 100644
  828. --- a/x264.c
  829. +++ b/x264.c
  830. @@ -595,7 +595,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  831.      H2( "      --no-mixed-refs         Don't decide references on a per partition basis\n" );
  832.      H2( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
  833.      H1( "      --no-8x8dct             Disable adaptive spatial transform size\n" );
  834. -    H1( "  -t, --trellis <integer>     Trellis RD quantization. Requires CABAC. [%d]\n"
  835. +    H1( "  -t, --trellis <integer>     Trellis RD quantization. [%d]\n"
  836.          "                                  - 0: disabled\n"
  837.          "                                  - 1: enabled only on the final encode of a MB\n"
  838.          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
  839. --
  840. 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement