Advertisement
Guest User

Untitled

a guest
Jun 13th, 2017
517
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 43.45 KB | None | 0 0
  1. From 636d85b07cab192f796485969bc5e7a5538b8372 Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Sat, 21 Aug 2010 16:51:39 -0500
  4. Subject: [PATCH 1/9] Add global #define for maximum reference count
  5.  This should make it easier to play around with reference frame counts that exceed the spec maximum.
  6.  
  7. ---
  8. common/common.h     |   35 ++++++++++++++++++-----------------
  9.  common/frame.h      |    6 +++---
  10.  common/macroblock.c |   10 +++++-----
  11.  encoder/encoder.c   |   16 ++++++++--------
  12.  encoder/set.c       |    2 +-
  13.  5 files changed, 35 insertions(+), 34 deletions(-)
  14.  
  15. diff --git a/common/common.h b/common/common.h
  16. index 72fc1d8..670fd12 100644
  17. --- a/common/common.h
  18. +++ b/common/common.h
  19. @@ -51,6 +51,7 @@ do {\
  20.  } while( 0 )
  21.  
  22.  #define X264_BFRAME_MAX 16
  23. +#define X264_REF_MAX 16
  24.  #define X264_THREAD_MAX 128
  25.  #define X264_PCM_COST (384*BIT_DEPTH+16)
  26.  #define X264_LOOKAHEAD_MAX 250
  27. @@ -340,10 +341,10 @@ typedef struct
  28.      {
  29.          int idc;
  30.          int arg;
  31. -    } ref_pic_list_order[2][16];
  32. +    } ref_pic_list_order[2][X264_REF_MAX];
  33.  
  34.      /* P-frame weighting */
  35. -    x264_weight_t weight[32][3];
  36. +    x264_weight_t weight[X264_REF_MAX*2][3];
  37.  
  38.      int i_mmco_remove_from_end;
  39.      int i_mmco_command_count;
  40. @@ -351,7 +352,7 @@ typedef struct
  41.      {
  42.          int i_difference_of_pic_nums;
  43.          int i_poc;
  44. -    } mmco[16];
  45. +    } mmco[X264_REF_MAX];
  46.  
  47.      int i_cabac_init_idc;
  48.  
  49. @@ -479,7 +480,7 @@ struct x264_t
  50.          x264_frame_t **blank_unused;
  51.  
  52.          /* frames used for reference + sentinels */
  53. -        x264_frame_t *reference[16+2];
  54. +        x264_frame_t *reference[X264_REF_MAX+2];
  55.  
  56.          int i_last_keyframe;       /* Frame number of the last keyframe */
  57.          int i_last_idr;            /* Frame number of the last IDR (not RP)*/
  58. @@ -511,9 +512,9 @@ struct x264_t
  59.  
  60.      /* references lists */
  61.      int             i_ref0;
  62. -    x264_frame_t    *fref0[16+3];     /* ref list 0 */
  63. +    x264_frame_t    *fref0[X264_REF_MAX+3];     /* ref list 0 */
  64.      int             i_ref1;
  65. -    x264_frame_t    *fref1[16+3];     /* ref list 1 */
  66. +    x264_frame_t    *fref1[X264_REF_MAX+3];     /* ref list 1 */
  67.      int             b_ref_reorder[2];
  68.  
  69.      /* hrd */
  70. @@ -605,14 +606,14 @@ struct x264_t
  71.          int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
  72.          uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
  73.          int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
  74. -        int16_t (*mvr[2][32])[2];           /* 16x16 mv for each possible ref */
  75. +        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
  76.          int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
  77.          int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
  78.          uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
  79.                                               * NOTE: this will fail on resolutions above 2^16 MBs... */
  80.  
  81.           /* buffer for weighted versions of the reference frames */
  82. -        pixel *p_weight_buf[16];
  83. +        pixel *p_weight_buf[X264_REF_MAX];
  84.  
  85.          /* current value */
  86.          int     i_type;
  87. @@ -675,9 +676,9 @@ struct x264_t
  88.  
  89.              /* pointer over mb of the references */
  90.              int i_fref[2];
  91. -            pixel *p_fref[2][32][4+1]; /* last: yN, yH, yV, yHV, uv */
  92. -            pixel *p_fref_w[32];  /* weighted fullpel luma */
  93. -            uint16_t *p_integral[2][16];
  94. +            pixel *p_fref[2][X264_REF_MAX*2][4+1]; /* last: yN, yH, yV, yHV, uv */
  95. +            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
  96. +            uint16_t *p_integral[2][X264_REF_MAX];
  97.  
  98.              /* fref stride */
  99.              int     i_stride[3];
  100. @@ -732,15 +733,15 @@ struct x264_t
  101.          int     i_chroma_lambda2_offset;
  102.  
  103.          /* B_direct and weighted prediction */
  104. -        int16_t dist_scale_factor_buf[2][32][4];
  105. +        int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
  106.          int16_t (*dist_scale_factor)[4];
  107. -        int8_t bipred_weight_buf[2][32][4];
  108. +        int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
  109.          int8_t (*bipred_weight)[4];
  110.          /* maps fref1[0]'s ref indices into the current list0 */
  111.  #define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
  112. -        int8_t  map_col_to_list0[18];
  113. +        int8_t  map_col_to_list0[X264_REF_MAX+2];
  114.          int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
  115. -        int8_t deblock_ref_table[32+2];
  116. +        int8_t deblock_ref_table[X264_REF_MAX*2+2];
  117.  #define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
  118.      } mb;
  119.  
  120. @@ -765,7 +766,7 @@ struct x264_t
  121.              int i_mb_count_p;
  122.              int i_mb_count_skip;
  123.              int i_mb_count_8x8dct[2];
  124. -            int i_mb_count_ref[2][32];
  125. +            int i_mb_count_ref[2][X264_REF_MAX*2];
  126.              int i_mb_partition[17];
  127.              int i_mb_cbp[6];
  128.              int i_mb_pred_mode[4][13];
  129. @@ -794,7 +795,7 @@ struct x264_t
  130.          int64_t i_mb_count[5][19];
  131.          int64_t i_mb_partition[2][17];
  132.          int64_t i_mb_count_8x8dct[2];
  133. -        int64_t i_mb_count_ref[2][2][32];
  134. +        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
  135.          int64_t i_mb_cbp[6];
  136.          int64_t i_mb_pred_mode[4][13];
  137.          /* */
  138. diff --git a/common/frame.h b/common/frame.h
  139. index fcc28d7..3e0a3f5 100644
  140. --- a/common/frame.h
  141. +++ b/common/frame.h
  142. @@ -75,8 +75,8 @@ typedef struct x264_frame
  143.      pixel *buffer[4];
  144.      pixel *buffer_lowres[4];
  145.  
  146. -    x264_weight_t weight[16][3]; /* [ref_index][plane] */
  147. -    pixel *weighted[16]; /* plane[0] weighted of the reference frames */
  148. +    x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
  149. +    pixel *weighted[X264_REF_MAX]; /* plane[0] weighted of the reference frames */
  150.      int b_duplicate;
  151.      struct x264_frame *orig;
  152.  
  153. @@ -97,7 +97,7 @@ typedef struct x264_frame
  154.      int     *lowres_mv_costs[2][X264_BFRAME_MAX+1];
  155.      int8_t  *ref[2];
  156.      int     i_ref[2];
  157. -    int     ref_poc[2][16];
  158. +    int     ref_poc[2][X264_REF_MAX];
  159.      int16_t inv_ref_poc[2]; // inverse values of ref0 poc to avoid divisions in temporal MV prediction
  160.  
  161.      /* for adaptive B-frame decision.
  162. diff --git a/common/macroblock.c b/common/macroblock.c
  163. index 7347645..6efd7e6 100644
  164. --- a/common/macroblock.c
  165. +++ b/common/macroblock.c
  166. @@ -233,11 +233,11 @@ int x264_macroblock_cache_allocate( x264_t *h )
  167.  
  168.      for( int i = 0; i < 2; i++ )
  169.      {
  170. -        int i_refs = X264_MIN(16, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  171. +        int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  172.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  173. -            i_refs = X264_MIN(16, i_refs + 2); //smart weights add two duplicate frames
  174. +            i_refs = X264_MIN(X264_REF_MAX, i_refs + 2); //smart weights add two duplicate frames
  175.          else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  176. -            i_refs = X264_MIN(16, i_refs + 1); //blind weights add one duplicate frame
  177. +            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1); //blind weights add one duplicate frame
  178.  
  179.          for( int j = !i; j < i_refs; j++ )
  180.          {
  181. @@ -289,10 +289,10 @@ fail:
  182.  void x264_macroblock_cache_free( x264_t *h )
  183.  {
  184.      for( int i = 0; i < 2; i++ )
  185. -        for( int j = !i; j < 32; j++ )
  186. +        for( int j = !i; j < X264_REF_MAX*2; j++ )
  187.              if( h->mb.mvr[i][j] )
  188.                  x264_free( h->mb.mvr[i][j]-1 );
  189. -    for( int i = 0; i < 16; i++ )
  190. +    for( int i = 0; i < X264_REF_MAX; i++ )
  191.          x264_free( h->mb.p_weight_buf[i] );
  192.  
  193.      if( h->param.b_cabac )
  194. diff --git a/encoder/encoder.c b/encoder/encoder.c
  195. index 0b65d51..f6d9965 100644
  196. --- a/encoder/encoder.c
  197. +++ b/encoder/encoder.c
  198. @@ -571,8 +571,8 @@ static int x264_validate_parameters( x264_t *h )
  199.              h->param.i_slice_count = 0;
  200.      }
  201.  
  202. -    h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
  203. -    h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
  204. +    h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
  205. +    h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
  206.      h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
  207.      if( h->param.i_scenecut_threshold < 0 )
  208.          h->param.i_scenecut_threshold = 0;
  209. @@ -1005,7 +1005,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  210.  
  211.      CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
  212.      /* Allocate room for max refs plus a few extra just in case. */
  213. -    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + 20) * sizeof(x264_frame_t *) );
  214. +    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + X264_REF_MAX + 4) * sizeof(x264_frame_t *) );
  215.      CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
  216.                          + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
  217.      if( h->param.analyse.i_weighted_pred > 0 )
  218. @@ -1434,9 +1434,9 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t
  219.  
  220.      /* shift the frames to make space for the dupe. */
  221.      h->b_ref_reorder[0] = 1;
  222. -    if( h->i_ref0 < 16 )
  223. +    if( h->i_ref0 < X264_REF_MAX )
  224.          ++h->i_ref0;
  225. -    h->fref0[15] = NULL;
  226. +    h->fref0[X264_REF_MAX-1] = NULL;
  227.      x264_frame_unshift( &h->fref0[j], newframe );
  228.  
  229.      return j;
  230. @@ -1616,7 +1616,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  231.          h->mb.ref_blind_dupe = idx;
  232.      }
  233.  
  234. -    assert( h->i_ref0 + h->i_ref1 <= 16 );
  235. +    assert( h->i_ref0 + h->i_ref1 <= X264_REF_MAX );
  236.      h->mb.pic.i_fref[0] = h->i_ref0;
  237.      h->mb.pic.i_fref[1] = h->i_ref1;
  238.  }
  239. @@ -2801,7 +2801,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  240.              h->stat.i_mb_pred_mode[i][j] += h->stat.frame.i_mb_pred_mode[i][j];
  241.      if( h->sh.i_type != SLICE_TYPE_I )
  242.          for( int i_list = 0; i_list < 2; i_list++ )
  243. -            for( int i = 0; i < 32; i++ )
  244. +            for( int i = 0; i < X264_REF_MAX*2; i++ )
  245.                  h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];
  246.      if( h->sh.i_type == SLICE_TYPE_P )
  247.      {
  248. @@ -3169,7 +3169,7 @@ void    x264_encoder_close  ( x264_t *h )
  249.                  char *p = buf;
  250.                  int64_t i_den = 0;
  251.                  int i_max = 0;
  252. -                for( int i = 0; i < 32; i++ )
  253. +                for( int i = 0; i < X264_REF_MAX*2; i++ )
  254.                      if( h->stat.i_mb_count_ref[i_slice][i_list][i] )
  255.                      {
  256.                          i_den += h->stat.i_mb_count_ref[i_slice][i_list][i];
  257. diff --git a/encoder/set.c b/encoder/set.c
  258. index a520b8a..2c93618 100644
  259. --- a/encoder/set.c
  260. +++ b/encoder/set.c
  261. @@ -125,7 +125,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  262.      /* extra slot with pyramid so that we don't have to override the
  263.       * order of forgetting old pictures */
  264.      sps->vui.i_max_dec_frame_buffering =
  265. -    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  266. +    sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  267.                              param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
  268.      sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
  269.  
  270. --
  271. 1.7.1
  272.  
  273.  
  274. From cd21d0551318972a58a7e497e0321e373f0d1237 Mon Sep 17 00:00:00 2001
  275. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  276. Date: Sat, 21 Aug 2010 00:15:53 -0700
  277. Subject: [PATCH 2/9] CAVLC "trellis"
  278.  ~3-10% improved compression with CAVLC.
  279.  --trellis is now a valid option with CAVLC.
  280.  Perhaps more importantly, this means psy-trellis now works with CAVLC.
  281.  
  282. This isn't a real trellis; it's actually just a simplified QNS.
  283. But it takes enough shortcuts that it's still roughly as fast as a trellis; just not quite optimal.
  284. Thus the name is a bit of a misnomer, but we're reusing the option name because it does the same thing.
  285. A real trellis would be better, but CAVLC is much harder to trellis than CABAC.
  286. I'm not aware of any published polynomial-time solutions that come reasonably close to optimal.
  287. ---
  288. encoder/cavlc.c      |    6 +-
  289.  encoder/encoder.c    |    2 -
  290.  encoder/macroblock.c |    2 +-
  291.  encoder/rdo.c        |  263 ++++++++++++++++++++++++++++++++++++++++++++++---
  292.  x264.c               |    2 +-
  293.  5 files changed, 251 insertions(+), 24 deletions(-)
  294.  
  295. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  296. index 6f0b60f..2f7cde9 100644
  297. --- a/encoder/cavlc.c
  298. +++ b/encoder/cavlc.c
  299. @@ -95,7 +95,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
  300.              {
  301.  #if RDO_SKIP_BS
  302.                  /* Weight highly against overflows. */
  303. -                s->i_bits_encoded += 1000000;
  304. +                s->i_bits_encoded += 2000;
  305.  #else
  306.                  x264_log(h, X264_LOG_WARNING, "OVERFLOW levelcode=%d is only allowed in High Profile\n", i_level_code );
  307.                  /* clip level, preserving sign */
  308. @@ -113,7 +113,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
  309.      return i_suffix_length;
  310.  }
  311.  
  312. -static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC )
  313. +static int block_residual_write_cavlc_internal( x264_t *h, int i_ctxBlockCat, dctcoef *l, int nC )
  314.  {
  315.      bs_t *s = &h->out.bs;
  316.      static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
  317. @@ -199,7 +199,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
  318.      if( !*nnz )\
  319.          bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
  320.      else\
  321. -        *nnz = block_residual_write_cavlc(h,cat,l,nC);\
  322. +        *nnz = block_residual_write_cavlc_internal(h,cat,l,nC);\
  323.  }
  324.  
  325.  static void cavlc_qp_delta( x264_t *h )
  326. diff --git a/encoder/encoder.c b/encoder/encoder.c
  327. index f6d9965..f5fe2c5 100644
  328. --- a/encoder/encoder.c
  329. +++ b/encoder/encoder.c
  330. @@ -683,8 +683,6 @@ static int x264_validate_parameters( x264_t *h )
  331.          h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
  332.      }
  333.      h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
  334. -    if( !h->param.b_cabac )
  335. -        h->param.analyse.i_trellis = 0;
  336.      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
  337.      if( !h->param.analyse.b_psy )
  338.      {
  339. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  340. index 99cb433..4297cfb 100644
  341. --- a/encoder/macroblock.c
  342. +++ b/encoder/macroblock.c
  343. @@ -739,7 +739,7 @@ void x264_macroblock_encode( x264_t *h )
  344.          else if( h->mb.b_transform_8x8 )
  345.          {
  346.              ALIGNED_ARRAY_16( dctcoef, dct8x8,[4],[64] );
  347. -            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
  348. +            b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC
  349.              h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
  350.              h->nr_count[1] += h->mb.b_noise_reduction * 4;
  351.  
  352. diff --git a/encoder/rdo.c b/encoder/rdo.c
  353. index d4e6b0c..36ba677 100644
  354. --- a/encoder/rdo.c
  355. +++ b/encoder/rdo.c
  356. @@ -410,10 +410,12 @@ typedef struct {
  357.  // comparable to the input. so unquant is the direct inverse of quant,
  358.  // and uses the dct scaling factors, not the idct ones.
  359.  
  360. -static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  361. -                                 const uint16_t *quant_mf, const int *unquant_mf,
  362. -                                 const int *coef_weight, const uint8_t *zigzag,
  363. -                                 int i_ctxBlockCat, int i_lambda2, int b_ac, int dc, int i_coefs, int idx )
  364. +static ALWAYS_INLINE
  365. +int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  366. +                         const uint16_t *quant_mf, const int *unquant_mf,
  367. +                         const int *coef_weight, const uint8_t *zigzag,
  368. +                         int i_ctxBlockCat, int i_lambda2, int b_ac,
  369. +                         int dc, int i_coefs, int idx )
  370.  {
  371.      int abs_coefs[64], signs[64];
  372.      trellis_node_t nodes[2][8];
  373. @@ -629,35 +631,262 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  374.      return 1;
  375.  }
  376.  
  377. +/* FIXME: This is a gigantic hack.  See below.
  378. + *
  379. + * CAVLC is much more difficult to trellis than CABAC.
  380. + *
  381. + * CABAC has only three states to track: significance map, last, and the
  382. + * level state machine.
  383. + * CAVLC, by comparison, has five: coeff_token (which carries two: trailing ones
  384. + * and total coefficients), total_zeros, run_before, and the level state machine.
  385. + *
  386. + * I know of no paper that has managed to design a close-to-optimal trellis
  387. + * that covers all five of these and isn't exponential-time.  As a result, this
  388. + * "trellis" isn't: it's just a QNS search.  Patches welcome for something better.
  389. + * It's actually surprisingly fast, albeit not quite optimal.  It's pretty close
  390. + * though: a block of 16 coefficients has only 2^16 possible rounding combinations
  391. + * (assuming only two roundings as options each), so a brute-force search is feasible.
  392. + * Testing shows that this QNS is reasonably close to optimal in terms of compression.
  393. + *
  394. + * TODO:
  395. + *  Don't bother changing large coefficients when it wouldn't affect bit cost
  396. + *  (e.g. only affecting bypassed suffix bits).
  397. + *  Don't re-run all parts of CAVLC bit cost calculation when not necessary.
  398. + *  e.g. when changing a coefficient from one non-zero value to another in
  399. + *  such a way that trailing ones and suffix length aren't affected. */
  400. +static ALWAYS_INLINE
  401. +int quant_trellis_cavlc( x264_t *h, dctcoef *dct,
  402. +                         const uint16_t *quant_mf, const int *unquant_mf,
  403. +                         const int *coef_weight, const uint8_t *zigzag,
  404. +                         int i_ctxBlockCat, int i_lambda2, int b_ac,
  405. +                         int dc, int i_coefs, int idx, int b_8x8 )
  406. +{
  407. +    ALIGNED_16( dctcoef quant_coefs[2][16] );
  408. +    ALIGNED_16( dctcoef coefs[16] ) = {0};
  409. +    int delta_distortion[16];
  410. +    int64_t score = 1ULL<<62;
  411. +    int i, j;
  412. +    const int f = 1<<15;
  413. +    int nC = i_ctxBlockCat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, i_ctxBlockCat == DCT_LUMA_DC ? 0 : idx )];
  414. +
  415. +    /* Code for handling 8x8dct -> 4x4dct CAVLC munging.  Input/output use a different
  416. +     * step/start/end than internal processing. */
  417. +    int step = 1;
  418. +    int start = b_ac;
  419. +    int end = i_coefs - 1;
  420. +    if( b_8x8 )
  421. +    {
  422. +        start = idx&3;
  423. +        end = 60 + start;
  424. +        step = 4;
  425. +    }
  426. +
  427. +    i_lambda2 <<= LAMBDA_BITS;
  428. +
  429. +    /* Find last non-zero coefficient. */
  430. +    for( i = end; i >= start; i -= step )
  431. +        if( (unsigned)(dct[zigzag[i]] * (dc?quant_mf[0]>>1:quant_mf[zigzag[i]]) + f-1) >= 2*f )
  432. +            break;
  433. +
  434. +    if( i < start )
  435. +        goto zeroblock;
  436. +
  437. +    /* Prepare for QNS search: calculate the distortion caused by each candidate
  438. +     * rounding of each DCT coefficient.
  439. +     *
  440. +     * We only search two roundings (nearest and nearest-1) like in CABAC trellis,
  441. +     * so we just store the difference in distortion between them. */
  442. +    int i_last_nnz = b_8x8 ? i >> 2 : i;
  443. +    int coef_mask = 0;
  444. +    int round_mask = 0;
  445. +    for( i = b_ac, j = start; i <= i_last_nnz; i++, j += step )
  446. +    {
  447. +        int coef = dct[zigzag[j]];
  448. +        int abs_coef = abs(coef);
  449. +        int sign = coef < 0 ? -1 : 1;
  450. +        int nearest_quant = ( f + abs_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[j]]) ) >> 16;
  451. +        quant_coefs[1][i] = quant_coefs[0][i] = sign * nearest_quant;
  452. +        coefs[i] = quant_coefs[1][i];
  453. +        if( nearest_quant )
  454. +        {
  455. +            /* We initialize the trellis with a deadzone halfway between nearest rounding
  456. +             * and always-round-down.  This gives much better results than initializing to either
  457. +             * extreme.
  458. +             * FIXME: should we initialize to the deadzones used by deadzone quant? */
  459. +            int deadzone_quant = ( f/2 + abs_coef * (dc?quant_mf[0]>>1:quant_mf[zigzag[j]]) ) >> 16;
  460. +            int unquant1 = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[j]]) * (nearest_quant-0) + 128) >> 8);
  461. +            int unquant0 = (((dc?unquant_mf[0]<<1:unquant_mf[zigzag[j]]) * (nearest_quant-1) + 128) >> 8);
  462. +            int d1 = abs_coef - unquant1;
  463. +            int d0 = abs_coef - unquant0;
  464. +            delta_distortion[i] = (d0*d0 - d1*d1) * (dc?256:coef_weight[j]);
  465. +
  466. +            /* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
  467. +            if( h->mb.i_psy_trellis && j && !dc && i_ctxBlockCat != DCT_CHROMA_AC )
  468. +            {
  469. +                int orig_coef = b_8x8 ? h->mb.pic.fenc_dct8[idx>>2][zigzag[j]] : h->mb.pic.fenc_dct4[idx][zigzag[j]];
  470. +                int predicted_coef = orig_coef - coef;
  471. +                int psy_weight = b_8x8 ? x264_dct8_weight_tab[zigzag[j]] : x264_dct4_weight_tab[zigzag[j]];
  472. +                int psy_value0 = h->mb.i_psy_trellis * abs(predicted_coef + unquant0 * sign);
  473. +                int psy_value1 = h->mb.i_psy_trellis * abs(predicted_coef + unquant1 * sign);
  474. +                delta_distortion[i] += (psy_value0 - psy_value1) * psy_weight;
  475. +            }
  476. +
  477. +            quant_coefs[0][i] = sign * (nearest_quant-1);
  478. +            if( deadzone_quant != nearest_quant )
  479. +                coefs[i] = quant_coefs[0][i];
  480. +            else
  481. +                round_mask |= 1 << i;
  482. +        }
  483. +        else
  484. +            delta_distortion[i] = 0;
  485. +        coef_mask |= (!!coefs[i]) << i;
  486. +    }
  487. +
  488. +    /* Calculate the cost of the starting state. */
  489. +    h->out.bs.i_bits_encoded = 0;
  490. +    if( !coef_mask )
  491. +        bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
  492. +    else
  493. +        block_residual_write_cavlc_internal( h, i_ctxBlockCat, coefs + b_ac, nC );
  494. +    score = (int64_t)h->out.bs.i_bits_encoded * i_lambda2;
  495. +
  496. +    /* QNS loop: pick the change that improves RD the most, apply it, repeat.
  497. +     * coef_mask and round_mask are used to simplify tracking of nonzeroness
  498. +     * and rounding modes chosen. */
  499. +    while( 1 )
  500. +    {
  501. +        int64_t iter_score = score;
  502. +        int iter_distortion_delta = 0;
  503. +        int iter_coef = -1;
  504. +        int iter_mask = coef_mask;
  505. +        int iter_round = round_mask;
  506. +        for( i = b_ac; i <= i_last_nnz; i++ )
  507. +        {
  508. +            if( !delta_distortion[i] )
  509. +                continue;
  510. +
  511. +            /* Set up all the variables for this iteration. */
  512. +            int cur_round = round_mask ^ (1 << i);
  513. +            int round_change = (cur_round >> i)&1;
  514. +            int old_coef = coefs[i];
  515. +            int new_coef = quant_coefs[round_change][i];
  516. +            int cur_mask = (coef_mask&~(1 << i))|(!!new_coef << i);
  517. +            int cur_distortion_delta = delta_distortion[i] * (round_change ? -1 : 1);
  518. +            int64_t cur_score = cur_distortion_delta;
  519. +            coefs[i] = new_coef;
  520. +
  521. +            /* Count up bits. */
  522. +            h->out.bs.i_bits_encoded = 0;
  523. +            if( !cur_mask )
  524. +                bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );
  525. +            else
  526. +                block_residual_write_cavlc_internal( h, i_ctxBlockCat, coefs + b_ac, nC );
  527. +            cur_score += (int64_t)h->out.bs.i_bits_encoded * i_lambda2;
  528. +
  529. +            coefs[i] = old_coef;
  530. +            if( cur_score < iter_score )
  531. +            {
  532. +                iter_score = cur_score;
  533. +                iter_coef = i;
  534. +                iter_mask = cur_mask;
  535. +                iter_round = cur_round;
  536. +                iter_distortion_delta = cur_distortion_delta;
  537. +            }
  538. +        }
  539. +        if( iter_coef >= 0 )
  540. +        {
  541. +            score = iter_score - iter_distortion_delta;
  542. +            coef_mask = iter_mask;
  543. +            round_mask = iter_round;
  544. +            coefs[iter_coef] = quant_coefs[((round_mask >> iter_coef)&1)][iter_coef];
  545. +            /* Don't try adjusting coefficients we've already adjusted.
  546. +             * Testing suggests this doesn't hurt results -- and sometimes actually helps. */
  547. +            delta_distortion[iter_coef] = 0;
  548. +        }
  549. +        else
  550. +            break;
  551. +    }
  552. +
  553. +    if( coef_mask )
  554. +    {
  555. +        for( i = b_ac, j = start; i <= i_last_nnz; i++, j += step )
  556. +            dct[zigzag[j]] = coefs[i];
  557. +        for( ; j <= end; j += step )
  558. +            dct[zigzag[j]] = 0;
  559. +        return 1;
  560. +    }
  561. +
  562. +zeroblock:
  563. +    if( !dc )
  564. +    {
  565. +        if( b_8x8 )
  566. +            for( i = start; i <= end; i+=step )
  567. +                dct[zigzag[i]] = 0;
  568. +        else
  569. +            memset( dct, 0, 16*sizeof(dctcoef) );
  570. +    }
  571. +    return 0;
  572. +}
  573. +
  574.  const static uint8_t x264_zigzag_scan2[4] = {0,1,2,3};
  575.  
  576.  int x264_quant_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  577.                             int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma )
  578.  {
  579. -    return quant_trellis_cabac( h, dct,
  580. +    if( h->param.b_cabac )
  581. +        return quant_trellis_cabac( h, dct,
  582. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  583. +            NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
  584. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
  585. +
  586. +    return quant_trellis_cavlc( h, dct,
  587.          h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  588.          NULL, i_ctxBlockCat==DCT_CHROMA_DC ? x264_zigzag_scan2 : x264_zigzag_scan4[h->mb.b_interlaced],
  589. -        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0 );
  590. +        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], 0, 1, i_ctxBlockCat==DCT_CHROMA_DC ? 4 : 16, 0, 0 );
  591.  }
  592.  
  593.  int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  594.                              int i_qp, int i_ctxBlockCat, int b_intra, int b_chroma, int idx )
  595.  {
  596.      int b_ac = (i_ctxBlockCat == DCT_LUMA_AC || i_ctxBlockCat == DCT_CHROMA_AC);
  597. -    return quant_trellis_cabac( h, dct,
  598. -        h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  599. -        x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  600. -        x264_zigzag_scan4[h->mb.b_interlaced],
  601. -        i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
  602. +    if( h->param.b_cabac )
  603. +        return quant_trellis_cabac( h, dct,
  604. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  605. +            x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  606. +            x264_zigzag_scan4[h->mb.b_interlaced],
  607. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx );
  608. +
  609. +    return quant_trellis_cavlc( h, dct,
  610. +            h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
  611. +            x264_dct4_weight2_zigzag[h->mb.b_interlaced],
  612. +            x264_zigzag_scan4[h->mb.b_interlaced],
  613. +            i_ctxBlockCat, h->mb.i_trellis_lambda2[b_chroma][b_intra], b_ac, 0, 16, idx, 0 );
  614.  }
  615.  
  616.  int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  617.                              int i_qp, int b_intra, int idx )
  618.  {
  619. -    return quant_trellis_cabac( h, dct,
  620. -        h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  621. -        x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  622. -        x264_zigzag_scan8[h->mb.b_interlaced],
  623. -        DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
  624. -}
  625. +    if( h->param.b_cabac )
  626. +    {
  627. +        return quant_trellis_cabac( h, dct,
  628. +            h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  629. +            x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  630. +            x264_zigzag_scan8[h->mb.b_interlaced],
  631. +            DCT_LUMA_8x8, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 64, idx );
  632. +    }
  633.  
  634. +    /* 8x8 CAVLC is split into 4 4x4 blocks */
  635. +    int nzaccum = 0;
  636. +    for( int i = 0; i < 4; i++ )
  637. +    {
  638. +        int nz = quant_trellis_cavlc( h, dct,
  639. +            h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
  640. +            x264_dct8_weight2_zigzag[h->mb.b_interlaced],
  641. +            x264_zigzag_scan8[h->mb.b_interlaced],
  642. +            DCT_LUMA_4x4, h->mb.i_trellis_lambda2[0][b_intra], 0, 0, 16, idx*4+i, 1 );
  643. +        /* Set up nonzero count for future calls */
  644. +        h->mb.cache.non_zero_count[x264_scan8[idx*4+i]] = nz;
  645. +        nzaccum |= nz;
  646. +    }
  647. +    return nzaccum;
  648. +}
  649. diff --git a/x264.c b/x264.c
  650. index 9c3ce5e..7d98518 100644
  651. --- a/x264.c
  652. +++ b/x264.c
  653. @@ -595,7 +595,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  654.      H2( "      --no-mixed-refs         Don't decide references on a per partition basis\n" );
  655.      H2( "      --no-chroma-me          Ignore chroma in motion estimation\n" );
  656.      H1( "      --no-8x8dct             Disable adaptive spatial transform size\n" );
  657. -    H1( "  -t, --trellis <integer>     Trellis RD quantization. Requires CABAC. [%d]\n"
  658. +    H1( "  -t, --trellis <integer>     Trellis RD quantization. [%d]\n"
  659.          "                                  - 0: disabled\n"
  660.          "                                  - 1: enabled only on the final encode of a MB\n"
  661.          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
  662. --
  663. 1.7.1
  664.  
  665.  
  666. From 5b8f40714b10df5a5bf24ebb6be530a8458e2fdf Mon Sep 17 00:00:00 2001
  667. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  668. Date: Thu, 26 Aug 2010 09:12:01 -0400
  669. Subject: [PATCH 3/9] Don't do deblock-aware RD if deblocking is off
  670.  
  671. ---
  672. encoder/analyse.c |    2 +-
  673.  1 files changed, 1 insertions(+), 1 deletions(-)
  674.  
  675. diff --git a/encoder/analyse.c b/encoder/analyse.c
  676. index fdc2498..3ddd3f0 100644
  677. --- a/encoder/analyse.c
  678. +++ b/encoder/analyse.c
  679. @@ -357,7 +357,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  680.      /* mbrd == 2 -> RD refinement */
  681.      /* mbrd == 3 -> QPRD */
  682.      a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
  683. -    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9;
  684. +    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
  685.  
  686.      x264_mb_analyse_init_qp( h, a, i_qp );
  687.  
  688. --
  689. 1.7.1
  690.  
  691.  
  692. From 5978cbc53dec1e7023b2ba9c9f9ce6ed24ffc68b Mon Sep 17 00:00:00 2001
  693. From: Anton Mitrofanov <BugMaster@narod.ru>
  694. Date: Sun, 29 Aug 2010 16:35:32 +0400
  695. Subject: [PATCH 4/9] Fix bug in 2pass if the first P-frames are all skip
  696.  last_qscale_for was read before being initialized in this case, resulting
  697.  in the value from the previous iteration being used instead.
  698.  
  699. ---
  700. encoder/ratecontrol.c |    5 +++++
  701.  1 files changed, 5 insertions(+), 0 deletions(-)
  702.  
  703. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  704. index d9d118a..cf51f37 100644
  705. --- a/encoder/ratecontrol.c
  706. +++ b/encoder/ratecontrol.c
  707. @@ -2518,6 +2518,7 @@ static int init_pass2( x264_t *h )
  708.      const int filter_size = (int)(qblur*4) | 1;
  709.      double expected_bits;
  710.      double *qscale, *blurred_qscale;
  711. +    double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  712.  
  713.      /* find total/average complexity & const_bits */
  714.      for( int i = 0; i < rcc->num_entries; i++ )
  715. @@ -2602,6 +2603,10 @@ static int init_pass2( x264_t *h )
  716.          rcc->last_accum_p_norm = 1;
  717.          rcc->accum_p_norm = 0;
  718.  
  719. +        rcc->last_qscale_for[0] =
  720. +        rcc->last_qscale_for[1] =
  721. +        rcc->last_qscale_for[2] = pow( base_cplx, 1 - rcc->qcompress ) / rate_factor;
  722. +
  723.          /* find qscale */
  724.          for( int i = 0; i < rcc->num_entries; i++ )
  725.          {
  726. --
  727. 1.7.1
  728.  
  729.  
  730. From 26f9e9417034eaccccc7ec0bc225eaef3f0f4de0 Mon Sep 17 00:00:00 2001
  731. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  732. Date: Sun, 29 Aug 2010 22:18:07 -0700
  733. Subject: [PATCH 5/9] Faster cabac_encode_ue_bypass
  734.  Use CLZ + a lut instead of a loop.
  735.  
  736. ---
  737. common/cabac.c |   15 ++++++++++-----
  738.  1 files changed, 10 insertions(+), 5 deletions(-)
  739.  
  740. diff --git a/common/cabac.c b/common/cabac.c
  741. index d0888d0..cd57d90 100644
  742. --- a/common/cabac.c
  743. +++ b/common/cabac.c
  744. @@ -850,14 +850,19 @@ void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
  745.      x264_cabac_putbyte( cb );
  746.  }
  747.  
  748. +static const int bypass_lut[16] =
  749. +{
  750. +    -1,      0x2,     0x14,     0x68,     0x1d0,     0x7a0,     0x1f40,     0x7e80,
  751. +    0x1fd00, 0x7fa00, 0x1ff400, 0x7fe800, 0x1ffd000, 0x7ffa000, 0x1fff4000, 0x7ffe8000
  752. +};
  753. +
  754.  void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
  755.  {
  756. -    int k, i;
  757. -    for( k = exp_bits; val >= (1<<k); k++ )
  758. -        val -= 1<<k;
  759. -    uint32_t x = (((1<<(k-exp_bits))-1)<<(k+1))+val;
  760. +    uint32_t v = val + (1<<exp_bits);
  761. +    int k = 31 - x264_clz( v );
  762. +    uint32_t x = (bypass_lut[k-exp_bits]<<exp_bits) + v;
  763.      k = 2*k+1-exp_bits;
  764. -    i = ((k-1)&7)+1;
  765. +    int i = ((k-1)&7)+1;
  766.      do {
  767.          k -= i;
  768.          cb->i_low <<= i;
  769. --
  770. 1.7.1
  771.  
  772.  
  773. From 4c6ed36e092bb4fd3fb86668c34a07a9abfc170d Mon Sep 17 00:00:00 2001
  774. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  775. Date: Mon, 30 Aug 2010 12:32:31 -0700
  776. Subject: [PATCH 6/9] Use POC type 2 for streams with no B-frames
  777.  Saves a few bits per slice header.
  778.  
  779. ---
  780. encoder/set.c |    2 +-
  781.  1 files changed, 1 insertions(+), 1 deletions(-)
  782.  
  783. diff --git a/encoder/set.c b/encoder/set.c
  784. index 2c93618..2b3bbce 100644
  785. --- a/encoder/set.c
  786. +++ b/encoder/set.c
  787. @@ -135,7 +135,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  788.      while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
  789.          sps->i_log2_max_frame_num++;
  790.  
  791. -    sps->i_poc_type = 0;
  792. +    sps->i_poc_type = param->i_bframe ? 0 : 2;
  793.      if( sps->i_poc_type == 0 )
  794.      {
  795.          int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
  796. --
  797. 1.7.1
  798.  
  799.  
  800. From 59557dc35a4d70a8ebaec969f83f4de043c58b31 Mon Sep 17 00:00:00 2001
  801. From: Takashi Hirata <silverfilain@gmail.com>
  802. Date: Mon, 30 Aug 2010 18:13:49 +0900
  803. Subject: [PATCH 7/9] Add support for level 1b
  804.  This level is a stupid hack in the H.264 spec, so it's a stupid hack in x264 too.
  805.  Since level is an integer, calling applications need to set level_idc=9 to use it.
  806.  String-based option handling will accept "1b" just fine though, so CLI users don't have to worry.
  807.  
  808. ---
  809. common/common.c   |    4 +++-
  810.  common/set.h      |    1 +
  811.  encoder/encoder.c |   12 ++++++++----
  812.  encoder/set.c     |   17 ++++++++++++++---
  813.  4 files changed, 26 insertions(+), 8 deletions(-)
  814.  
  815. diff --git a/common/common.c b/common/common.c
  816. index 47fcaa2..b0bb4e7 100644
  817. --- a/common/common.c
  818. +++ b/common/common.c
  819. @@ -603,7 +603,9 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  820.          p->b_deterministic = atobool(value);
  821.      OPT2("level", "level-idc")
  822.      {
  823. -        if( atof(value) < 6 )
  824. +        if( !strcmp(value, "1b") )
  825. +            p->i_level_idc = 9;
  826. +        else if( atof(value) < 6 )
  827.              p->i_level_idc = (int)(10*atof(value)+.5);
  828.          else
  829.              p->i_level_idc = atoi(value);
  830. diff --git a/common/set.h b/common/set.h
  831. index ee27d74..6625ae4 100644
  832. --- a/common/set.h
  833. +++ b/common/set.h
  834. @@ -59,6 +59,7 @@ typedef struct
  835.      int b_constraint_set0;
  836.      int b_constraint_set1;
  837.      int b_constraint_set2;
  838. +    int b_constraint_set3;
  839.  
  840.      int i_log2_max_frame_num;
  841.  
  842. diff --git a/encoder/encoder.c b/encoder/encoder.c
  843. index f5fe2c5..2f8626c 100644
  844. --- a/encoder/encoder.c
  845. +++ b/encoder/encoder.c
  846. @@ -1157,16 +1157,20 @@ x264_t *x264_encoder_open( x264_param_t *param )
  847.                            h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
  848.                            h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
  849.                            "High 4:4:4 Predictive";
  850. +    char level[4];
  851. +    snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
  852. +    if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 ) )
  853. +        strcpy( level, "1b" );
  854.  
  855.      if( h->sps->i_profile_idc < PROFILE_HIGH10 )
  856.      {
  857. -        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
  858. -            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
  859. +        x264_log( h, X264_LOG_INFO, "profile %s, level %s\n",
  860. +            profile, level );
  861.      }
  862.      else
  863.      {
  864. -        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
  865. -            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
  866. +        x264_log( h, X264_LOG_INFO, "profile %s, level %s, bit depth %d\n",
  867. +            profile, level, BIT_DEPTH );
  868.      }
  869.  
  870.      return h;
  871. diff --git a/encoder/set.c b/encoder/set.c
  872. index 2b3bbce..3dee484 100644
  873. --- a/encoder/set.c
  874. +++ b/encoder/set.c
  875. @@ -112,7 +112,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  876.          sps->i_profile_idc  = PROFILE_MAIN;
  877.      else
  878.          sps->i_profile_idc  = PROFILE_BASELINE;
  879. -    sps->i_level_idc = param->i_level_idc;
  880.  
  881.      sps->b_constraint_set0  = sps->i_profile_idc == PROFILE_BASELINE;
  882.      /* x264 doesn't support the features that are in Baseline and not in Main,
  883. @@ -121,6 +120,17 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  884.      /* Never set constraint_set2, it is not necessary and not used in real world. */
  885.      sps->b_constraint_set2  = 0;
  886.  
  887. +    if( param->i_level_idc == 9 && ( sps->i_profile_idc >= PROFILE_BASELINE && sps->i_profile_idc <= PROFILE_EXTENDED ) )
  888. +    {
  889. +        sps->b_constraint_set3 = 1; /* level 1b with Baseline, Main or Extended profile is signalled via constraint_set3 */
  890. +        sps->i_level_idc      = 11;
  891. +    }
  892. +    else
  893. +    {
  894. +        sps->b_constraint_set3 = 0;
  895. +        sps->i_level_idc = param->i_level_idc;
  896. +    }
  897. +
  898.      sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
  899.      /* extra slot with pyramid so that we don't have to override the
  900.       * order of forgetting old pictures */
  901. @@ -252,8 +262,9 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
  902.      bs_write( s, 1, sps->b_constraint_set0 );
  903.      bs_write( s, 1, sps->b_constraint_set1 );
  904.      bs_write( s, 1, sps->b_constraint_set2 );
  905. +    bs_write( s, 1, sps->b_constraint_set3 );
  906.  
  907. -    bs_write( s, 5, 0 );    /* reserved */
  908. +    bs_write( s, 4, 0 );    /* reserved */
  909.  
  910.      bs_write( s, 8, sps->i_level_idc );
  911.  
  912. @@ -640,7 +651,7 @@ void x264_filler_write( x264_t *h, bs_t *s, int filler )
  913.  const x264_level_t x264_levels[] =
  914.  {
  915.      { 10,   1485,    99,   152064,     64,    175,  64, 64,  0, 2, 0, 0, 1 },
  916. -//  {"1b",  1485,    99,   152064,    128,    350,  64, 64,  0, 2, 0, 0, 1 },
  917. +    {  9,   1485,    99,   152064,    128,    350,  64, 64,  0, 2, 0, 0, 1 }, /* "1b" */
  918.      { 11,   3000,   396,   345600,    192,    500, 128, 64,  0, 2, 0, 0, 1 },
  919.      { 12,   6000,   396,   912384,    384,   1000, 128, 64,  0, 2, 0, 0, 1 },
  920.      { 13,  11880,   396,   912384,    768,   2000, 128, 64,  0, 2, 0, 0, 1 },
  921. --
  922. 1.7.1
  923.  
  924.  
  925. From 1572fda2ac8080f615e7bce85f8b556c292afdf9 Mon Sep 17 00:00:00 2001
  926. From: Anton Mitrofanov <BugMaster@narod.ru>
  927. Date: Tue, 31 Aug 2010 08:45:22 -0700
  928. Subject: [PATCH 8/9] Allow --demuxer forcing with known extensions
  929.  
  930. ---
  931. x264.c |    4 ++--
  932.  1 files changed, 2 insertions(+), 2 deletions(-)
  933.  
  934. diff --git a/x264.c b/x264.c
  935. index 7d98518..bf2b3ee 100644
  936. --- a/x264.c
  937. +++ b/x264.c
  938. @@ -933,9 +933,9 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
  939.  static int select_input( const char *demuxer, char *used_demuxer, char *filename,
  940.                           hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
  941.  {
  942. -    const char *ext = get_filename_extension( filename );
  943. -    int b_regular = strcmp( filename, "-" );
  944.      int b_auto = !strcasecmp( demuxer, "auto" );
  945. +    const char *ext = b_auto ? get_filename_extension( filename ) : "";
  946. +    int b_regular = strcmp( filename, "-" );
  947.      if( !b_regular && b_auto )
  948.          ext = "raw";
  949.      b_regular = b_regular && x264_is_regular_file_path( filename );
  950. --
  951. 1.7.1
  952.  
  953.  
  954. From bcb3c527f918864b26094732de05f466b91633f8 Mon Sep 17 00:00:00 2001
  955. From: Henrik Gramner <hengar-6@student.ltu.se>
  956. Date: Wed, 1 Sep 2010 00:53:42 +0200
  957. Subject: [PATCH 9/9] Faster nal_escape asm
  958.  
  959. ---
  960. common/x86/bitstream-a.asm |   77 ++++++++++++++++++++++++++-----------------
  961.  1 files changed, 46 insertions(+), 31 deletions(-)
  962.  
  963. diff --git a/common/x86/bitstream-a.asm b/common/x86/bitstream-a.asm
  964. index 69a47a7..25b426a 100644
  965. --- a/common/x86/bitstream-a.asm
  966. +++ b/common/x86/bitstream-a.asm
  967. @@ -30,74 +30,89 @@ SECTION .text
  968.  ;-----------------------------------------------------------------------------
  969.  
  970.  %macro NAL_LOOP 2
  971. +%1_escape:
  972. +    ; Detect false positive to avoid unnecessary escape loop
  973. +    xor      r3d, r3d
  974. +    cmp byte [r0+r1-1], 0
  975. +    setnz    r3b
  976. +    xor      r3d, r4d
  977. +    jnz .escape
  978. +    jmp %1_continue
  979.  ALIGN 16
  980.  %1:
  981. -    mova      m0, [r1+r2]
  982. -    mova      m1, m0
  983. -%if mmsize == 8
  984. -    psllq     m0, 8
  985. -%else
  986. -    pslldq    m0, 1
  987. -%endif
  988. -    %2   [r0+r1], m1
  989. -    por       m1, m0
  990. -    pcmpeqb   m1, m2
  991. +    mova      m3, m1
  992. +    mova      m2, m0
  993. +    pcmpeqb   m1, m4
  994. +    pcmpeqb   m0, m4
  995.      pmovmskb r3d, m1
  996. -    test     r3d, r3d
  997. -    jnz .escape
  998. -    add       r1, mmsize
  999. +    %2   [r0+r1], m2
  1000. +    pmovmskb r4d, m0
  1001. +    shl      r3d, mmsize
  1002. +    mova      m0, [r1+r2+2*mmsize]
  1003. +    or       r4d, r3d
  1004. +    mova      m1, [r1+r2+3*mmsize]
  1005. +    lea      r3d, [r4+r4+1]
  1006. +    %2 [r0+r1+mmsize], m3
  1007. +    and      r4d, r3d
  1008. +    jnz %1_escape
  1009. +%1_continue:
  1010. +    add       r1, 2*mmsize
  1011.      jl %1
  1012.  %endmacro
  1013.  
  1014.  %macro NAL_ESCAPE 1
  1015.  
  1016.  cglobal nal_escape_%1, 3,5
  1017. -    pxor      m2, m2
  1018. +    mov      r3w, [r1]
  1019.      sub       r1, r2 ; r1 = offset of current src pointer from end of src
  1020. +    pxor      m4, m4
  1021.      sub       r0, r1 ; r0 = projected end of dst, assuming no more escapes
  1022. -
  1023. -    mov      r3b, [r1+r2]
  1024. -    mov  [r0+r1], r3b
  1025. -    inc       r1
  1026. +    mov  [r0+r1], r3w
  1027. +    add       r1, 2
  1028.      jge .ret
  1029.  
  1030.      ; Start off by jumping into the escape loop in
  1031.      ; case there's an escape at the start.
  1032.      ; And do a few more in scalar until src is aligned again.
  1033. -    lea      r4d, [r1+r2]
  1034. -    or       r4d, -mmsize
  1035. -    neg      r4d
  1036.      jmp .first_escape
  1037.  
  1038.      NAL_LOOP .loop_aligned, mova
  1039.  %if mmsize==16
  1040. +    jmp .ret
  1041.      NAL_LOOP .loop_unaligned, movu
  1042.  %endif
  1043. -
  1044.  .ret:
  1045.      movifnidn rax, r0
  1046.      RET
  1047. +
  1048.  ALIGN 16
  1049.  .escape:
  1050. -    mov      r4d, mmsize
  1051. -.first_escape:
  1052. -    mov      r3b, [r1+r2]
  1053. +    ; Skip bytes that are known to be valid
  1054. +    and      r4d, r3d
  1055. +    bsf      r3d, r4d
  1056. +    add       r1, r3
  1057.  .escape_loop:
  1058. -    mov  [r0+r1], r3b
  1059. -    inc      r1
  1060. +    inc       r1
  1061.      jge .ret
  1062. -    mov      r3b, [r1+r2]
  1063. -    cmp      r3b, 3
  1064. +.first_escape:
  1065. +    movzx    r3d, byte [r1+r2]
  1066. +    lea       r4, [r1+r2]
  1067. +    cmp      r3d, 3
  1068.      jna .escape_check
  1069.  .no_escape:
  1070. -    dec      r4d
  1071. -    jg .escape_loop
  1072. +    mov  [r0+r1], r3b
  1073. +    test     r4d, mmsize-1 ; Do SIMD when src is aligned
  1074. +    jnz .escape_loop
  1075. +    mova      m0, [r4]
  1076. +    mova      m1, [r4+mmsize]
  1077.  %if mmsize==16
  1078.      lea      r4d, [r0+r1]
  1079.      test     r4d, mmsize-1
  1080.      jnz .loop_unaligned
  1081.  %endif
  1082.      jmp .loop_aligned
  1083. +
  1084. +ALIGN 16
  1085.  .escape_check:
  1086.      cmp word [r0+r1-2], 0
  1087.      jnz .no_escape
  1088. --
  1089. 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement