Advertisement
Guest User

Untitled

a guest
Jun 17th, 2017
519
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 68.63 KB | None | 0 0
  1. From 5f104e9957cc4b69f7197fecf93648a0e2ae0e59 Mon Sep 17 00:00:00 2001
  2. From: Anton Mitrofanov <BugMaster@narod.ru>
  3. Date: Mon, 20 Sep 2010 13:10:13 +0400
  4. Subject: [PATCH 1/8] Fix DTS/bitrate calculation if the first PTS wasn't zero
  5.  Fix bitrate calculation with DTS compression.
  6.  
  7. ---
  8. common/common.h   |    1 +
  9.  encoder/encoder.c |   11 +++++++----
  10.  x264.c            |   10 ++++++----
  11.  x264.h            |    2 +-
  12.  4 files changed, 15 insertions(+), 9 deletions(-)
  13.  
  14. diff --git a/common/common.h b/common/common.h
  15. index efb25be..132cfee 100644
  16. --- a/common/common.h
  17. +++ b/common/common.h
  18. @@ -499,6 +499,7 @@ struct x264_t
  19.          int i_delay;    /* Number of frames buffered for B reordering */
  20.          int     i_bframe_delay;
  21.          int64_t i_bframe_delay_time;
  22. +        int64_t i_first_pts;
  23.          int64_t i_init_delta;
  24.          int64_t i_prev_reordered_pts[2];
  25.          int64_t i_largest_pts;
  26. diff --git a/encoder/encoder.c b/encoder/encoder.c
  27. index fa4401b..2b679a0 100644
  28. --- a/encoder/encoder.c
  29. +++ b/encoder/encoder.c
  30. @@ -2329,8 +2329,10 @@ int     x264_encoder_encode( x264_t *h,
  31.  
  32.          fenc->i_frame = h->frames.i_input++;
  33.  
  34. +        if( fenc->i_frame == 0 )
  35. +            h->frames.i_first_pts = fenc->i_pts;
  36.          if( h->frames.i_bframe_delay && fenc->i_frame == h->frames.i_bframe_delay )
  37. -            h->frames.i_bframe_delay_time = fenc->i_pts;
  38. +            h->frames.i_bframe_delay_time = fenc->i_pts - h->frames.i_first_pts;
  39.  
  40.          if( h->param.b_vfr_input && fenc->i_pts <= h->frames.i_largest_pts )
  41.              x264_log( h, X264_LOG_WARNING, "non-strictly-monotonic PTS\n" );
  42. @@ -2495,8 +2497,8 @@ int     x264_encoder_encode( x264_t *h,
  43.              {
  44.                  /* DTS compression */
  45.                  if( h->i_frame == 1 )
  46. -                    thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
  47. -                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
  48. +                    thread_current->frames.i_init_delta = (h->fenc->i_reordered_pts - h->frames.i_first_pts) * h->i_dts_compress_multiplier;
  49. +                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier + h->frames.i_first_pts * h->i_dts_compress_multiplier;
  50.              }
  51.          }
  52.          else
  53. @@ -3110,7 +3112,8 @@ void    x264_encoder_close  ( x264_t *h )
  54.              f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125;
  55.          else
  56.          {
  57. -            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts) * h->param.i_timebase_num / h->param.i_timebase_den;
  58. +            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts - h->frames.i_first_pts)
  59. +                           * h->i_dts_compress_multiplier * h->param.i_timebase_num / h->param.i_timebase_den;
  60.              f_bitrate = SUM3(h->stat.i_frame_size) / duration / 125;
  61.          }
  62.  
  63. diff --git a/x264.c b/x264.c
  64. index a1e7147..f74f096 100644
  65. --- a/x264.c
  66. +++ b/x264.c
  67. @@ -1584,7 +1584,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  68.      int64_t second_largest_pts = -1;
  69.      int64_t ticks_per_frame;
  70.      double  duration;
  71. -    int     prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
  72. +    int     prev_timebase_den;
  73.      int     dts_compress_multiplier;
  74.      double  pulldown_pts = 0;
  75.  
  76. @@ -1603,6 +1603,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  77.          param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
  78.      }
  79.  
  80. +    prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
  81. +
  82.      if( ( h = x264_encoder_open( param ) ) == NULL )
  83.      {
  84.          x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
  85. @@ -1727,6 +1729,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  86.      if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
  87.          x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
  88.  
  89. +    largest_pts *= dts_compress_multiplier;
  90. +    second_largest_pts *= dts_compress_multiplier;
  91.      /* duration algorithm fails when only 1 frame is output */
  92.      if( i_frame_output == 1 )
  93.          duration = (double)param->i_fps_den / param->i_fps_num;
  94. @@ -1734,8 +1738,6 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  95.          duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
  96.      else
  97.          duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
  98. -    if( !(opt->i_pulldown && !param->b_vfr_input) )
  99. -        duration *= dts_compress_multiplier;
  100.  
  101.      i_end = x264_mdate();
  102.      /* Erase progress indicator before printing encoding stats. */
  103. @@ -1754,7 +1756,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  104.      }
  105.  
  106.      filter.free( opt->hin );
  107. -    output.close_file( opt->hout, largest_pts * dts_compress_multiplier, second_largest_pts * dts_compress_multiplier );
  108. +    output.close_file( opt->hout, largest_pts, second_largest_pts );
  109.  
  110.      if( i_frame_output > 0 )
  111.      {
  112. diff --git a/x264.h b/x264.h
  113. index 8174015..71f5f55 100644
  114. --- a/x264.h
  115. +++ b/x264.h
  116. @@ -653,7 +653,7 @@ typedef struct
  117.      int     b_keyframe;
  118.      /* In: user pts, Out: pts of encoded picture (user)*/
  119.      int64_t i_pts;
  120. -    /* Out: frame dts. Since the pts of the first frame is always zero,
  121. +    /* Out: frame dts. When the pts of the first frame is close to zero,
  122.       *      initial frames may have a negative dts which must be dealt with by any muxer */
  123.      int64_t i_dts;
  124.      /* In: custom encoding parameters to be set from this frame forwards
  125. --
  126. 1.7.2.3
  127.  
  128.  
  129. From 5cba26f757ec00a7b95656615813e692685ee138 Mon Sep 17 00:00:00 2001
  130. From: Anton Mitrofanov <BugMaster@narod.ru>
  131. Date: Sat, 25 Sep 2010 15:55:32 -0700
  132. Subject: [PATCH 2/8] Fix CFR ratecontrol with timebase != 1/fps
  133.  Fixes VBV + DTS compression, among other things.
  134.  
  135. ---
  136. encoder/encoder.c   |    2 +-
  137.  encoder/slicetype.c |    4 ++--
  138.  x264.c              |    1 +
  139.  x264.h              |    4 +++-
  140.  4 files changed, 7 insertions(+), 4 deletions(-)
  141.  
  142. diff --git a/encoder/encoder.c b/encoder/encoder.c
  143. index 2b679a0..3570776 100644
  144. --- a/encoder/encoder.c
  145. +++ b/encoder/encoder.c
  146. @@ -626,7 +626,7 @@ static int x264_validate_parameters( x264_t *h )
  147.          h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_MAX( h->param.i_keyint_max, bufsize*fps ) );
  148.      }
  149.  
  150. -    if( !h->param.i_timebase_num || !h->param.i_timebase_den )
  151. +    if( !h->param.i_timebase_num || !h->param.i_timebase_den || !(h->param.b_vfr_input || h->param.b_pulldown) )
  152.      {
  153.          h->param.i_timebase_num = h->param.i_fps_den;
  154.          h->param.i_timebase_den = h->param.i_fps_num;
  155. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  156. index 0d87908..d08cf02 100644
  157. --- a/encoder/slicetype.c
  158. +++ b/encoder/slicetype.c
  159. @@ -1260,12 +1260,12 @@ void x264_slicetype_decide( x264_t *h )
  160.              if( h->param.b_vfr_input )
  161.              {
  162.                  if( lookahead_size-- > 1 )
  163. -                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts);
  164. +                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts) * h->i_dts_compress_multiplier;
  165.                  else
  166.                      h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
  167.              }
  168.              else
  169. -                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct];
  170. +                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct] * h->i_dts_compress_multiplier;
  171.              h->i_prev_duration = h->lookahead->next.list[i]->i_duration;
  172.  
  173.              if( h->lookahead->next.list[i]->i_frame > h->i_disp_fields_last_frame && lookahead_size > 0 )
  174. diff --git a/x264.c b/x264.c
  175. index f74f096..5bd2af7 100644
  176. --- a/x264.c
  177. +++ b/x264.c
  178. @@ -1595,6 +1595,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  179.      /* set up pulldown */
  180.      if( opt->i_pulldown && !param->b_vfr_input )
  181.      {
  182. +        param->b_pulldown = 1;
  183.          param->b_pic_struct = 1;
  184.          pulldown = &pulldown_values[opt->i_pulldown];
  185.          param->i_timebase_num = param->i_fps_den;
  186. diff --git a/x264.h b/x264.h
  187. index 71f5f55..56d424c 100644
  188. --- a/x264.h
  189. +++ b/x264.h
  190. @@ -383,7 +383,9 @@ typedef struct x264_param_t
  191.      int b_annexb;               /* if set, place start codes (4 bytes) before NAL units,
  192.                                   * otherwise place size (4 bytes) before NAL units. */
  193.      int i_sps_id;               /* SPS and PPS id number */
  194. -    int b_vfr_input;            /* VFR input */
  195. +    int b_vfr_input;            /* VFR input.  If 1, use timebase and timestamps for ratecontrol purposes.
  196. +                                 * If 0, use fps only. */
  197. +    int b_pulldown;             /* use explicitly set timebase for CFR */
  198.      uint32_t i_fps_num;
  199.      uint32_t i_fps_den;
  200.      uint32_t i_timebase_num;    /* Timebase numerator */
  201. --
  202. 1.7.2.3
  203.  
  204.  
  205. From 54073becc7cfc3a1b574d954d1017cd58cbe8b2a Mon Sep 17 00:00:00 2001
  206. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  207. Date: Mon, 27 Sep 2010 05:39:13 -0700
  208. Subject: [PATCH 3/8] Add missing emms for dump-yuv
  209.  
  210. ---
  211. encoder/encoder.c |    3 +++
  212.  1 files changed, 3 insertions(+), 0 deletions(-)
  213.  
  214. diff --git a/encoder/encoder.c b/encoder/encoder.c
  215. index 3570776..7f3d5bc 100644
  216. --- a/encoder/encoder.c
  217. +++ b/encoder/encoder.c
  218. @@ -2934,7 +2934,10 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  219.           }
  220.  
  221.      if( h->param.psz_dump_yuv )
  222. +    {
  223.          x264_frame_dump( h );
  224. +        x264_emms();
  225. +    }
  226.  
  227.      return frame_size;
  228.  }
  229. --
  230. 1.7.2.3
  231.  
  232.  
  233. From 991166c935516f19f65485ebb447f079aca41e0e Mon Sep 17 00:00:00 2001
  234. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  235. Date: Tue, 21 Sep 2010 17:11:00 -0700
  236. Subject: [PATCH 4/8] Make slice-max-size more aggressive in considering escape bytes
  237.  The x264 assumption of randomly distributed escape bytes fails in the case of CABAC + an enormous number of identical macroblocks.
  238.  This patch attempts to compensate for this.
  239.  It is probably safe to assume in calling applications that x264 practically never violates the slice size limitation.
  240.  
  241. ---
  242. encoder/encoder.c |   63 +++++++++++++++++++++++++++++++++-------------------
  243.  1 files changed, 40 insertions(+), 23 deletions(-)
  244.  
  245. diff --git a/encoder/encoder.c b/encoder/encoder.c
  246. index 7f3d5bc..b9e66ac 100644
  247. --- a/encoder/encoder.c
  248. +++ b/encoder/encoder.c
  249. @@ -1834,10 +1834,12 @@ static int x264_slice_write( x264_t *h )
  250.      uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
  251.      int mv_bits_bak = 0;
  252.      int tex_bits_bak = 0;
  253. -    /* Assume no more than 3 bytes of NALU escaping.
  254. -     * NALUs other than the first use a 3-byte startcode. */
  255. -    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
  256. -    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
  257. +    /* NALUs other than the first use a 3-byte startcode.
  258. +     * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
  259. +     * Then add an extra 5 bytes just in case, to account for random NAL escapes and
  260. +     * other inaccuracies. */
  261. +    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
  262. +    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
  263.      int starting_bits = bs_pos(&h->out.bs);
  264.      int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
  265.      int b_hpel = h->fdec->b_kept_as_ref;
  266. @@ -1884,7 +1886,7 @@ static int x264_slice_write( x264_t *h )
  267.          if( x264_bitstream_check_buffer( h ) )
  268.              return -1;
  269.  
  270. -        if( h->param.i_slice_max_size > 0 )
  271. +        if( slice_max_size )
  272.          {
  273.              mv_bits_bak = h->stat.frame.i_mv_bits;
  274.              tex_bits_bak = h->stat.frame.i_tex_bits;
  275. @@ -1948,35 +1950,50 @@ static int x264_slice_write( x264_t *h )
  276.          int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
  277.          int mb_size = total_bits - mb_spos;
  278.  
  279. -        /* We'll just re-encode this last macroblock if we go over the max slice size. */
  280. -        if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
  281. -        {
  282. -            if( mb_xy != h->sh.i_first_mb )
  283. +        if( slice_max_size )
  284. +        {
  285. +            /* Count the skip run, just in case. */
  286. +            if( !h->param.b_cabac )
  287. +                total_bits += bs_size_ue_big( i_skip );
  288. +            /* HACK: we assume no more than 3 bytes of NALU escaping, but
  289. +             * this can fail in CABAC streams with an extremely large number of identical
  290. +             * blocks in sequence (e.g. all-black intra blocks).
  291. +             * Thus, every 64 blocks, pretend we've used a byte.
  292. +             * For reference, a sequence of identical empty-CBP i16x16 blocks will use
  293. +             * one byte after 26 macroblocks, assuming a perfectly adapted CABAC.
  294. +             * That's 78 macroblocks to generate the 3-byte sequence to trigger an escape. */
  295. +            else if( ((mb_xy - h->sh.i_first_mb) & 63) == 63 )
  296. +                slice_max_size -= 8;
  297. +            /* We'll just re-encode this last macroblock if we go over the max slice size. */
  298. +            if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
  299.              {
  300. -                h->stat.frame.i_mv_bits = mv_bits_bak;
  301. -                h->stat.frame.i_tex_bits = tex_bits_bak;
  302. -                if( h->param.b_cabac )
  303. +                if( mb_xy != h->sh.i_first_mb )
  304.                  {
  305. -                    memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
  306. -                    h->cabac.p[-1] = cabac_prevbyte_bak;
  307. +                    h->stat.frame.i_mv_bits = mv_bits_bak;
  308. +                    h->stat.frame.i_tex_bits = tex_bits_bak;
  309. +                    if( h->param.b_cabac )
  310. +                    {
  311. +                        memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
  312. +                        h->cabac.p[-1] = cabac_prevbyte_bak;
  313. +                    }
  314. +                    else
  315. +                    {
  316. +                        h->out.bs = bs_bak;
  317. +                        i_skip = i_skip_bak;
  318. +                    }
  319. +                    h->mb.b_reencode_mb = 1;
  320. +                    h->sh.i_last_mb = mb_xy-1;
  321. +                    break;
  322.                  }
  323.                  else
  324.                  {
  325. -                    h->out.bs = bs_bak;
  326. -                    i_skip = i_skip_bak;
  327. +                    h->sh.i_last_mb = mb_xy;
  328. +                    h->mb.b_reencode_mb = 0;
  329.                  }
  330. -                h->mb.b_reencode_mb = 1;
  331. -                h->sh.i_last_mb = mb_xy-1;
  332. -                break;
  333.              }
  334.              else
  335. -            {
  336. -                h->sh.i_last_mb = mb_xy;
  337.                  h->mb.b_reencode_mb = 0;
  338. -            }
  339.          }
  340. -        else
  341. -            h->mb.b_reencode_mb = 0;
  342.  
  343.  #if HAVE_VISUALIZE
  344.          if( h->param.b_visualize )
  345. --
  346. 1.7.2.3
  347.  
  348.  
  349. From 3f15d8ea707d4985a38059fe58ce0a0993ceeb94 Mon Sep 17 00:00:00 2001
  350. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  351. Date: Mon, 27 Sep 2010 05:39:02 -0700
  352. Subject: [PATCH 5/8] Various cosmetics
  353.  
  354. ---
  355. encoder/encoder.c |   12 ++++++------
  356.  encoder/set.c     |   17 ++---------------
  357.  2 files changed, 8 insertions(+), 21 deletions(-)
  358.  
  359. diff --git a/encoder/encoder.c b/encoder/encoder.c
  360. index b9e66ac..28ded05 100644
  361. --- a/encoder/encoder.c
  362. +++ b/encoder/encoder.c
  363. @@ -2943,12 +2943,12 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  364.  
  365.      /* Remove duplicates, must be done near the end as breaks h->fref0 array
  366.       * by freeing some of its pointers. */
  367. -     for( int i = 0; i < h->i_ref0; i++ )
  368. -         if( h->fref0[i] && h->fref0[i]->b_duplicate )
  369. -         {
  370. -             x264_frame_push_blank_unused( h, h->fref0[i] );
  371. -             h->fref0[i] = 0;
  372. -         }
  373. +    for( int i = 0; i < h->i_ref0; i++ )
  374. +        if( h->fref0[i] && h->fref0[i]->b_duplicate )
  375. +        {
  376. +            x264_frame_push_blank_unused( h, h->fref0[i] );
  377. +            h->fref0[i] = 0;
  378. +        }
  379.  
  380.      if( h->param.psz_dump_yuv )
  381.      {
  382. diff --git a/encoder/set.c b/encoder/set.c
  383. index b0d2149..a003012 100644
  384. --- a/encoder/set.c
  385. +++ b/encoder/set.c
  386. @@ -294,34 +294,21 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
  387.      }
  388.      else if( sps->i_poc_type == 1 )
  389.      {
  390. -        int i;
  391. -
  392.          bs_write( s, 1, sps->b_delta_pic_order_always_zero );
  393.          bs_write_se( s, sps->i_offset_for_non_ref_pic );
  394.          bs_write_se( s, sps->i_offset_for_top_to_bottom_field );
  395.          bs_write_ue( s, sps->i_num_ref_frames_in_poc_cycle );
  396.  
  397. -        for( i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
  398. -        {
  399. +        for( int i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
  400.              bs_write_se( s, sps->i_offset_for_ref_frame[i] );
  401. -        }
  402.      }
  403.      bs_write_ue( s, sps->i_num_ref_frames );
  404.      bs_write( s, 1, sps->b_gaps_in_frame_num_value_allowed );
  405.      bs_write_ue( s, sps->i_mb_width - 1 );
  406. -    if (sps->b_frame_mbs_only)
  407. -    {
  408. -        bs_write_ue( s, sps->i_mb_height - 1);
  409. -    }
  410. -    else // interlaced
  411. -    {
  412. -        bs_write_ue( s, sps->i_mb_height/2 - 1);
  413. -    }
  414. +    bs_write_ue( s, (sps->i_mb_height >> !sps->b_frame_mbs_only) - 1);
  415.      bs_write( s, 1, sps->b_frame_mbs_only );
  416.      if( !sps->b_frame_mbs_only )
  417. -    {
  418.          bs_write( s, 1, sps->b_mb_adaptive_frame_field );
  419. -    }
  420.      bs_write( s, 1, sps->b_direct8x8_inference );
  421.  
  422.      bs_write( s, 1, sps->b_crop );
  423. --
  424. 1.7.2.3
  425.  
  426.  
  427. From 377efcd2643ba657a6d26c4599a9cc4022ca84e8 Mon Sep 17 00:00:00 2001
  428. From: Alex Wright <alexw0885@gmail.com>
  429. Date: Sun, 19 Sep 2010 05:08:22 -0700
  430. Subject: [PATCH 6/8] Chroma mode decision/subpel for B-frames
  431.  Improves compression ~0.4-1%. Helps more on videos with lots of chroma detail.
  432.  Enabled at subme 9 (preset slower) and higher.
  433.  
  434. ---
  435. common/macroblock.c |    5 +-
  436.  encoder/analyse.c   |  118 +++++++++++++++++++++++++++++++++++++++++++++++++--
  437.  2 files changed, 117 insertions(+), 6 deletions(-)
  438.  
  439. diff --git a/common/macroblock.c b/common/macroblock.c
  440. index b6c91d6..7f0348e 100644
  441. --- a/common/macroblock.c
  442. +++ b/common/macroblock.c
  443. @@ -448,8 +448,9 @@ void x264_macroblock_thread_init( x264_t *h )
  444.      h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
  445.      if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
  446.          h->mb.i_subpel_refine--;
  447. -    h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
  448. -                        && h->mb.i_subpel_refine >= 5;
  449. +    h->mb.b_chroma_me = h->param.analyse.b_chroma_me &&
  450. +                        ((h->sh.i_type == SLICE_TYPE_P && h->mb.i_subpel_refine >= 5) ||
  451. +                         (h->sh.i_type == SLICE_TYPE_B && h->mb.i_subpel_refine >= 9));
  452.      h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
  453.                            (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
  454.  
  455. diff --git a/encoder/analyse.c b/encoder/analyse.c
  456. index c4162e9..6ed13ba 100644
  457. --- a/encoder/analyse.c
  458. +++ b/encoder/analyse.c
  459. @@ -1679,6 +1679,37 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
  460.          a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
  461.  }
  462.  
  463. +static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
  464. +{
  465. +    ALIGNED_ARRAY_8( pixel, pix, [4],[8*8] );
  466. +    ALIGNED_ARRAY_8( pixel,  bi, [2],[8*8] );
  467. +    int l0_mvy_offset, l1_mvy_offset;
  468. +    int i_chroma_cost = 0;
  469. +
  470. +#define COST_BI_CHROMA( m0, m1, width, height ) \
  471. +{ \
  472. +    l0_mvy_offset = h->mb.b_interlaced & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
  473. +    l1_mvy_offset = h->mb.b_interlaced & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
  474. +    h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
  475. +    h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
  476. +    h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
  477. +    h->mc.avg[i_pixel+3]( bi[1], 8, pix[1], 8, pix[3], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
  478. +    i_chroma_cost  = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 8 ); \
  479. +    i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 8 ); \
  480. +}
  481. +
  482. +    if( i_pixel == PIXEL_16x16 )
  483. +        COST_BI_CHROMA( a->l0.bi16x16, a->l1.bi16x16, 8, 8 )
  484. +    else if( i_pixel == PIXEL_16x8 )
  485. +        COST_BI_CHROMA( a->l0.me16x8[idx], a->l1.me16x8[idx], 8, 4 )
  486. +    else if( i_pixel == PIXEL_8x16 )
  487. +        COST_BI_CHROMA( a->l0.me8x16[idx], a->l1.me8x16[idx], 4, 8 )
  488. +    else
  489. +        COST_BI_CHROMA( a->l0.me8x8[idx], a->l1.me8x8[idx], 4, 4 )
  490. +
  491. +    return i_chroma_cost;
  492. +}
  493. +
  494.  static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
  495.  {
  496.      /* Assumes that fdec still contains the results of
  497. @@ -1693,15 +1724,29 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
  498.          {
  499.              const int x = (i&1)*8;
  500.              const int y = (i>>1)*8;
  501. -            a->i_cost16x16direct +=
  502. -            a->i_cost8x8direct[i] =
  503. -                h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE, &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
  504. +            a->i_cost8x8direct[i] = h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE,
  505. +                                                              &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
  506. +            if( h->mb.b_chroma_me )
  507. +            {
  508. +                a->i_cost8x8direct[i] += h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
  509. +                                                                   &h->mb.pic.p_fdec[1][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE )
  510. +                                      +  h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
  511. +                                                                   &h->mb.pic.p_fdec[2][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE );
  512. +            }
  513. +            a->i_cost16x16direct += a->i_cost8x8direct[i];
  514.  
  515.              /* mb type cost */
  516.              a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
  517.          }
  518.      else
  519. +    {
  520.          a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_16x16]( p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE );
  521. +        if( h->mb.b_chroma_me )
  522. +        {
  523. +            a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
  524. +                                 +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
  525. +        }
  526. +    }
  527.  }
  528.  
  529.  static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  530. @@ -1807,6 +1852,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  531.                       + a->l0.bi16x16.cost_mv
  532.                       + a->l1.bi16x16.cost_mv;
  533.  
  534. +    if( h->mb.b_chroma_me )
  535. +        a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
  536. +
  537.      /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
  538.      if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
  539.      {
  540. @@ -1819,6 +1867,39 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
  541.                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
  542.          int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
  543.                     + ref_costs + l0_mv_cost + l1_mv_cost;
  544. +
  545. +        if( h->mb.b_chroma_me )
  546. +        {
  547. +            ALIGNED_ARRAY_8( pixel, pixuv, [2],[8*FENC_STRIDE] );
  548. +            ALIGNED_ARRAY_8( pixel, bi, [8*FENC_STRIDE] );
  549. +
  550. +            if( h->mb.b_interlaced & a->l0.bi16x16.i_ref )
  551. +            {
  552. +                int l0_mvy_offset = h->mb.b_interlaced & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
  553. +                h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
  554. +                                 h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
  555. +            }
  556. +            else
  557. +                h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
  558. +
  559. +            if( h->mb.b_interlaced & a->l1.bi16x16.i_ref )
  560. +            {
  561. +                int l1_mvy_offset = h->mb.b_interlaced & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
  562. +                h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
  563. +                                 h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
  564. +            }
  565. +            else
  566. +                h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
  567. +
  568. +            h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
  569. +                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
  570. +            h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
  571. +                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
  572. +
  573. +            cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
  574. +                   +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
  575. +        }
  576. +
  577.          if( cost00 < a->i_cost16x16bi )
  578.          {
  579.              M32( a->l0.bi16x16.mv ) = 0;
  580. @@ -2017,6 +2098,13 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
  581.                           + a->l0.me8x8[i].i_ref_cost + a->l1.me8x8[i].i_ref_cost
  582.                           + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
  583.  
  584. +        if( h->mb.b_chroma_me )
  585. +        {
  586. +            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
  587. +            i_part_cost_bi += i_chroma_cost;
  588. +            a->i_satd8x8[2][i] += i_chroma_cost;
  589. +        }
  590. +
  591.          a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
  592.          a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
  593.  
  594. @@ -2090,6 +2178,13 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
  595.          a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
  596.          a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
  597.  
  598. +        if( h->mb.b_chroma_me )
  599. +        {
  600. +            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
  601. +            i_part_cost_bi += i_chroma_cost;
  602. +            a->i_satd8x8[2][i] += i_chroma_cost;
  603. +        }
  604. +
  605.          i_part_cost = a->l0.me8x8[i].cost;
  606.          h->mb.i_sub_partition[i] = D_L0_8x8;
  607.          COPY2_IF_LT( i_part_cost, a->l1.me8x8[i].cost, h->mb.i_sub_partition[i], D_L1_8x8 );
  608. @@ -2162,6 +2257,9 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
  609.                          + a->l0.me16x8[i].cost_mv + a->l1.me16x8[i].cost_mv + a->l0.me16x8[i].i_ref_cost
  610.                          + a->l1.me16x8[i].i_ref_cost;
  611.  
  612. +        if( h->mb.b_chroma_me )
  613. +            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 );
  614. +
  615.          i_part_cost = a->l0.me16x8[i].cost;
  616.          a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
  617.  
  618. @@ -2252,6 +2350,9 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
  619.                          + a->l0.me8x16[i].cost_mv + a->l1.me8x16[i].cost_mv + a->l0.me8x16[i].i_ref_cost
  620.                          + a->l1.me8x16[i].i_ref_cost;
  621.  
  622. +        if( h->mb.b_chroma_me )
  623. +            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 );
  624. +
  625.          i_part_cost = a->l0.me8x16[i].cost;
  626.          a->i_mb_partition8x16[i] = D_L0_8x8;
  627.  
  628. @@ -3249,7 +3350,16 @@ intra_analysis:
  629.                  h->mb.i_partition = i_partition;
  630.              }
  631.  
  632. -            x264_mb_analyse_intra( h, &analysis, i_satd_inter );
  633. +            if( h->mb.b_chroma_me )
  634. +            {
  635. +                x264_mb_analyse_intra_chroma( h, &analysis );
  636. +                x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
  637. +                analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
  638. +                analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
  639. +                analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
  640. +            }
  641. +            else
  642. +                x264_mb_analyse_intra( h, &analysis, i_satd_inter );
  643.  
  644.              if( analysis.i_mbrd )
  645.              {
  646. --
  647. 1.7.2.3
  648.  
  649.  
  650. From 594ff9664e7ac57a53fae8d9b5a1ecdf2ba5fa75 Mon Sep 17 00:00:00 2001
  651. From: Oskar Arvidsson <oskar@irock.se>
  652. Date: Mon, 27 Sep 2010 16:02:20 +0200
  653. Subject: [PATCH 7/8] Finish support for high-depth video throughout x264
  654.  Add support for high depth input in libx264.
  655.  Add support for 16-bit colorspaces in the filtering system.
  656.  Add support for input bit depths in the interval [9,16] with the raw demuxer.
  657.  Add a depth filter to dither input to x264.
  658.  
  659. ---
  660. Makefile                 |    2 +-
  661.  common/common.c          |   17 ++--
  662.  common/frame.c           |   14 +++
  663.  common/mc.c              |   11 +--
  664.  encoder/encoder.c        |    8 +-
  665.  filters/video/crop.c     |    8 +-
  666.  filters/video/depth.c    |  228 ++++++++++++++++++++++++++++++++++++++++++++++
  667.  filters/video/internal.c |    1 +
  668.  filters/video/resize.c   |   84 +++++++++++++----
  669.  filters/video/video.c    |    1 +
  670.  input/input.c            |   13 ++-
  671.  input/input.h            |    7 +-
  672.  input/raw.c              |   33 ++++++-
  673.  x264.c                   |   21 ++++-
  674.  x264.h                   |   14 +++-
  675.  15 files changed, 405 insertions(+), 57 deletions(-)
  676.  create mode 100644 filters/video/depth.c
  677.  
  678. diff --git a/Makefile b/Makefile
  679. index bab55e5..0cd7b82 100644
  680. --- a/Makefile
  681. +++ b/Makefile
  682. @@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
  683.           output/flv.c output/flv_bytestream.c filters/filters.c \
  684.           filters/video/video.c filters/video/source.c filters/video/internal.c \
  685.           filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
  686. -         filters/video/select_every.c filters/video/crop.c
  687. +         filters/video/select_every.c filters/video/crop.c filters/video/depth.c
  688.  
  689.  SRCSO =
  690.  
  691. diff --git a/common/common.c b/common/common.c
  692. index c329cb0..aff5fc3 100644
  693. --- a/common/common.c
  694. +++ b/common/common.c
  695. @@ -33,6 +33,8 @@
  696.  #include <malloc.h>
  697.  #endif
  698.  
  699. +const int x264_bit_depth = BIT_DEPTH;
  700. +
  701.  static void x264_log_default( void *, int, const char *, va_list );
  702.  
  703.  /****************************************************************************
  704. @@ -1047,19 +1049,20 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
  705.      x264_picture_init( pic );
  706.      pic->img.i_csp = i_csp;
  707.      pic->img.i_plane = csp == X264_CSP_NV12 ? 2 : 3;
  708. -    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 );
  709. +    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
  710. +    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 * depth_factor );
  711.      if( !pic->img.plane[0] )
  712.          return -1;
  713. -    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
  714. +    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height * depth_factor;
  715.      if( csp != X264_CSP_NV12 )
  716. -        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4;
  717. -    pic->img.i_stride[0] = i_width;
  718. +        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4 * depth_factor;
  719. +    pic->img.i_stride[0] = i_width * depth_factor;
  720.      if( csp == X264_CSP_NV12 )
  721. -        pic->img.i_stride[1] = i_width;
  722. +        pic->img.i_stride[1] = i_width * depth_factor;
  723.      else
  724.      {
  725. -        pic->img.i_stride[1] = i_width / 2;
  726. -        pic->img.i_stride[2] = i_width / 2;
  727. +        pic->img.i_stride[1] = i_width / 2 * depth_factor;
  728. +        pic->img.i_stride[2] = i_width / 2 * depth_factor;
  729.      }
  730.      return 0;
  731.  }
  732. diff --git a/common/frame.c b/common/frame.c
  733. index 0c3d77f..95666da 100644
  734. --- a/common/frame.c
  735. +++ b/common/frame.c
  736. @@ -263,6 +263,20 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
  737.          return -1;
  738.      }
  739.  
  740. +#if X264_HIGH_BIT_DEPTH
  741. +    if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
  742. +    {
  743. +        x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
  744. +        return -1;
  745. +    }
  746. +#else
  747. +    if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
  748. +    {
  749. +        x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
  750. +        return -1;
  751. +    }
  752. +#endif
  753. +
  754.      dst->i_type     = src->i_type;
  755.      dst->i_qpplus1  = src->i_qpplus1;
  756.      dst->i_pts      = dst->i_reordered_pts = src->i_pts;
  757. diff --git a/common/mc.c b/common/mc.c
  758. index 5b58a76..acc2312 100644
  759. --- a/common/mc.c
  760. +++ b/common/mc.c
  761. @@ -302,12 +302,7 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
  762.  {
  763.      while( h-- )
  764.      {
  765. -#if X264_HIGH_BIT_DEPTH
  766. -        for( int i = 0; i < w; i++ )
  767. -            dst[i] = src[i] << (BIT_DEPTH-8);
  768. -#else
  769. -        memcpy( dst, src, w );
  770. -#endif
  771. +        memcpy( dst, src, w * sizeof(pixel) );
  772.          dst += i_dst;
  773.          src += i_src;
  774.      }
  775. @@ -320,8 +315,8 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
  776.      for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
  777.          for( int x=0; x<w; x++ )
  778.          {
  779. -            dst[2*x]   = srcu[x] << (BIT_DEPTH-8);
  780. -            dst[2*x+1] = srcv[x] << (BIT_DEPTH-8);
  781. +            dst[2*x]   = ((pixel*)srcu)[x];
  782. +            dst[2*x+1] = ((pixel*)srcv)[x];
  783.          }
  784.  }
  785.  
  786. diff --git a/encoder/encoder.c b/encoder/encoder.c
  787. index 28ded05..62a4350 100644
  788. --- a/encoder/encoder.c
  789. +++ b/encoder/encoder.c
  790. @@ -2777,12 +2777,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  791.          x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
  792.  
  793.      pic_out->img.i_csp = X264_CSP_NV12;
  794. +#if X264_HIGH_BIT_DEPTH
  795. +    pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
  796. +#endif
  797.      pic_out->img.i_plane = h->fdec->i_plane;
  798.      for( int i = 0; i < 2; i++ )
  799.      {
  800. -        pic_out->img.i_stride[i] = h->fdec->i_stride[i];
  801. -        // FIXME This breaks the API when pixel != uint8_t.
  802. -        pic_out->img.plane[i] = h->fdec->plane[i];
  803. +        pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
  804. +        pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
  805.      }
  806.  
  807.      x264_frame_push_unused( thread_current, h->fenc );
  808. diff --git a/filters/video/crop.c b/filters/video/crop.c
  809. index 2a3c2b1..b70476e 100644
  810. --- a/filters/video/crop.c
  811. +++ b/filters/video/crop.c
  812. @@ -103,8 +103,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
  813.      output->img.height = h->dims[3];
  814.      /* shift the plane pointers down 'top' rows and right 'left' columns. */
  815.      for( int i = 0; i < output->img.planes; i++ )
  816. -        output->img.plane[i] += (int)(output->img.stride[i] * h->dims[1] * h->csp->height[i]
  817. -                                    + h->dims[0] * h->csp->width[i]);
  818. +    {
  819. +        intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
  820. +        offset += h->dims[0] * h->csp->width[i];
  821. +        offset *= x264_cli_csp_depth_factor( output->img.csp );
  822. +        output->img.plane[i] += offset;
  823. +    }
  824.      return 0;
  825.  }
  826.  
  827. diff --git a/filters/video/depth.c b/filters/video/depth.c
  828. new file mode 100644
  829. index 0000000..a0411c5
  830. --- /dev/null
  831. +++ b/filters/video/depth.c
  832. @@ -0,0 +1,228 @@
  833. +/*****************************************************************************
  834. + * depth.c: x264 video depth filter
  835. + *****************************************************************************
  836. + * Copyright (C) 2010 Oskar Arvidsson <oskar@irock.se>
  837. + *
  838. + * This program is free software; you can redistribute it and/or modify
  839. + * it under the terms of the GNU General Public License as published by
  840. + * the Free Software Foundation; either version 2 of the License, or
  841. + * (at your option) any later version.
  842. + *
  843. + * This program is distributed in the hope that it will be useful,
  844. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  845. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  846. + * GNU General Public License for more details.
  847. + *
  848. + * You should have received a copy of the GNU General Public License
  849. + * along with this program; if not, write to the Free Software
  850. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  851. + *****************************************************************************/
  852. +
  853. +#include "video.h"
  854. +#define NAME "depth"
  855. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
  856. +
  857. +cli_vid_filter_t depth_filter;
  858. +
  859. +typedef struct
  860. +{
  861. +    hnd_t prev_hnd;
  862. +    cli_vid_filter_t prev_filter;
  863. +
  864. +    int bit_depth;
  865. +    int dst_csp;
  866. +    cli_pic_t buffer;
  867. +    int16_t *error_buf;
  868. +} depth_hnd_t;
  869. +
  870. +static int depth_filter_csp_is_supported( int csp )
  871. +{
  872. +    int csp_mask = csp & X264_CSP_MASK;
  873. +    return csp_mask == X264_CSP_I420 ||
  874. +           csp_mask == X264_CSP_I422 ||
  875. +           csp_mask == X264_CSP_I444 ||
  876. +           csp_mask == X264_CSP_YV12 ||
  877. +           csp_mask == X264_CSP_NV12;
  878. +}
  879. +
  880. +static int csp_num_interleaved( int csp, int plane )
  881. +{
  882. +    int csp_mask = csp & X264_CSP_MASK;
  883. +    return ( csp_mask == X264_CSP_NV12 && plane == 1 ) ? 2 : 1;
  884. +}
  885. +
  886. +/* The dithering algorithm is based on Sierra-2-4A error diffusion. It has been
  887. + * written in such a way so that if the source has been upconverted using the
  888. + * same algorithm as used in scale_image, dithering down to the source bit
  889. + * depth again is lossless. */
  890. +#define DITHER_PLANE( pitch ) \
  891. +static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
  892. +                                        int width, int height, int16_t *errors ) \
  893. +{ \
  894. +    const int lshift = 16-BIT_DEPTH; \
  895. +    const int rshift = 2*BIT_DEPTH-16; \
  896. +    const int pixel_max = (1 << BIT_DEPTH)-1; \
  897. +    const int half = 1 << (16-BIT_DEPTH); \
  898. +    memset( errors, 0, (width+1) * sizeof(int16_t) ); \
  899. +    for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
  900. +    { \
  901. +        int err = 0; \
  902. +        for( int x = 0; x < width; x++ ) \
  903. +        { \
  904. +            err += errors[x] + errors[x+1]; \
  905. +            dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
  906. +            errors[x] = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); \
  907. +            err = errors[x] << 1; \
  908. +        } \
  909. +    } \
  910. +}
  911. +
  912. +DITHER_PLANE( 1 )
  913. +DITHER_PLANE( 2 )
  914. +
  915. +static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf )
  916. +{
  917. +    int csp_mask = img->csp & X264_CSP_MASK;
  918. +    for( int i = 0; i < img->planes; i++ )
  919. +    {
  920. +        int num_interleaved = csp_num_interleaved( img->csp, i );
  921. +        int height = x264_cli_csps[csp_mask].height[i] * img->height;
  922. +        int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved;
  923. +
  924. +#define CALL_DITHER_PLANE( pitch, off ) \
  925. +        dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
  926. +                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf )
  927. +
  928. +        if( num_interleaved == 1 )
  929. +        {
  930. +            CALL_DITHER_PLANE( 1, 0 );
  931. +        }
  932. +        else
  933. +        {
  934. +            CALL_DITHER_PLANE( 2, 0 );
  935. +            CALL_DITHER_PLANE( 2, 1 );
  936. +        }
  937. +    }
  938. +}
  939. +
  940. +static void scale_image( cli_image_t *output, cli_image_t *img )
  941. +{
  942. +    /* This function mimics how swscale does upconversion. 8-bit is converted
  943. +     * to 16-bit by left-shifting the original value by 8 and then adding
  944. +     * the original value to that. This effectively keeps the full color range
  945. +     * while also being fast. For n-bit we basically do the same thing, but we
  946. +     * discard the lower 16-n bits. */
  947. +    int csp_mask = img->csp & X264_CSP_MASK;
  948. +    const int shift = 16-BIT_DEPTH;
  949. +    for( int i = 0; i < img->planes; i++ )
  950. +    {
  951. +        uint8_t *src = img->plane[i];
  952. +        uint16_t *dst = (uint16_t*)output->plane[i];
  953. +        int height = x264_cli_csps[csp_mask].height[i] * img->height;
  954. +        int width = x264_cli_csps[csp_mask].width[i] * img->width;
  955. +
  956. +        for( int j = 0; j < height; j++ )
  957. +        {
  958. +            for( int k = 0; k < width; k++ )
  959. +                dst[k] = ((src[k] << 8) + src[k]) >> shift;
  960. +
  961. +            src += img->stride[i];
  962. +            dst += output->stride[i]/2;
  963. +        }
  964. +    }
  965. +}
  966. +
  967. +static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
  968. +{
  969. +    depth_hnd_t *h = handle;
  970. +
  971. +    if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
  972. +        return -1;
  973. +
  974. +    if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH )
  975. +    {
  976. +        dither_image( &h->buffer.img, &output->img, h->error_buf );
  977. +        output->img = h->buffer.img;
  978. +    }
  979. +    else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) )
  980. +    {
  981. +        scale_image( &h->buffer.img, &output->img );
  982. +        output->img = h->buffer.img;
  983. +    }
  984. +    return 0;
  985. +}
  986. +
  987. +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
  988. +{
  989. +    depth_hnd_t *h = handle;
  990. +    return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
  991. +}
  992. +
  993. +static void free_filter( hnd_t handle )
  994. +{
  995. +    depth_hnd_t *h = handle;
  996. +    h->prev_filter.free( h->prev_hnd );
  997. +    x264_cli_pic_clean( &h->buffer );
  998. +    x264_free( h );
  999. +}
  1000. +
  1001. +static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
  1002. +                 x264_param_t *param, char *opt_string )
  1003. +{
  1004. +    int ret = 0;
  1005. +    int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
  1006. +    int csp = ~(~info->csp ^ change_fmt);
  1007. +    int bit_depth = 8*x264_cli_csp_depth_factor( csp );
  1008. +
  1009. +    if( opt_string )
  1010. +    {
  1011. +        static const char *optlist[] = { "bit_depth", NULL };
  1012. +        char **opts = x264_split_options( opt_string, optlist );
  1013. +
  1014. +        if( opts )
  1015. +        {
  1016. +            char *str_bit_depth = x264_get_option( "bit_depth", opts );
  1017. +            bit_depth = x264_otoi( str_bit_depth, -1 );
  1018. +
  1019. +            ret = bit_depth < 8 || bit_depth > 16;
  1020. +            csp = bit_depth > 8 ? csp | X264_CSP_HIGH_DEPTH : csp & ~X264_CSP_HIGH_DEPTH;
  1021. +            change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
  1022. +            x264_free_string_array( opts );
  1023. +        }
  1024. +        else
  1025. +            ret = 1;
  1026. +    }
  1027. +
  1028. +    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth %d\n", BIT_DEPTH )
  1029. +    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
  1030. +
  1031. +    /* only add the filter to the chain if it's needed */
  1032. +    if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
  1033. +    {
  1034. +        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
  1035. +        depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
  1036. +
  1037. +        if( !h )
  1038. +            return -1;
  1039. +
  1040. +        h->error_buf = (int16_t*)(h + 1);
  1041. +        h->dst_csp = csp;
  1042. +        h->bit_depth = bit_depth;
  1043. +        h->prev_hnd = *handle;
  1044. +        h->prev_filter = *filter;
  1045. +
  1046. +        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
  1047. +        {
  1048. +            x264_free( h );
  1049. +            return -1;
  1050. +        }
  1051. +
  1052. +        *handle = h;
  1053. +        *filter = depth_filter;
  1054. +        info->csp = h->dst_csp;
  1055. +    }
  1056. +
  1057. +    return 0;
  1058. +}
  1059. +
  1060. +cli_vid_filter_t depth_filter = { NAME, NULL, init, get_frame, release_frame, free_filter, NULL };
  1061. diff --git a/filters/video/internal.c b/filters/video/internal.c
  1062. index 444ea1f..ef096dc 100644
  1063. --- a/filters/video/internal.c
  1064. +++ b/filters/video/internal.c
  1065. @@ -51,6 +51,7 @@ int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
  1066.      {
  1067.          int height = in->img.height * x264_cli_csps[csp].height[i];
  1068.          int width =  in->img.width  * x264_cli_csps[csp].width[i];
  1069. +        width *= x264_cli_csp_depth_factor( in->img.csp );
  1070.          x264_cli_plane_copy( out->img.plane[i], out->img.stride[i], in->img.plane[i],
  1071.                               in->img.stride[i], width, height );
  1072.      }
  1073. diff --git a/filters/video/resize.c b/filters/video/resize.c
  1074. index 38077b2..04b5e73 100644
  1075. --- a/filters/video/resize.c
  1076. +++ b/filters/video/resize.c
  1077. @@ -79,10 +79,21 @@ static void help( int longhelp )
  1078.              "            - fittobox: resizes the video based on the desired contraints\n"
  1079.              "               - width, height, both\n"
  1080.              "            - fittobox and sar: same as above except with specified sar\n"
  1081. -            "            simultaneously converting to the given colorspace\n"
  1082. -            "            using resizer method [\"bicubic\"]\n"
  1083. -            "             - fastbilinear, bilinear, bicubic, experimental, point,\n"
  1084. -            "             - area, bicublin, gauss, sinc, lanczos, spline\n" );
  1085. +            "            - csp: convert to the given csp. syntax: [name][:depth]\n"
  1086. +            "               - valid csp names [keep current]: " );
  1087. +
  1088. +    for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
  1089. +    {
  1090. +        printf( "%s", x264_cli_csps[i].name );
  1091. +        if( i+1 < X264_CSP_CLI_MAX )
  1092. +            printf( ", " );
  1093. +    }
  1094. +    printf( "\n"
  1095. +            "               - depth: 8 or 16 bits per pixel [keep current]\n"
  1096. +            "            note: not all depths are supported by all csps.\n"
  1097. +            "            - method: use resizer method [\"bicubic\"]\n"
  1098. +            "               - fastbilinear, bilinear, bicubic, experimental, point,\n"
  1099. +            "               - area, bicublin, gauss, sinc, lanczos, spline\n" );
  1100.  }
  1101.  
  1102.  static uint32_t convert_cpu_to_flag( uint32_t cpu )
  1103. @@ -131,13 +142,15 @@ static int convert_csp_to_pix_fmt( int csp )
  1104.          return csp&X264_CSP_MASK;
  1105.      switch( csp&X264_CSP_MASK )
  1106.      {
  1107. -        case X264_CSP_I420: return PIX_FMT_YUV420P;
  1108. -        case X264_CSP_I422: return PIX_FMT_YUV422P;
  1109. -        case X264_CSP_I444: return PIX_FMT_YUV444P;
  1110. -        case X264_CSP_NV12: return PIX_FMT_NV12;
  1111. -        case X264_CSP_YV12: return PIX_FMT_YUV420P; /* specially handled via swapping chroma */
  1112. -        case X264_CSP_BGR:  return PIX_FMT_BGR24;
  1113. -        case X264_CSP_BGRA: return PIX_FMT_BGRA;
  1114. +        case X264_CSP_YV12: /* specially handled via swapping chroma */
  1115. +        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
  1116. +        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
  1117. +        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
  1118. +        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
  1119. +        /* the next 3 csps have no equivalent 16bit depth in swscale */
  1120. +        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
  1121. +        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGR24;
  1122. +        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGRA;
  1123.          default:            return PIX_FMT_NONE;
  1124.      }
  1125.  }
  1126. @@ -147,23 +160,30 @@ static int pick_closest_supported_csp( int csp )
  1127.      int pix_fmt = convert_csp_to_pix_fmt( csp );
  1128.      switch( pix_fmt )
  1129.      {
  1130. +        case PIX_FMT_YUV420P16LE:
  1131. +        case PIX_FMT_YUV420P16BE:
  1132. +            return X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
  1133.          case PIX_FMT_YUV422P:
  1134. -        case PIX_FMT_YUV422P16LE:
  1135. -        case PIX_FMT_YUV422P16BE:
  1136.          case PIX_FMT_YUYV422:
  1137.          case PIX_FMT_UYVY422:
  1138.              return X264_CSP_I422;
  1139. +        case PIX_FMT_YUV422P16LE:
  1140. +        case PIX_FMT_YUV422P16BE:
  1141. +            return X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
  1142.          case PIX_FMT_YUV444P:
  1143. +            return X264_CSP_I444;
  1144.          case PIX_FMT_YUV444P16LE:
  1145.          case PIX_FMT_YUV444P16BE:
  1146. -            return X264_CSP_I444;
  1147. -        case PIX_FMT_RGB24:    // convert rgb to bgr
  1148. -        case PIX_FMT_RGB48BE:
  1149. -        case PIX_FMT_RGB48LE:
  1150. +            return X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
  1151. +        case PIX_FMT_RGB24:
  1152.          case PIX_FMT_RGB565BE:
  1153.          case PIX_FMT_RGB565LE:
  1154.          case PIX_FMT_RGB555BE:
  1155.          case PIX_FMT_RGB555LE:
  1156. +            return X264_CSP_RGB;
  1157. +        case PIX_FMT_RGB48BE:
  1158. +        case PIX_FMT_RGB48LE:
  1159. +            return X264_CSP_RGB | X264_CSP_HIGH_DEPTH;
  1160.          case PIX_FMT_BGR24:
  1161.          case PIX_FMT_BGR565BE:
  1162.          case PIX_FMT_BGR565LE:
  1163. @@ -209,12 +229,27 @@ static int handle_opts( const char **optlist, char **opts, video_info_t *info, r
  1164.  
  1165.      if( str_csp )
  1166.      {
  1167. -        /* output csp was specified, lookup against valid values */
  1168. +        /* output csp was specified, first check if optional depth was provided */
  1169. +        char *str_depth = strchr( str_csp, ':' );
  1170. +        int depth = x264_cli_csp_depth_factor( info->csp ) * 8;
  1171. +        if( str_depth )
  1172. +        {
  1173. +            /* csp bit depth was specified */
  1174. +            *str_depth++ = '\0';
  1175. +            depth = x264_otoi( str_depth, -1 );
  1176. +            FAIL_IF_ERROR( depth != 8 && depth != 16, "unsupported bit depth %d\n", depth );
  1177. +        }
  1178. +        /* now lookup against the list of valid csps */
  1179.          int csp;
  1180. -        for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
  1181. -            csp--;
  1182. +        if( strlen( str_csp ) == 0 )
  1183. +            csp = info->csp & X264_CSP_MASK;
  1184. +        else
  1185. +            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
  1186. +                csp--;
  1187.          FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
  1188.          h->dst_csp = csp;
  1189. +        if( depth == 16 )
  1190. +            h->dst_csp |= X264_CSP_HIGH_DEPTH;
  1191.      }
  1192.  
  1193.      /* if the input sar is currently invalid, set it to 1:1 so it can be used in math */
  1194. @@ -366,8 +401,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
  1195.      h->swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
  1196.      int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );
  1197.  
  1198. +    int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH );
  1199. +    int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
  1200. +
  1201.      /* confirm swscale can support this conversion */
  1202. +    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
  1203. +                   "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
  1204. +                   info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
  1205.      FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
  1206. +    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
  1207. +                   "output colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
  1208. +                   h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
  1209.      FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
  1210.      FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
  1211.                     "swscale is not compatible with interlaced vertical resizing\n" )
  1212. diff --git a/filters/video/video.c b/filters/video/video.c
  1213. index 61dc8c6..71ae01e 100644
  1214. --- a/filters/video/video.c
  1215. +++ b/filters/video/video.c
  1216. @@ -51,6 +51,7 @@ void x264_register_vid_filters()
  1217.      REGISTER_VFILTER( fix_vfr_pts );
  1218.      REGISTER_VFILTER( resize );
  1219.      REGISTER_VFILTER( select_every );
  1220. +    REGISTER_VFILTER( depth );
  1221.  #if HAVE_GPL
  1222.  #endif
  1223.  }
  1224. diff --git a/input/input.c b/input/input.c
  1225. index 78c7a88..a14bd3c 100644
  1226. --- a/input/input.c
  1227. +++ b/input/input.c
  1228. @@ -32,7 +32,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
  1229.      [X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
  1230.      [X264_CSP_NV12] = { "nv12", 2, { 1,  1 },     { 1, .5 },     2, 2 },
  1231.      [X264_CSP_BGR]  = { "bgr",  1, { 3 },         { 1 },         1, 1 },
  1232. -    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 }
  1233. +    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 },
  1234. +    [X264_CSP_RGB]  = { "rgb",  1, { 3 },         { 1 },         1, 1 },
  1235.  };
  1236.  
  1237.  int x264_cli_csp_is_invalid( int csp )
  1238. @@ -41,6 +42,13 @@ int x264_cli_csp_is_invalid( int csp )
  1239.      return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
  1240.  }
  1241.  
  1242. +int x264_cli_csp_depth_factor( int csp )
  1243. +{
  1244. +    if( x264_cli_csp_is_invalid( csp ) )
  1245. +        return 0;
  1246. +    return (csp & X264_CSP_HIGH_DEPTH) ? 2 : 1;
  1247. +}
  1248. +
  1249.  uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
  1250.  {
  1251.      int csp_mask = csp & X264_CSP_MASK;
  1252. @@ -48,6 +56,7 @@ uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
  1253.          return 0;
  1254.      uint64_t size = (uint64_t)width * height;
  1255.      size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane];
  1256. +    size *= x264_cli_csp_depth_factor( csp );
  1257.      return size;
  1258.  }
  1259.  
  1260. @@ -78,7 +87,7 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
  1261.           pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
  1262.           if( !pic->img.plane[i] )
  1263.               return -1;
  1264. -         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
  1265. +         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
  1266.      }
  1267.  
  1268.      return 0;
  1269. diff --git a/input/input.h b/input/input.h
  1270. index 972dd9c..43826d7 100644
  1271. --- a/input/input.h
  1272. +++ b/input/input.h
  1273. @@ -36,6 +36,7 @@ typedef struct
  1274.      char *index_file;
  1275.      char *resolution;
  1276.      char *colorspace;
  1277. +    int bit_depth;
  1278.      char *timebase;
  1279.      int seek;
  1280.  } cli_input_opt_t;
  1281. @@ -103,8 +104,9 @@ extern cli_input_t input;
  1282.  #define X264_CSP_I444          (X264_CSP_MAX+1)  /* yuv 4:4:4 planar    */
  1283.  #define X264_CSP_BGR           (X264_CSP_MAX+2)  /* packed bgr 24bits   */
  1284.  #define X264_CSP_BGRA          (X264_CSP_MAX+3)  /* packed bgr 32bits   */
  1285. -#define X264_CSP_CLI_MAX       (X264_CSP_MAX+4)  /* end of list         */
  1286. -#define X264_CSP_OTHER          0x2000           /* non x264 colorspace */
  1287. +#define X264_CSP_RGB           (X264_CSP_MAX+4)  /* packed rgb 24bits   */
  1288. +#define X264_CSP_CLI_MAX       (X264_CSP_MAX+5)  /* end of list         */
  1289. +#define X264_CSP_OTHER          0x4000           /* non x264 colorspace */
  1290.  
  1291.  typedef struct
  1292.  {
  1293. @@ -119,6 +121,7 @@ typedef struct
  1294.  extern const x264_cli_csp_t x264_cli_csps[];
  1295.  
  1296.  int      x264_cli_csp_is_invalid( int csp );
  1297. +int      x264_cli_csp_depth_factor( int csp );
  1298.  int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
  1299.  void     x264_cli_pic_clean( cli_pic_t *pic );
  1300.  uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
  1301. diff --git a/input/raw.c b/input/raw.c
  1302. index f5fbed6..9617fb1 100644
  1303. --- a/input/raw.c
  1304. +++ b/input/raw.c
  1305. @@ -34,11 +34,12 @@ typedef struct
  1306.      int next_frame;
  1307.      uint64_t plane_size[4];
  1308.      uint64_t frame_size;
  1309. +    int bit_depth;
  1310.  } raw_hnd_t;
  1311.  
  1312.  static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
  1313.  {
  1314. -    raw_hnd_t *h = malloc( sizeof(raw_hnd_t) );
  1315. +    raw_hnd_t *h = calloc( 1, sizeof(raw_hnd_t) );
  1316.      if( !h )
  1317.          return -1;
  1318.  
  1319. @@ -61,8 +62,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1320.      else /* default */
  1321.          info->csp = X264_CSP_I420;
  1322.  
  1323. -    h->next_frame = 0;
  1324. -    info->vfr     = 0;
  1325. +    h->bit_depth = opt->bit_depth;
  1326. +    FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );
  1327. +    if( h->bit_depth > 8 )
  1328. +        info->csp |= X264_CSP_HIGH_DEPTH;
  1329.  
  1330.      if( !strcmp( psz_filename, "-" ) )
  1331.          h->fh = stdin;
  1332. @@ -73,11 +76,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1333.  
  1334.      info->thread_safe = 1;
  1335.      info->num_frames  = 0;
  1336. -    h->frame_size = 0;
  1337. -    for( int i = 0; i < x264_cli_csps[info->csp].planes; i++ )
  1338. +    info->vfr         = 0;
  1339. +
  1340. +    const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
  1341. +    for( int i = 0; i < csp->planes; i++ )
  1342.      {
  1343.          h->plane_size[i] = x264_cli_pic_plane_size( info->csp, info->width, info->height, i );
  1344.          h->frame_size += h->plane_size[i];
  1345. +        /* x264_cli_pic_plane_size returns the size in bytes; we need the value in pixels from here on */
  1346. +        h->plane_size[i] /= x264_cli_csp_depth_factor( info->csp );
  1347.      }
  1348.  
  1349.      if( x264_is_regular_file( h->fh ) )
  1350. @@ -95,8 +102,22 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1351.  static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
  1352.  {
  1353.      int error = 0;
  1354. +    int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
  1355.      for( int i = 0; i < pic->img.planes && !error; i++ )
  1356. -        error |= fread( pic->img.plane[i], h->plane_size[i], 1, h->fh ) <= 0;
  1357. +    {
  1358. +        error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
  1359. +        if( h->bit_depth & 7 )
  1360. +        {
  1361. +            /* upconvert high-depth planes narrower than 16 bits to 16 bits,
  1362. +             * using the same algorithm as the depth filter. */
  1363. +            uint16_t *plane = (uint16_t*)pic->img.plane[i];
  1364. +            uint64_t pixel_count = h->plane_size[i];
  1365. +            int lshift = 16 - h->bit_depth;
  1366. +            int rshift = 2*h->bit_depth - 16;
  1367. +            for( uint64_t j = 0; j < pixel_count; j++ )
  1368. +                plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
  1369. +        }
  1370. +    }
  1371.      return error;
  1372.  }
  1373.  
  1374. diff --git a/x264.c b/x264.c
  1375. index 5bd2af7..415181c 100644
  1376. --- a/x264.c
  1377. +++ b/x264.c
  1378. @@ -214,7 +214,7 @@ static void print_version_info()
  1379.  #else
  1380.      printf( "using a non-gcc compiler\n" );
  1381.  #endif
  1382. -    printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
  1383. +    printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
  1384.      printf( "x264 license: " );
  1385.  #if HAVE_GPL
  1386.      printf( "GPL version 2 or later\n" );
  1387. @@ -375,7 +375,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  1388.  #else
  1389.          "no",
  1390.  #endif
  1391. -        BIT_DEPTH
  1392. +        x264_bit_depth
  1393.        );
  1394.      H0( "Example usage:\n" );
  1395.      H0( "\n" );
  1396. @@ -697,6 +697,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  1397.          "                                  - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
  1398.      H1( "      --input-csp <string>    Specify input colorspace format for raw input\n" );
  1399.      print_csp_names( longhelp );
  1400. +    H1( "      --input-depth <integer> Specify input bit depth for raw input\n" );
  1401.      H1( "      --input-res <intxint>   Specify input resolution (width x height)\n" );
  1402.      H1( "      --index <string>        Filename for input index file\n" );
  1403.      H0( "      --sar width:height      Specify Sample Aspect Ratio\n" );
  1404. @@ -769,7 +770,8 @@ enum {
  1405.      OPT_LOG_LEVEL,
  1406.      OPT_VIDEO_FILTER,
  1407.      OPT_INPUT_RES,
  1408. -    OPT_INPUT_CSP
  1409. +    OPT_INPUT_CSP,
  1410. +    OPT_INPUT_DEPTH
  1411.  } OptionsOPT;
  1412.  
  1413.  static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
  1414. @@ -921,6 +923,7 @@ static struct option long_options[] =
  1415.      { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
  1416.      { "input-res",   required_argument, NULL, OPT_INPUT_RES },
  1417.      { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
  1418. +    { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
  1419.      {0, 0, 0, 0}
  1420.  };
  1421.  
  1422. @@ -1082,10 +1085,16 @@ static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info,
  1423.      if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
  1424.          param->i_csp = info->csp;
  1425.      else
  1426. -        param->i_csp = X264_CSP_I420;
  1427. +        param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
  1428.      if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
  1429.          return -1;
  1430.  
  1431. +    char args[20];
  1432. +    sprintf( args, "bit_depth=%d", x264_bit_depth );
  1433. +
  1434. +    if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
  1435. +        return -1;
  1436. +
  1437.      return 0;
  1438.  }
  1439.  
  1440. @@ -1138,6 +1147,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  1441.  
  1442.      memset( opt, 0, sizeof(cli_opt_t) );
  1443.      memset( &input_opt, 0, sizeof(cli_input_opt_t) );
  1444. +    input_opt.bit_depth = 8;
  1445.      opt->b_progress = 1;
  1446.  
  1447.      /* Presets are applied before all other options. */
  1448. @@ -1283,6 +1293,9 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  1449.              case OPT_INPUT_CSP:
  1450.                  input_opt.colorspace = optarg;
  1451.                  break;
  1452. +            case OPT_INPUT_DEPTH:
  1453. +                input_opt.bit_depth = atoi( optarg );
  1454. +                break;
  1455.              default:
  1456.  generic_option:
  1457.              {
  1458. diff --git a/x264.h b/x264.h
  1459. index 56d424c..c9b182a 100644
  1460. --- a/x264.h
  1461. +++ b/x264.h
  1462. @@ -180,7 +180,8 @@ static const char * const x264_open_gop_names[] = { "none", "normal", "bluray",
  1463.  #define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
  1464.  #define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
  1465.  #define X264_CSP_MAX            0x0004  /* end of list */
  1466. -#define X264_CSP_VFLIP          0x1000  /* */
  1467. +#define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
  1468. +#define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
  1469.  
  1470.  /* Slice type */
  1471.  #define X264_TYPE_AUTO          0x0000  /* Let x264 choose the right type */
  1472. @@ -342,7 +343,7 @@ typedef struct x264_param_t
  1473.      {
  1474.          int         i_rc_method;    /* X264_RC_* */
  1475.  
  1476. -        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
  1477. +        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)) */
  1478.          int         i_qp_min;       /* min allowed QP value */
  1479.          int         i_qp_max;       /* max allowed QP value */
  1480.          int         i_qp_step;      /* max QP step between frames */
  1481. @@ -566,6 +567,15 @@ int     x264_param_apply_profile( x264_param_t *, const char *profile );
  1482.   * Picture structures and functions
  1483.   ****************************************************************************/
  1484.  
  1485. +/* x264_bit_depth:
  1486. + *      Specifies the number of bits per pixel sample that x264 uses. This is also the
  1487. + *      bit depth that x264 encodes in. If this value is > 8, x264 will read
  1488. + *      two bytes of input data for each pixel sample, and expect the upper
  1489. + *      (16-x264_bit_depth) bits to be zero.
  1490. + *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
  1491. + *      colorspace depth as well. */
  1492. +extern const int x264_bit_depth;
  1493. +
  1494.  enum pic_struct_e
  1495.  {
  1496.      PIC_STRUCT_AUTO              = 0, // automatically decide (default)
  1497. --
  1498. 1.7.2.3
  1499.  
  1500.  
  1501. From 195cf9bd51203eb18cdff5542b27caf635e7b1cf Mon Sep 17 00:00:00 2001
  1502. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1503. Date: Sun, 26 Sep 2010 21:04:30 -0700
  1504. Subject: [PATCH 8/8] Add High 10 Intra profile support (AVC-Intra)
  1505.  x264 should now be able to encode compliant AVC-Intra 50.
  1506.  With a 10-bit-compiled version of x264, a sample command line for 1080i25 might be:
  1507.  --interlaced --keyint 1 --vbv-bufsize 2000 --bitrate 50000 --vbv-maxrate 50000 --nal-hrd cbr
  1508.  
  1509. Also print "Constrained Baseline" for baseline profile, since that's all x264 (and everything else in the world) supports.
  1510. Also reorganize parameter validation a bit to reduce some spurious warnings.
  1511. ---
  1512. encoder/encoder.c |   14 +++++++++-----
  1513.  encoder/set.c     |   15 ++++++++++-----
  1514.  2 files changed, 19 insertions(+), 10 deletions(-)
  1515.  
  1516. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1517. index 62a4350..a1e8383 100644
  1518. --- a/encoder/encoder.c
  1519. +++ b/encoder/encoder.c
  1520. @@ -439,6 +439,13 @@ static int x264_validate_parameters( x264_t *h )
  1521.      if( h->i_thread_frames > 1 )
  1522.          h->param.nalu_process = NULL;
  1523.  
  1524. +    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
  1525. +    if( h->param.i_keyint_max == 1 )
  1526. +    {
  1527. +        h->param.b_intra_refresh = 0;
  1528. +        h->param.analyse.i_weighted_pred = 0;
  1529. +    }
  1530. +
  1531.      if( h->param.b_interlaced )
  1532.      {
  1533.          if( h->param.analyse.i_me_method >= X264_ME_ESA )
  1534. @@ -576,7 +583,6 @@ static int x264_validate_parameters( x264_t *h )
  1535.  
  1536.      h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
  1537.      h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
  1538. -    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
  1539.      if( h->param.i_scenecut_threshold < 0 )
  1540.          h->param.i_scenecut_threshold = 0;
  1541.      if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
  1542. @@ -586,8 +592,6 @@ static int x264_validate_parameters( x264_t *h )
  1543.      }
  1544.      h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
  1545.      h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
  1546. -    if( h->param.i_keyint_max == 1 )
  1547. -        h->param.b_intra_refresh = 0;
  1548.      h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
  1549.      if( h->param.i_bframe <= 1 )
  1550.          h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
  1551. @@ -1155,10 +1159,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
  1552.          fclose( f );
  1553.      }
  1554.  
  1555. -    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
  1556. +    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
  1557.                            h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
  1558.                            h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
  1559. -                          h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
  1560. +                          h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
  1561.                            "High 4:4:4 Predictive";
  1562.      char level[4];
  1563.      snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
  1564. diff --git a/encoder/set.c b/encoder/set.c
  1565. index a003012..0a24bf7 100644
  1566. --- a/encoder/set.c
  1567. +++ b/encoder/set.c
  1568. @@ -121,17 +121,17 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  1569.      sps->b_constraint_set1  = sps->i_profile_idc <= PROFILE_MAIN;
  1570.      /* Never set constraint_set2, it is not necessary and not used in real world. */
  1571.      sps->b_constraint_set2  = 0;
  1572. +    sps->b_constraint_set3  = 0;
  1573.  
  1574. +    sps->i_level_idc = param->i_level_idc;
  1575.      if( param->i_level_idc == 9 && ( sps->i_profile_idc >= PROFILE_BASELINE && sps->i_profile_idc <= PROFILE_EXTENDED ) )
  1576.      {
  1577.          sps->b_constraint_set3 = 1; /* level 1b with Baseline, Main or Extended profile is signalled via constraint_set3 */
  1578.          sps->i_level_idc      = 11;
  1579.      }
  1580. -    else
  1581. -    {
  1582. -        sps->b_constraint_set3 = 0;
  1583. -        sps->i_level_idc = param->i_level_idc;
  1584. -    }
  1585. +    /* High 10 Intra profile */
  1586. +    if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH10 )
  1587. +        sps->b_constraint_set3 = 1;
  1588.  
  1589.      sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
  1590.      /* extra slot with pyramid so that we don't have to override the
  1591. @@ -140,6 +140,11 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  1592.      sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  1593.                              param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
  1594.      sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
  1595. +    if( param->i_keyint_max == 1 )
  1596. +    {
  1597. +        sps->i_num_ref_frames = 0;
  1598. +        sps->vui.i_max_dec_frame_buffering = 0;
  1599. +    }
  1600.  
  1601.      /* number of refs + current frame */
  1602.      int max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
  1603. --
  1604. 1.7.2.3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement