Untitled

From 5f104e9957cc4b69f7197fecf93648a0e2ae0e59 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Mon, 20 Sep 2010 13:10:13 +0400
Subject: [PATCH 1/8] Fix DTS/bitrate calculation if the first PTS wasn't zero
 Fix bitrate calculation with DTS compression.

---
 common/common.h   |    1 +
 encoder/encoder.c |   11 +++++++----
 x264.c            |   10 ++++++----
 x264.h            |    2 +-
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/common/common.h b/common/common.h
index efb25be..132cfee 100644
--- a/common/common.h
+++ b/common/common.h
@@ -499,6 +499,7 @@ struct x264_t
         int i_delay;    /* Number of frames buffered for B reordering */
         int     i_bframe_delay;
         int64_t i_bframe_delay_time;
+        int64_t i_first_pts;
         int64_t i_init_delta;
         int64_t i_prev_reordered_pts[2];
         int64_t i_largest_pts;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index fa4401b..2b679a0 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2329,8 +2329,10 @@ int     x264_encoder_encode( x264_t *h,

         fenc->i_frame = h->frames.i_input++;

+        if( fenc->i_frame == 0 )
+            h->frames.i_first_pts = fenc->i_pts;
         if( h->frames.i_bframe_delay && fenc->i_frame == h->frames.i_bframe_delay )
-            h->frames.i_bframe_delay_time = fenc->i_pts;
+            h->frames.i_bframe_delay_time = fenc->i_pts - h->frames.i_first_pts;

         if( h->param.b_vfr_input && fenc->i_pts <= h->frames.i_largest_pts )
             x264_log( h, X264_LOG_WARNING, "non-strictly-monotonic PTS\n" );
@@ -2495,8 +2497,8 @@ int     x264_encoder_encode( x264_t *h,
             {
                 /* DTS compression */
                 if( h->i_frame == 1 )
-                    thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
-                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
+                    thread_current->frames.i_init_delta = (h->fenc->i_reordered_pts - h->frames.i_first_pts) * h->i_dts_compress_multiplier;
+                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier + h->frames.i_first_pts * h->i_dts_compress_multiplier;
             }
         }
         else
@@ -3110,7 +3112,8 @@ void    x264_encoder_close  ( x264_t *h )
             f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125;
         else
         {
-            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts) * h->param.i_timebase_num / h->param.i_timebase_den;
+            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts - h->frames.i_first_pts)
+                           * h->i_dts_compress_multiplier * h->param.i_timebase_num / h->param.i_timebase_den;
             f_bitrate = SUM3(h->stat.i_frame_size) / duration / 125;
         }

diff --git a/x264.c b/x264.c
index a1e7147..f74f096 100644
--- a/x264.c
+++ b/x264.c
@@ -1584,7 +1584,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     int64_t second_largest_pts = -1;
     int64_t ticks_per_frame;
     double  duration;
-    int     prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
+    int     prev_timebase_den;
     int     dts_compress_multiplier;
     double  pulldown_pts = 0;

@@ -1603,6 +1603,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
     }

+    prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
+
     if( ( h = x264_encoder_open( param ) ) == NULL )
     {
         x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
@@ -1727,6 +1729,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
         x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );

+    largest_pts *= dts_compress_multiplier;
+    second_largest_pts *= dts_compress_multiplier;
     /* duration algorithm fails when only 1 frame is output */
     if( i_frame_output == 1 )
         duration = (double)param->i_fps_den / param->i_fps_num;
@@ -1734,8 +1738,6 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
     else
         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
-    if( !(opt->i_pulldown && !param->b_vfr_input) )
-        duration *= dts_compress_multiplier;

     i_end = x264_mdate();
     /* Erase progress indicator before printing encoding stats. */
@@ -1754,7 +1756,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     }

     filter.free( opt->hin );
-    output.close_file( opt->hout, largest_pts * dts_compress_multiplier, second_largest_pts * dts_compress_multiplier );
+    output.close_file( opt->hout, largest_pts, second_largest_pts );

     if( i_frame_output > 0 )
     {
diff --git a/x264.h b/x264.h
index 8174015..71f5f55 100644
--- a/x264.h
+++ b/x264.h
@@ -653,7 +653,7 @@ typedef struct
     int     b_keyframe;
     /* In: user pts, Out: pts of encoded picture (user)*/
     int64_t i_pts;
-    /* Out: frame dts. Since the pts of the first frame is always zero,
+    /* Out: frame dts. When the pts of the first frame is close to zero,
      *      initial frames may have a negative dts which must be dealt with by any muxer */
     int64_t i_dts;
     /* In: custom encoding parameters to be set from this frame forwards
--
1.7.2.3


From 5cba26f757ec00a7b95656615813e692685ee138 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sat, 25 Sep 2010 15:55:32 -0700
Subject: [PATCH 2/8] Fix CFR ratecontrol with timebase != 1/fps
 Fixes VBV + DTS compression, among other things.

---
 encoder/encoder.c   |    2 +-
 encoder/slicetype.c |    4 ++--
 x264.c              |    1 +
 x264.h              |    4 +++-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index 2b679a0..3570776 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -626,7 +626,7 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_MAX( h->param.i_keyint_max, bufsize*fps ) );
     }

-    if( !h->param.i_timebase_num || !h->param.i_timebase_den )
+    if( !h->param.i_timebase_num || !h->param.i_timebase_den || !(h->param.b_vfr_input || h->param.b_pulldown) )
     {
         h->param.i_timebase_num = h->param.i_fps_den;
         h->param.i_timebase_den = h->param.i_fps_num;
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index 0d87908..d08cf02 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -1260,12 +1260,12 @@ void x264_slicetype_decide( x264_t *h )
             if( h->param.b_vfr_input )
             {
                 if( lookahead_size-- > 1 )
-                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts);
+                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts) * h->i_dts_compress_multiplier;
                 else
                     h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
             }
             else
-                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct];
+                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct] * h->i_dts_compress_multiplier;
             h->i_prev_duration = h->lookahead->next.list[i]->i_duration;

             if( h->lookahead->next.list[i]->i_frame > h->i_disp_fields_last_frame && lookahead_size > 0 )
diff --git a/x264.c b/x264.c
index f74f096..5bd2af7 100644
--- a/x264.c
+++ b/x264.c
@@ -1595,6 +1595,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
     /* set up pulldown */
     if( opt->i_pulldown && !param->b_vfr_input )
     {
+        param->b_pulldown = 1;
         param->b_pic_struct = 1;
         pulldown = &pulldown_values[opt->i_pulldown];
         param->i_timebase_num = param->i_fps_den;
diff --git a/x264.h b/x264.h
index 71f5f55..56d424c 100644
--- a/x264.h
+++ b/x264.h
@@ -383,7 +383,9 @@ typedef struct x264_param_t
     int b_annexb;               /* if set, place start codes (4 bytes) before NAL units,
                                  * otherwise place size (4 bytes) before NAL units. */
     int i_sps_id;               /* SPS and PPS id number */
-    int b_vfr_input;            /* VFR input */
+    int b_vfr_input;            /* VFR input.  If 1, use timebase and timestamps for ratecontrol purposes.
+                                 * If 0, use fps only. */
+    int b_pulldown;             /* use explicitly set timebase for CFR */
     uint32_t i_fps_num;
     uint32_t i_fps_den;
     uint32_t i_timebase_num;    /* Timebase numerator */
--
1.7.2.3


From 54073becc7cfc3a1b574d954d1017cd58cbe8b2a Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Mon, 27 Sep 2010 05:39:13 -0700
Subject: [PATCH 3/8] Add missing emms for dump-yuv

---
 encoder/encoder.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index 3570776..7f3d5bc 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2934,7 +2934,10 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
          }

     if( h->param.psz_dump_yuv )
+    {
         x264_frame_dump( h );
+        x264_emms();
+    }

     return frame_size;
 }
--
1.7.2.3


From 991166c935516f19f65485ebb447f079aca41e0e Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Tue, 21 Sep 2010 17:11:00 -0700
Subject: [PATCH 4/8] Make slice-max-size more aggressive in considering escape bytes
 The x264 assumption of randomly distributed escape bytes fails in the case of CABAC + an enormous number of identical macroblocks.
 This patch attempts to compensate for this.
 It is probably safe to assume in calling applications that x264 practically never violates the slice size limitation.

---
 encoder/encoder.c |   63 +++++++++++++++++++++++++++++++++-------------------
 1 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index 7f3d5bc..b9e66ac 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1834,10 +1834,12 @@ static int x264_slice_write( x264_t *h )
     uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
     int mv_bits_bak = 0;
     int tex_bits_bak = 0;
-    /* Assume no more than 3 bytes of NALU escaping.
-     * NALUs other than the first use a 3-byte startcode. */
-    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
-    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
+    /* NALUs other than the first use a 3-byte startcode.
+     * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
+     * Then add an extra 5 bytes just in case, to account for random NAL escapes and
+     * other inaccuracies. */
+    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
+    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
     int starting_bits = bs_pos(&h->out.bs);
     int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
     int b_hpel = h->fdec->b_kept_as_ref;
@@ -1884,7 +1886,7 @@ static int x264_slice_write( x264_t *h )
         if( x264_bitstream_check_buffer( h ) )
             return -1;

-        if( h->param.i_slice_max_size > 0 )
+        if( slice_max_size )
         {
             mv_bits_bak = h->stat.frame.i_mv_bits;
             tex_bits_bak = h->stat.frame.i_tex_bits;
@@ -1948,35 +1950,50 @@ static int x264_slice_write( x264_t *h )
         int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
         int mb_size = total_bits - mb_spos;

-        /* We'll just re-encode this last macroblock if we go over the max slice size. */
-        if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
-        {
-            if( mb_xy != h->sh.i_first_mb )
+        if( slice_max_size )
+        {
+            /* Count the skip run, just in case. */
+            if( !h->param.b_cabac )
+                total_bits += bs_size_ue_big( i_skip );
+            /* HACK: we assume no more than 3 bytes of NALU escaping, but
+             * this can fail in CABAC streams with an extremely large number of identical
+             * blocks in sequence (e.g. all-black intra blocks).
+             * Thus, every 64 blocks, pretend we've used a byte.
+             * For reference, a sequence of identical empty-CBP i16x16 blocks will use
+             * one byte after 26 macroblocks, assuming a perfectly adapted CABAC.
+             * That's 78 macroblocks to generate the 3-byte sequence to trigger an escape. */
+            else if( ((mb_xy - h->sh.i_first_mb) & 63) == 63 )
+                slice_max_size -= 8;
+            /* We'll just re-encode this last macroblock if we go over the max slice size. */
+            if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
             {
-                h->stat.frame.i_mv_bits = mv_bits_bak;
-                h->stat.frame.i_tex_bits = tex_bits_bak;
-                if( h->param.b_cabac )
+                if( mb_xy != h->sh.i_first_mb )
                 {
-                    memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
-                    h->cabac.p[-1] = cabac_prevbyte_bak;
+                    h->stat.frame.i_mv_bits = mv_bits_bak;
+                    h->stat.frame.i_tex_bits = tex_bits_bak;
+                    if( h->param.b_cabac )
+                    {
+                        memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
+                        h->cabac.p[-1] = cabac_prevbyte_bak;
+                    }
+                    else
+                    {
+                        h->out.bs = bs_bak;
+                        i_skip = i_skip_bak;
+                    }
+                    h->mb.b_reencode_mb = 1;
+                    h->sh.i_last_mb = mb_xy-1;
+                    break;
                 }
                 else
                 {
-                    h->out.bs = bs_bak;
-                    i_skip = i_skip_bak;
+                    h->sh.i_last_mb = mb_xy;
+                    h->mb.b_reencode_mb = 0;
                 }
-                h->mb.b_reencode_mb = 1;
-                h->sh.i_last_mb = mb_xy-1;
-                break;
             }
             else
-            {
-                h->sh.i_last_mb = mb_xy;
                 h->mb.b_reencode_mb = 0;
-            }
         }
-        else
-            h->mb.b_reencode_mb = 0;

 #if HAVE_VISUALIZE
         if( h->param.b_visualize )
--
1.7.2.3


From 3f15d8ea707d4985a38059fe58ce0a0993ceeb94 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Mon, 27 Sep 2010 05:39:02 -0700
Subject: [PATCH 5/8] Various cosmetics

---
 encoder/encoder.c |   12 ++++++------
 encoder/set.c     |   17 ++---------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index b9e66ac..28ded05 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2943,12 +2943,12 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,

     /* Remove duplicates, must be done near the end as breaks h->fref0 array
      * by freeing some of its pointers. */
-     for( int i = 0; i < h->i_ref0; i++ )
-         if( h->fref0[i] && h->fref0[i]->b_duplicate )
-         {
-             x264_frame_push_blank_unused( h, h->fref0[i] );
-             h->fref0[i] = 0;
-         }
+    for( int i = 0; i < h->i_ref0; i++ )
+        if( h->fref0[i] && h->fref0[i]->b_duplicate )
+        {
+            x264_frame_push_blank_unused( h, h->fref0[i] );
+            h->fref0[i] = 0;
+        }

     if( h->param.psz_dump_yuv )
     {
diff --git a/encoder/set.c b/encoder/set.c
index b0d2149..a003012 100644
--- a/encoder/set.c
+++ b/encoder/set.c
@@ -294,34 +294,21 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
     }
     else if( sps->i_poc_type == 1 )
     {
-        int i;
-
         bs_write( s, 1, sps->b_delta_pic_order_always_zero );
         bs_write_se( s, sps->i_offset_for_non_ref_pic );
         bs_write_se( s, sps->i_offset_for_top_to_bottom_field );
         bs_write_ue( s, sps->i_num_ref_frames_in_poc_cycle );

-        for( i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
-        {
+        for( int i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
             bs_write_se( s, sps->i_offset_for_ref_frame[i] );
-        }
     }
     bs_write_ue( s, sps->i_num_ref_frames );
     bs_write( s, 1, sps->b_gaps_in_frame_num_value_allowed );
     bs_write_ue( s, sps->i_mb_width - 1 );
-    if (sps->b_frame_mbs_only)
-    {
-        bs_write_ue( s, sps->i_mb_height - 1);
-    }
-    else // interlaced
-    {
-        bs_write_ue( s, sps->i_mb_height/2 - 1);
-    }
+    bs_write_ue( s, (sps->i_mb_height >> !sps->b_frame_mbs_only) - 1);
     bs_write( s, 1, sps->b_frame_mbs_only );
     if( !sps->b_frame_mbs_only )
-    {
         bs_write( s, 1, sps->b_mb_adaptive_frame_field );
-    }
     bs_write( s, 1, sps->b_direct8x8_inference );

     bs_write( s, 1, sps->b_crop );
--
1.7.2.3


From 377efcd2643ba657a6d26c4599a9cc4022ca84e8 Mon Sep 17 00:00:00 2001
From: Alex Wright <alexw0885@gmail.com>
Date: Sun, 19 Sep 2010 05:08:22 -0700
Subject: [PATCH 6/8] Chroma mode decision/subpel for B-frames
 Improves compression ~0.4-1%. Helps more on videos with lots of chroma detail.
 Enabled at subme 9 (preset slower) and higher.

---
 common/macroblock.c |    5 +-
 encoder/analyse.c   |  118 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 117 insertions(+), 6 deletions(-)

diff --git a/common/macroblock.c b/common/macroblock.c
index b6c91d6..7f0348e 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -448,8 +448,9 @@ void x264_macroblock_thread_init( x264_t *h )
     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
     if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
         h->mb.i_subpel_refine--;
-    h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
-                        && h->mb.i_subpel_refine >= 5;
+    h->mb.b_chroma_me = h->param.analyse.b_chroma_me &&
+                        ((h->sh.i_type == SLICE_TYPE_P && h->mb.i_subpel_refine >= 5) ||
+                         (h->sh.i_type == SLICE_TYPE_B && h->mb.i_subpel_refine >= 9));
     h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
                           (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);

diff --git a/encoder/analyse.c b/encoder/analyse.c
index c4162e9..6ed13ba 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1679,6 +1679,37 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
         a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
 }

+static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
+{ /* Returns the summed mbcmp cost of the bi-predicted chroma for partition idx of luma size i_pixel. */
+    ALIGNED_ARRAY_8( pixel, pix, [4],[8*8] ); /* mc_chroma outputs: [0],[1] from the list0 entry, [2],[3] from the list1 entry */
+    ALIGNED_ARRAY_8( pixel,  bi, [2],[8*8] ); /* h->mc.avg results: bi[0] from pix[0]/pix[2], bi[1] from pix[1]/pix[3] */
+    int l0_mvy_offset, l1_mvy_offset; /* added to the vertical mv passed to mc_chroma; set inside COST_BI_CHROMA */
+    int i_chroma_cost = 0;
+
+#define COST_BI_CHROMA( m0, m1, width, height ) /* expands to a bare brace block, so call sites take no trailing ';' */ \
+{ \
+    l0_mvy_offset = h->mb.b_interlaced & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; /* -2 or +2 by i_mb_y parity, else 0 */ \
+    l1_mvy_offset = h->mb.b_interlaced & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
+    h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
+    h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
+    h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); /* NOTE(review): i_pixel+3 presumably selects the half-size (chroma) function -- confirm against the PIXEL_* enum */ \
+    h->mc.avg[i_pixel+3]( bi[1], 8, pix[1], 8, pix[3], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
+    i_chroma_cost  = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 8 ); /* source planes are taken from m0 only */ \
+    i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 8 ); \
+}
+
+    if( i_pixel == PIXEL_16x16 ) /* idx is not used for the 16x16 case */
+        COST_BI_CHROMA( a->l0.bi16x16, a->l1.bi16x16, 8, 8 )
+    else if( i_pixel == PIXEL_16x8 )
+        COST_BI_CHROMA( a->l0.me16x8[idx], a->l1.me16x8[idx], 8, 4 )
+    else if( i_pixel == PIXEL_8x16 )
+        COST_BI_CHROMA( a->l0.me8x16[idx], a->l1.me8x16[idx], 4, 8 )
+    else /* every other i_pixel value is handled as an 8x8 partition */
+        COST_BI_CHROMA( a->l0.me8x8[idx], a->l1.me8x8[idx], 4, 4 )
+
+    return i_chroma_cost;
+}
+
 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
 {
     /* Assumes that fdec still contains the results of
@@ -1693,15 +1724,29 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
         {
             const int x = (i&1)*8;
             const int y = (i>>1)*8;
-            a->i_cost16x16direct +=
-            a->i_cost8x8direct[i] =
-                h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE, &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
+            a->i_cost8x8direct[i] = h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE,
+                                                              &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
+            if( h->mb.b_chroma_me )
+            {
+                a->i_cost8x8direct[i] += h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
+                                                                   &h->mb.pic.p_fdec[1][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE )
+                                      +  h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
+                                                                   &h->mb.pic.p_fdec[2][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE );
+            }
+            a->i_cost16x16direct += a->i_cost8x8direct[i];

             /* mb type cost */
             a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
         }
     else
+    {
         a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_16x16]( p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE );
+        if( h->mb.b_chroma_me )
+        {
+            a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
+                                 +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
+        }
+    }
 }

 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
@@ -1807,6 +1852,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
                      + a->l0.bi16x16.cost_mv
                      + a->l1.bi16x16.cost_mv;

+    if( h->mb.b_chroma_me )
+        a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
+
     /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
     if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
     {
@@ -1819,6 +1867,39 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
                                 h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
                    + ref_costs + l0_mv_cost + l1_mv_cost;
+
+        if( h->mb.b_chroma_me )
+        {
+            ALIGNED_ARRAY_8( pixel, pixuv, [2],[8*FENC_STRIDE] );
+            ALIGNED_ARRAY_8( pixel, bi, [8*FENC_STRIDE] );
+
+            if( h->mb.b_interlaced & a->l0.bi16x16.i_ref )
+            {
+                int l0_mvy_offset = h->mb.b_interlaced & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+                h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
+                                 h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
+            }
+            else
+                h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
+
+            if( h->mb.b_interlaced & a->l1.bi16x16.i_ref )
+            {
+                int l1_mvy_offset = h->mb.b_interlaced & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
+                h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
+                                 h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
+            }
+            else
+                h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
+
+            h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
+                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+            h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
+                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
+
+            cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
+                   +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
+        }
+
         if( cost00 < a->i_cost16x16bi )
         {
             M32( a->l0.bi16x16.mv ) = 0;
@@ -2017,6 +2098,13 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
                          + a->l0.me8x8[i].i_ref_cost + a->l1.me8x8[i].i_ref_cost
                          + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];

+        if( h->mb.b_chroma_me )
+        {
+            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
+            i_part_cost_bi += i_chroma_cost;
+            a->i_satd8x8[2][i] += i_chroma_cost;
+        }
+
         a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
         a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];

@@ -2090,6 +2178,13 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
         a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
         a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];

+        if( h->mb.b_chroma_me )
+        {
+            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
+            i_part_cost_bi += i_chroma_cost;
+            a->i_satd8x8[2][i] += i_chroma_cost;
+        }
+
         i_part_cost = a->l0.me8x8[i].cost;
         h->mb.i_sub_partition[i] = D_L0_8x8;
         COPY2_IF_LT( i_part_cost, a->l1.me8x8[i].cost, h->mb.i_sub_partition[i], D_L1_8x8 );
@@ -2162,6 +2257,9 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
                         + a->l0.me16x8[i].cost_mv + a->l1.me16x8[i].cost_mv + a->l0.me16x8[i].i_ref_cost
                         + a->l1.me16x8[i].i_ref_cost;

+        if( h->mb.b_chroma_me )
+            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 );
+
         i_part_cost = a->l0.me16x8[i].cost;
         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */

@@ -2252,6 +2350,9 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
                         + a->l0.me8x16[i].cost_mv + a->l1.me8x16[i].cost_mv + a->l0.me8x16[i].i_ref_cost
                         + a->l1.me8x16[i].i_ref_cost;

+        if( h->mb.b_chroma_me )
+            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 );
+
         i_part_cost = a->l0.me8x16[i].cost;
         a->i_mb_partition8x16[i] = D_L0_8x8;

@@ -3249,7 +3350,16 @@ intra_analysis:
                 h->mb.i_partition = i_partition;
             }

-            x264_mb_analyse_intra( h, &analysis, i_satd_inter );
+            if( h->mb.b_chroma_me )
+            {
+                x264_mb_analyse_intra_chroma( h, &analysis );
+                x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
+                analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
+                analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
+                analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
+            }
+            else
+                x264_mb_analyse_intra( h, &analysis, i_satd_inter );

             if( analysis.i_mbrd )
             {
--
1.7.2.3


From 594ff9664e7ac57a53fae8d9b5a1ecdf2ba5fa75 Mon Sep 17 00:00:00 2001
From: Oskar Arvidsson <oskar@irock.se>
Date: Mon, 27 Sep 2010 16:02:20 +0200
Subject: [PATCH 7/8] Finish support for high-depth video throughout x264
 Add support for high depth input in libx264.
 Add support for 16-bit colorspaces in the filtering system.
 Add support for input bit depths in the interval [9,16] with the raw demuxer.
 Add a depth filter to dither input to x264.

---
 Makefile                 |    2 +-
 common/common.c          |   17 ++--
 common/frame.c           |   14 +++
 common/mc.c              |   11 +--
 encoder/encoder.c        |    8 +-
 filters/video/crop.c     |    8 +-
 filters/video/depth.c    |  228 ++++++++++++++++++++++++++++++++++++++++++++++
 filters/video/internal.c |    1 +
 filters/video/resize.c   |   84 +++++++++++++----
 filters/video/video.c    |    1 +
 input/input.c            |   13 ++-
 input/input.h            |    7 +-
 input/raw.c              |   33 ++++++-
 x264.c                   |   21 ++++-
 x264.h                   |   14 +++-
 15 files changed, 405 insertions(+), 57 deletions(-)
 create mode 100644 filters/video/depth.c

diff --git a/Makefile b/Makefile
index bab55e5..0cd7b82 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
          output/flv.c output/flv_bytestream.c filters/filters.c \
          filters/video/video.c filters/video/source.c filters/video/internal.c \
          filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
-         filters/video/select_every.c filters/video/crop.c
+         filters/video/select_every.c filters/video/crop.c filters/video/depth.c

 SRCSO =

diff --git a/common/common.c b/common/common.c
index c329cb0..aff5fc3 100644
--- a/common/common.c
+++ b/common/common.c
@@ -33,6 +33,8 @@
 #include <malloc.h>
 #endif

+const int x264_bit_depth = BIT_DEPTH;
+
 static void x264_log_default( void *, int, const char *, va_list );

 /****************************************************************************
@@ -1047,19 +1049,20 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
     x264_picture_init( pic );
     pic->img.i_csp = i_csp;
     pic->img.i_plane = csp == X264_CSP_NV12 ? 2 : 3;
-    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 );
+    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
+    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 * depth_factor );
     if( !pic->img.plane[0] )
         return -1;
-    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
+    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height * depth_factor;
     if( csp != X264_CSP_NV12 )
-        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4;
-    pic->img.i_stride[0] = i_width;
+        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4 * depth_factor;
+    pic->img.i_stride[0] = i_width * depth_factor;
     if( csp == X264_CSP_NV12 )
-        pic->img.i_stride[1] = i_width;
+        pic->img.i_stride[1] = i_width * depth_factor;
     else
     {
-        pic->img.i_stride[1] = i_width / 2;
-        pic->img.i_stride[2] = i_width / 2;
+        pic->img.i_stride[1] = i_width / 2 * depth_factor;
+        pic->img.i_stride[2] = i_width / 2 * depth_factor;
     }
     return 0;
 }
diff --git a/common/frame.c b/common/frame.c
index 0c3d77f..95666da 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -263,6 +263,20 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
         return -1;
     }

+#if X264_HIGH_BIT_DEPTH
+    if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
+    {
+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
+        return -1;
+    }
+#else
+    if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
+    {
+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
+        return -1;
+    }
+#endif
+
     dst->i_type     = src->i_type;
     dst->i_qpplus1  = src->i_qpplus1;
     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
diff --git a/common/mc.c b/common/mc.c
index 5b58a76..acc2312 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -302,12 +302,7 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
 {
     while( h-- )
     {
-#if X264_HIGH_BIT_DEPTH
-        for( int i = 0; i < w; i++ )
-            dst[i] = src[i] << (BIT_DEPTH-8);
-#else
-        memcpy( dst, src, w );
-#endif
+        memcpy( dst, src, w * sizeof(pixel) );
         dst += i_dst;
         src += i_src;
     }
@@ -320,8 +315,8 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
     for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
         for( int x=0; x<w; x++ )
         {
-            dst[2*x]   = srcu[x] << (BIT_DEPTH-8);
-            dst[2*x+1] = srcv[x] << (BIT_DEPTH-8);
+            dst[2*x]   = ((pixel*)srcu)[x];
+            dst[2*x+1] = ((pixel*)srcv)[x];
         }
 }

diff --git a/encoder/encoder.c b/encoder/encoder.c
index 28ded05..62a4350 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -2777,12 +2777,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );

     pic_out->img.i_csp = X264_CSP_NV12;
+#if X264_HIGH_BIT_DEPTH
+    pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
+#endif
     pic_out->img.i_plane = h->fdec->i_plane;
     for( int i = 0; i < 2; i++ )
     {
-        pic_out->img.i_stride[i] = h->fdec->i_stride[i];
-        // FIXME This breaks the API when pixel != uint8_t.
-        pic_out->img.plane[i] = h->fdec->plane[i];
+        pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
+        pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
     }

     x264_frame_push_unused( thread_current, h->fenc );
diff --git a/filters/video/crop.c b/filters/video/crop.c
index 2a3c2b1..b70476e 100644
--- a/filters/video/crop.c
+++ b/filters/video/crop.c
@@ -103,8 +103,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
     output->img.height = h->dims[3];
     /* shift the plane pointers down 'top' rows and right 'left' columns. */
     for( int i = 0; i < output->img.planes; i++ )
-        output->img.plane[i] += (int)(output->img.stride[i] * h->dims[1] * h->csp->height[i]
-                                    + h->dims[0] * h->csp->width[i]);
+    {
+        intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
+        offset += h->dims[0] * h->csp->width[i];
+        offset *= x264_cli_csp_depth_factor( output->img.csp );
+        output->img.plane[i] += offset;
+    }
     return 0;
 }

diff --git a/filters/video/depth.c b/filters/video/depth.c
new file mode 100644
index 0000000..a0411c5
--- /dev/null
+++ b/filters/video/depth.c
@@ -0,0 +1,228 @@
+/*****************************************************************************
+ * depth.c: x264 video depth filter
+ *****************************************************************************
+ * Copyright (C) 2010 Oskar Arvidsson <oskar@irock.se>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "video.h"
+#define NAME "depth"
+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
+
+cli_vid_filter_t depth_filter;
+
+typedef struct
+{
+    hnd_t prev_hnd;
+    cli_vid_filter_t prev_filter;
+
+    int bit_depth;
+    int dst_csp;
+    cli_pic_t buffer;
+    int16_t *error_buf;
+} depth_hnd_t;
+
+static int depth_filter_csp_is_supported( int csp )
+{
+    int csp_mask = csp & X264_CSP_MASK;
+    return csp_mask == X264_CSP_I420 ||
+           csp_mask == X264_CSP_I422 ||
+           csp_mask == X264_CSP_I444 ||
+           csp_mask == X264_CSP_YV12 ||
+           csp_mask == X264_CSP_NV12;
+}
+
+static int csp_num_interleaved( int csp, int plane )
+{
+    int csp_mask = csp & X264_CSP_MASK;
+    return ( csp_mask == X264_CSP_NV12 && plane == 1 ) ? 2 : 1;
+}
+
+/* The dithering algorithm is based on Sierra-2-4A error diffusion. It has been
+ * written in such a way so that if the source has been upconverted using the
+ * same algorithm as used in scale_image, dithering down to the source bit
+ * depth again is lossless. */
+#define DITHER_PLANE( pitch ) \
+static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
+                                        int width, int height, int16_t *errors ) \
+{ \
+    const int lshift = 16-BIT_DEPTH; \
+    const int rshift = 2*BIT_DEPTH-16; \
+    const int pixel_max = (1 << BIT_DEPTH)-1; \
+    const int half = 1 << (16-BIT_DEPTH); \
+    memset( errors, 0, (width+1) * sizeof(int16_t) ); \
+    for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
+    { \
+        int err = 0; \
+        for( int x = 0; x < width; x++ ) \
+        { \
+            err += errors[x] + errors[x+1]; \
+            dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
+            errors[x] = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); \
+            err = errors[x] << 1; \
+        } \
+    } \
+}
+
+DITHER_PLANE( 1 )
+DITHER_PLANE( 2 )
+
+static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf )
+{
+    int csp_mask = img->csp & X264_CSP_MASK;
+    for( int i = 0; i < img->planes; i++ )
+    {
+        int num_interleaved = csp_num_interleaved( img->csp, i );
+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
+        int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved;
+
+#define CALL_DITHER_PLANE( pitch, off ) \
+        dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
+                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf )
+
+        if( num_interleaved == 1 )
+        {
+            CALL_DITHER_PLANE( 1, 0 );
+        }
+        else
+        {
+            CALL_DITHER_PLANE( 2, 0 );
+            CALL_DITHER_PLANE( 2, 1 );
+        }
+    }
+}
+
+static void scale_image( cli_image_t *output, cli_image_t *img )
+{
+    /* this function mimics how swscale does upconversion. 8-bit is converted
+     * to 16-bit through left shifting the original value by 8 and then adding
+     * the original value to that. This effectively keeps the full color range
+     * while also being fast. for n-bit we basically do the same thing, but we
+     * discard the lower 16-n bits. */
+    int csp_mask = img->csp & X264_CSP_MASK;
+    const int shift = 16-BIT_DEPTH;
+    for( int i = 0; i < img->planes; i++ )
+    {
+        uint8_t *src = img->plane[i];
+        uint16_t *dst = (uint16_t*)output->plane[i];
+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
+        int width = x264_cli_csps[csp_mask].width[i] * img->width;
+
+        for( int j = 0; j < height; j++ )
+        {
+            for( int k = 0; k < width; k++ )
+                dst[k] = ((src[k] << 8) + src[k]) >> shift;
+
+            src += img->stride[i];
+            dst += output->stride[i]/2;
+        }
+    }
+}
+
+static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
+{
+    depth_hnd_t *h = handle;
+
+    if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
+        return -1;
+
+    if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH )
+    {
+        dither_image( &h->buffer.img, &output->img, h->error_buf );
+        output->img = h->buffer.img;
+    }
+    else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) )
+    {
+        scale_image( &h->buffer.img, &output->img );
+        output->img = h->buffer.img;
+    }
+    return 0;
+}
+
+static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
+{
+    depth_hnd_t *h = handle;
+    return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
+}
+
+static void free_filter( hnd_t handle )
+{
+    depth_hnd_t *h = handle;
+    h->prev_filter.free( h->prev_hnd );
+    x264_cli_pic_clean( &h->buffer );
+    x264_free( h );
+}
+
+static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
+                 x264_param_t *param, char *opt_string )
+{
+    int ret = 0;
+    int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
+    int csp = ~(~info->csp ^ change_fmt);
+    int bit_depth = 8*x264_cli_csp_depth_factor( csp );
+
+    if( opt_string )
+    {
+        static const char *optlist[] = { "bit_depth", NULL };
+        char **opts = x264_split_options( opt_string, optlist );
+
+        if( opts )
+        {
+            char *str_bit_depth = x264_get_option( "bit_depth", opts );
+            bit_depth = x264_otoi( str_bit_depth, -1 );
+
+            ret = bit_depth < 8 || bit_depth > 16;
+            csp = bit_depth > 8 ? csp | X264_CSP_HIGH_DEPTH : csp & ~X264_CSP_HIGH_DEPTH;
+            change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
+            x264_free_string_array( opts );
+        }
+        else
+            ret = 1;
+    }
+
+    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth %d\n", BIT_DEPTH )
+    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
+
+    /* only add the filter to the chain if it's needed */
+    if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
+    {
+        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
+        depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
+
+        if( !h )
+            return -1;
+
+        h->error_buf = (int16_t*)(h + 1);
+        h->dst_csp = csp;
+        h->bit_depth = bit_depth;
+        h->prev_hnd = *handle;
+        h->prev_filter = *filter;
+
+        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
+        {
+            x264_free( h );
+            return -1;
+        }
+
+        *handle = h;
+        *filter = depth_filter;
+        info->csp = h->dst_csp;
+    }
+
+    return 0;
+}
+
+cli_vid_filter_t depth_filter = { NAME, NULL, init, get_frame, release_frame, free_filter, NULL };
diff --git a/filters/video/internal.c b/filters/video/internal.c
index 444ea1f..ef096dc 100644
--- a/filters/video/internal.c
+++ b/filters/video/internal.c
@@ -51,6 +51,7 @@ int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
     {
         int height = in->img.height * x264_cli_csps[csp].height[i];
         int width =  in->img.width  * x264_cli_csps[csp].width[i];
+        width *= x264_cli_csp_depth_factor( in->img.csp );
         x264_cli_plane_copy( out->img.plane[i], out->img.stride[i], in->img.plane[i],
                              in->img.stride[i], width, height );
     }
diff --git a/filters/video/resize.c b/filters/video/resize.c
index 38077b2..04b5e73 100644
--- a/filters/video/resize.c
+++ b/filters/video/resize.c
@@ -79,10 +79,21 @@ static void help( int longhelp )
             "            - fittobox: resizes the video based on the desired contraints\n"
             "               - width, height, both\n"
             "            - fittobox and sar: same as above except with specified sar\n"
-            "            simultaneously converting to the given colorspace\n"
-            "            using resizer method [\"bicubic\"]\n"
-            "             - fastbilinear, bilinear, bicubic, experimental, point,\n"
-            "             - area, bicublin, gauss, sinc, lanczos, spline\n" );
+            "            - csp: convert to the given csp. syntax: [name][:depth]\n"
+            "               - valid csp names [keep current]: " );
+
+    for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
+    {
+        printf( "%s", x264_cli_csps[i].name );
+        if( i+1 < X264_CSP_CLI_MAX )
+            printf( ", " );
+    }
+    printf( "\n"
+            "               - depth: 8 or 16 bits per pixel [keep current]\n"
+            "            note: not all depths are supported by all csps.\n"
+            "            - method: use resizer method [\"bicubic\"]\n"
+            "               - fastbilinear, bilinear, bicubic, experimental, point,\n"
+            "               - area, bicublin, gauss, sinc, lanczos, spline\n" );
 }

 static uint32_t convert_cpu_to_flag( uint32_t cpu )
@@ -131,13 +142,15 @@ static int convert_csp_to_pix_fmt( int csp )
         return csp&X264_CSP_MASK;
     switch( csp&X264_CSP_MASK )
     {
-        case X264_CSP_I420: return PIX_FMT_YUV420P;
-        case X264_CSP_I422: return PIX_FMT_YUV422P;
-        case X264_CSP_I444: return PIX_FMT_YUV444P;
-        case X264_CSP_NV12: return PIX_FMT_NV12;
-        case X264_CSP_YV12: return PIX_FMT_YUV420P; /* specially handled via swapping chroma */
-        case X264_CSP_BGR:  return PIX_FMT_BGR24;
-        case X264_CSP_BGRA: return PIX_FMT_BGRA;
+        case X264_CSP_YV12: /* specially handled via swapping chroma */
+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
+        /* the next 3 csps have no equivalent 16bit depth in swscale */
+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGR24;
+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGRA;
         default:            return PIX_FMT_NONE;
     }
 }
@@ -147,23 +160,30 @@ static int pick_closest_supported_csp( int csp )
     int pix_fmt = convert_csp_to_pix_fmt( csp );
     switch( pix_fmt )
     {
+        case PIX_FMT_YUV420P16LE:
+        case PIX_FMT_YUV420P16BE:
+            return X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_YUV422P:
-        case PIX_FMT_YUV422P16LE:
-        case PIX_FMT_YUV422P16BE:
         case PIX_FMT_YUYV422:
         case PIX_FMT_UYVY422:
             return X264_CSP_I422;
+        case PIX_FMT_YUV422P16LE:
+        case PIX_FMT_YUV422P16BE:
+            return X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_YUV444P:
+            return X264_CSP_I444;
         case PIX_FMT_YUV444P16LE:
         case PIX_FMT_YUV444P16BE:
-            return X264_CSP_I444;
-        case PIX_FMT_RGB24:    // convert rgb to bgr
-        case PIX_FMT_RGB48BE:
-        case PIX_FMT_RGB48LE:
+            return X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
+        case PIX_FMT_RGB24:
         case PIX_FMT_RGB565BE:
         case PIX_FMT_RGB565LE:
         case PIX_FMT_RGB555BE:
         case PIX_FMT_RGB555LE:
+            return X264_CSP_RGB;
+        case PIX_FMT_RGB48BE:
+        case PIX_FMT_RGB48LE:
+            return X264_CSP_RGB | X264_CSP_HIGH_DEPTH;
         case PIX_FMT_BGR24:
         case PIX_FMT_BGR565BE:
         case PIX_FMT_BGR565LE:
@@ -209,12 +229,27 @@ static int handle_opts( const char **optlist, char **opts, video_info_t *info, r

     if( str_csp )
     {
-        /* output csp was specified, lookup against valid values */
+        /* output csp was specified, first check if optional depth was provided */
+        char *str_depth = strchr( str_csp, ':' );
+        int depth = x264_cli_csp_depth_factor( info->csp ) * 8;
+        if( str_depth )
+        {
+            /* csp bit depth was specified */
+            *str_depth++ = '\0';
+            depth = x264_otoi( str_depth, -1 );
+            FAIL_IF_ERROR( depth != 8 && depth != 16, "unsupported bit depth %d\n", depth );
+        }
+        /* now lookup against the list of valid csps */
         int csp;
-        for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
-            csp--;
+        if( strlen( str_csp ) == 0 )
+            csp = info->csp & X264_CSP_MASK;
+        else
+            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
+                csp--;
         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
         h->dst_csp = csp;
+        if( depth == 16 )
+            h->dst_csp |= X264_CSP_HIGH_DEPTH;
     }

     /* if the input sar is currently invalid, set it to 1:1 so it can be used in math */
@@ -366,8 +401,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
     h->swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
     int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );

+    int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH );
+    int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
+
     /* confirm swscale can support this conversion */
+    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
+                   "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
+                   info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
+    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
+                   "output colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
+                   h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
     FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
                    "swscale is not compatible with interlaced vertical resizing\n" )
diff --git a/filters/video/video.c b/filters/video/video.c
index 61dc8c6..71ae01e 100644
--- a/filters/video/video.c
+++ b/filters/video/video.c
@@ -51,6 +51,7 @@ void x264_register_vid_filters()
     REGISTER_VFILTER( fix_vfr_pts );
     REGISTER_VFILTER( resize );
     REGISTER_VFILTER( select_every );
+    REGISTER_VFILTER( depth );
 #if HAVE_GPL
 #endif
 }
diff --git a/input/input.c b/input/input.c
index 78c7a88..a14bd3c 100644
--- a/input/input.c
+++ b/input/input.c
@@ -32,7 +32,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
     [X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
     [X264_CSP_NV12] = { "nv12", 2, { 1,  1 },     { 1, .5 },     2, 2 },
     [X264_CSP_BGR]  = { "bgr",  1, { 3 },         { 1 },         1, 1 },
-    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 }
+    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 },
+    [X264_CSP_RGB]  = { "rgb",  1, { 3 },         { 1 },         1, 1 },
 };

 int x264_cli_csp_is_invalid( int csp )
@@ -41,6 +42,13 @@ int x264_cli_csp_is_invalid( int csp )
     return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
 }

+int x264_cli_csp_depth_factor( int csp )
+{
+    if( x264_cli_csp_is_invalid( csp ) )
+        return 0;
+    return (csp & X264_CSP_HIGH_DEPTH) ? 2 : 1;
+}
+
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
 {
     int csp_mask = csp & X264_CSP_MASK;
@@ -48,6 +56,7 @@ uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
         return 0;
     uint64_t size = (uint64_t)width * height;
     size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane];
+    size *= x264_cli_csp_depth_factor( csp );
     return size;
 }

@@ -78,7 +87,7 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
          pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
          if( !pic->img.plane[i] )
              return -1;
-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
+         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
     }

     return 0;
diff --git a/input/input.h b/input/input.h
index 972dd9c..43826d7 100644
--- a/input/input.h
+++ b/input/input.h
@@ -36,6 +36,7 @@ typedef struct
     char *index_file;
     char *resolution;
     char *colorspace;
+    int bit_depth;
     char *timebase;
     int seek;
 } cli_input_opt_t;
@@ -103,8 +104,9 @@ extern cli_input_t input;
 #define X264_CSP_I444          (X264_CSP_MAX+1)  /* yuv 4:4:4 planar    */
 #define X264_CSP_BGR           (X264_CSP_MAX+2)  /* packed bgr 24bits   */
 #define X264_CSP_BGRA          (X264_CSP_MAX+3)  /* packed bgr 32bits   */
-#define X264_CSP_CLI_MAX       (X264_CSP_MAX+4)  /* end of list         */
-#define X264_CSP_OTHER          0x2000           /* non x264 colorspace */
+#define X264_CSP_RGB           (X264_CSP_MAX+4)  /* packed rgb 24bits   */
+#define X264_CSP_CLI_MAX       (X264_CSP_MAX+5)  /* end of list         */
+#define X264_CSP_OTHER          0x4000           /* non x264 colorspace */

 typedef struct
 {
@@ -119,6 +121,7 @@ typedef struct
 extern const x264_cli_csp_t x264_cli_csps[];

 int      x264_cli_csp_is_invalid( int csp );
+int      x264_cli_csp_depth_factor( int csp );
 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
 void     x264_cli_pic_clean( cli_pic_t *pic );
 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
diff --git a/input/raw.c b/input/raw.c
index f5fbed6..9617fb1 100644
--- a/input/raw.c
+++ b/input/raw.c
@@ -34,11 +34,12 @@ typedef struct
     int next_frame;
     uint64_t plane_size[4];
     uint64_t frame_size;
+    int bit_depth;
 } raw_hnd_t;

 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
 {
-    raw_hnd_t *h = malloc( sizeof(raw_hnd_t) );
+    raw_hnd_t *h = calloc( 1, sizeof(raw_hnd_t) );
     if( !h )
         return -1;

@@ -61,8 +62,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
     else /* default */
         info->csp = X264_CSP_I420;

-    h->next_frame = 0;
-    info->vfr     = 0;
+    h->bit_depth = opt->bit_depth;
+    FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );
+    if( h->bit_depth > 8 )
+        info->csp |= X264_CSP_HIGH_DEPTH;

     if( !strcmp( psz_filename, "-" ) )
         h->fh = stdin;
@@ -73,11 +76,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c

     info->thread_safe = 1;
     info->num_frames  = 0;
-    h->frame_size = 0;
-    for( int i = 0; i < x264_cli_csps[info->csp].planes; i++ )
+    info->vfr         = 0;
+
+    const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
+    for( int i = 0; i < csp->planes; i++ )
     {
         h->plane_size[i] = x264_cli_pic_plane_size( info->csp, info->width, info->height, i );
         h->frame_size += h->plane_size[i];
+        /* x264_cli_pic_plane_size returns the size in bytes, we need the value in pixels from here on */
+        h->plane_size[i] /= x264_cli_csp_depth_factor( info->csp );
     }

     if( x264_is_regular_file( h->fh ) )
@@ -95,8 +102,22 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
 static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
 {
     int error = 0;
+    int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
     for( int i = 0; i < pic->img.planes && !error; i++ )
-        error |= fread( pic->img.plane[i], h->plane_size[i], 1, h->fh ) <= 0;
+    {
+        error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
+        if( h->bit_depth & 7 )
+        {
+            /* upconvert non 16bit high depth planes to 16bit using the same
+             * algorithm as used in the depth filter. */
+            uint16_t *plane = (uint16_t*)pic->img.plane[i];
+            uint64_t pixel_count = h->plane_size[i];
+            int lshift = 16 - h->bit_depth;
+            int rshift = 2*h->bit_depth - 16;
+            for( uint64_t j = 0; j < pixel_count; j++ )
+                plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
+        }
+    }
     return error;
 }

diff --git a/x264.c b/x264.c
index 5bd2af7..415181c 100644
--- a/x264.c
+++ b/x264.c
@@ -214,7 +214,7 @@ static void print_version_info()
 #else
     printf( "using a non-gcc compiler\n" );
 #endif
-    printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
+    printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
     printf( "x264 license: " );
 #if HAVE_GPL
     printf( "GPL version 2 or later\n" );
@@ -375,7 +375,7 @@ static void Help( x264_param_t *defaults, int longhelp )
 #else
         "no",
 #endif
-        BIT_DEPTH
+        x264_bit_depth
       );
     H0( "Example usage:\n" );
     H0( "\n" );
@@ -697,6 +697,7 @@ static void Help( x264_param_t *defaults, int longhelp )
         "                                  - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
     H1( "      --input-csp <string>    Specify input colorspace format for raw input\n" );
     print_csp_names( longhelp );
+    H1( "      --input-depth <integer> Specify input bit depth for raw input\n" );
     H1( "      --input-res <intxint>   Specify input resolution (width x height)\n" );
     H1( "      --index <string>        Filename for input index file\n" );
     H0( "      --sar width:height      Specify Sample Aspect Ratio\n" );
@@ -769,7 +770,8 @@ enum {
     OPT_LOG_LEVEL,
     OPT_VIDEO_FILTER,
     OPT_INPUT_RES,
-    OPT_INPUT_CSP
+    OPT_INPUT_CSP,
+    OPT_INPUT_DEPTH
 } OptionsOPT;

 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
@@ -921,6 +923,7 @@ static struct option long_options[] =
     { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
     { "input-res",   required_argument, NULL, OPT_INPUT_RES },
     { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
+    { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
     {0, 0, 0, 0}
 };

@@ -1082,10 +1085,16 @@ static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info,
     if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
         param->i_csp = info->csp;
     else
-        param->i_csp = X264_CSP_I420;
+        param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
     if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
         return -1;

+    char args[20];
+    sprintf( args, "bit_depth=%d", x264_bit_depth );
+
+    if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
+        return -1;
+
     return 0;
 }

@@ -1138,6 +1147,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )

     memset( opt, 0, sizeof(cli_opt_t) );
     memset( &input_opt, 0, sizeof(cli_input_opt_t) );
+    input_opt.bit_depth = 8;
     opt->b_progress = 1;

     /* Presets are applied before all other options. */
@@ -1283,6 +1293,9 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
             case OPT_INPUT_CSP:
                 input_opt.colorspace = optarg;
                 break;
+            case OPT_INPUT_DEPTH:
+                input_opt.bit_depth = atoi( optarg );
+                break;
             default:
 generic_option:
             {
diff --git a/x264.h b/x264.h
index 56d424c..c9b182a 100644
--- a/x264.h
+++ b/x264.h
@@ -180,7 +180,8 @@ static const char * const x264_open_gop_names[] = { "none", "normal", "bluray",
 #define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
 #define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
 #define X264_CSP_MAX            0x0004  /* end of list */
-#define X264_CSP_VFLIP          0x1000  /* */
+#define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
+#define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */

 /* Slice type */
 #define X264_TYPE_AUTO          0x0000  /* Let x264 choose the right type */
@@ -342,7 +343,7 @@ typedef struct x264_param_t
     {
         int         i_rc_method;    /* X264_RC_* */

-        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
+        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)) */
         int         i_qp_min;       /* min allowed QP value */
         int         i_qp_max;       /* max allowed QP value */
         int         i_qp_step;      /* max QP step between frames */
@@ -566,6 +567,15 @@ int     x264_param_apply_profile( x264_param_t *, const char *profile );
  * Picture structures and functions
  ****************************************************************************/

+/* x264_bit_depth:
+ *      Specifies the number of bits per pixel sample that x264 uses. This is also the
+ *      bit depth that x264 encodes in. If this value is > 8, x264 will read
+ *      two bytes of input data for each pixel sample, and expect the upper
+ *      (16-x264_bit_depth) bits to be zero.
+ *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
+ *      colorspace depth as well. */
+extern const int x264_bit_depth;
+
 enum pic_struct_e
 {
     PIC_STRUCT_AUTO              = 0, // automatically decide (default)
--
1.7.2.3


From 195cf9bd51203eb18cdff5542b27caf635e7b1cf Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Sun, 26 Sep 2010 21:04:30 -0700
Subject: [PATCH 8/8] Add High 10 Intra profile support (AVC-Intra)
 x264 should now be able to encode compliant AVC-Intra 50.
 With a 10-bit-compiled version of x264, a sample commandline for 1080i25 might be:
 --interlaced --keyint 1 --vbv-bufsize 2000 --bitrate 50000 --vbv-maxrate 50000 --nal-hrd cbr

Also print "Constrained Baseline" for baseline profile, since that's all x264 (and everything else in the world) supports.
Also reorganize parameter validation a bit to reduce some spurious warnings.
---
 encoder/encoder.c |   14 +++++++++-----
 encoder/set.c     |   15 ++++++++++-----
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index 62a4350..a1e8383 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -439,6 +439,13 @@ static int x264_validate_parameters( x264_t *h )
     if( h->i_thread_frames > 1 )
         h->param.nalu_process = NULL;

+    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
+    if( h->param.i_keyint_max == 1 )
+    {
+        h->param.b_intra_refresh = 0;
+        h->param.analyse.i_weighted_pred = 0;
+    }
+
     if( h->param.b_interlaced )
     {
         if( h->param.analyse.i_me_method >= X264_ME_ESA )
@@ -576,7 +583,6 @@ static int x264_validate_parameters( x264_t *h )

     h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
     h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
-    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
     if( h->param.i_scenecut_threshold < 0 )
         h->param.i_scenecut_threshold = 0;
     if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
@@ -586,8 +592,6 @@ static int x264_validate_parameters( x264_t *h )
     }
     h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
     h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
-    if( h->param.i_keyint_max == 1 )
-        h->param.b_intra_refresh = 0;
     h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
     if( h->param.i_bframe <= 1 )
         h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
@@ -1155,10 +1159,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
         fclose( f );
     }

-    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
+    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
                           h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
                           h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
-                          h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
+                          h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
                           "High 4:4:4 Predictive";
     char level[4];
     snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
diff --git a/encoder/set.c b/encoder/set.c
index a003012..0a24bf7 100644
--- a/encoder/set.c
+++ b/encoder/set.c
@@ -121,17 +121,17 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
     sps->b_constraint_set1  = sps->i_profile_idc <= PROFILE_MAIN;
     /* Never set constraint_set2, it is not necessary and not used in real world. */
     sps->b_constraint_set2  = 0;
+    sps->b_constraint_set3  = 0;

+    sps->i_level_idc = param->i_level_idc;
     if( param->i_level_idc == 9 && ( sps->i_profile_idc >= PROFILE_BASELINE && sps->i_profile_idc <= PROFILE_EXTENDED ) )
     {
         sps->b_constraint_set3 = 1; /* level 1b with Baseline, Main or Extended profile is signalled via constraint_set3 */
         sps->i_level_idc      = 11;
     }
-    else
-    {
-        sps->b_constraint_set3 = 0;
-        sps->i_level_idc = param->i_level_idc;
-    }
+    /* High 10 Intra profile: intra-only (keyint 1) High 10 is signalled via constraint_set3 */
+    if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH10 )
+        sps->b_constraint_set3 = 1;

     sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
     /* extra slot with pyramid so that we don't have to override the
@@ -140,6 +140,11 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
     sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
                             param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
     sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
+    if( param->i_keyint_max == 1 )
+    {
+        sps->i_num_ref_frames = 0;
+        sps->vui.i_max_dec_frame_buffering = 0;
+    }

     /* number of refs + current frame */
     int max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
--
1.7.2.3