Untitled

From 97303087110867a0800610f30b1a467e6dd7c77e Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Thu, 21 Jan 2010 10:00:07 -0800
Subject: [PATCH 01/11] Merge nnz_backup with scratch buffer
 Slightly less memory usage.

---
 common/common.h     |    1 -
 common/frame.c      |    5 +++--
 common/macroblock.c |    2 --
 encoder/encoder.c   |    4 +++-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/common/common.h b/common/common.h
index 02d1748..df39f26 100644
--- a/common/common.h
+++ b/common/common.h
@@ -532,7 +532,6 @@ struct x264_t
         int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
         int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
         uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
-        uint8_t (*nnz_backup)[16];          /* when using cavlc + 8x8dct, the deblocker uses a modified nnz */

          /* buffer for weighted versions of the reference frames */
         uint8_t *p_weight_buf[16];
diff --git a/common/frame.c b/common/frame.c
index 08ef87f..3ef303a 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -661,9 +661,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     int stride2y  = stridey << b_interlaced;
     int strideuv  = h->fdec->i_stride[1];
     int stride2uv = strideuv << b_interlaced;
+    uint8_t (*nnz_backup)[16] = h->scratch_buffer;

     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
+        munge_cavlc_nnz( h, mb_y, nnz_backup, munge_cavlc_nnz_row );

     for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
     {
@@ -823,7 +824,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     }

     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
+        munge_cavlc_nnz( h, mb_y, nnz_backup, restore_cavlc_nnz_row );
 }

 void x264_frame_deblock( x264_t *h )
diff --git a/common/macroblock.c b/common/macroblock.c
index 921d2b9..10f09ac 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -701,7 +701,6 @@ int x264_macroblock_cache_init( x264_t *h )

     /* all coeffs */
     CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 24 * sizeof(uint8_t) );
-    CHECKED_MALLOC( h->mb.nnz_backup, h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t) );

     if( h->param.b_cabac )
     {
@@ -797,7 +796,6 @@ void x264_macroblock_cache_end( x264_t *h )
     }
     x264_free( h->mb.intra4x4_pred_mode );
     x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.nnz_backup );
     x264_free( h->mb.mb_transform_size );
     x264_free( h->mb.skipbp );
     x264_free( h->mb.cbp );
diff --git a/encoder/encoder.c b/encoder/encoder.c
index f97df1b..a0af7ea 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1018,7 +1018,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
         int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
             ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
-        CHECKED_MALLOC( h->thread[i]->scratch_buffer, X264_MAX4( buf_hpel, buf_ssim, buf_tesa, buf_mbtree ) );
+        int buf_nnz = (h->param.b_cabac) * (h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t));
+        int scratch_size = X264_MAX4( buf_hpel, buf_ssim, buf_tesa, X264_MAX( buf_mbtree, buf_nnz ) );
+        CHECKED_MALLOC( h->thread[i]->scratch_buffer, scratch_size );
     }

     if( x264_ratecontrol_new( h ) < 0 )
--
1.6.1.2


From 24d1da8806f4419347ed0954788f081924ed64dc Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Thu, 21 Jan 2010 23:07:11 -0800
Subject: [PATCH 02/11] Fix bitstream alignment with multiple slices
 Broke multi-slice encoding on CPUs without unaligned access.
 New system simply forces a bitstream realignment at the start of each writing function and flushes when it reaches the end.

---
 common/bs.h       |   11 +++++++++++
 encoder/encoder.c |    2 +-
 encoder/set.c     |    8 ++++++++
 3 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/common/bs.h b/common/bs.h
index d13eb03..3bbb436 100644
--- a/common/bs.h
+++ b/common/bs.h
@@ -92,6 +92,17 @@ static inline void bs_flush( bs_t *s )
     s->p += WORD_SIZE - s->i_left / 8;
     s->i_left = WORD_SIZE*8;
 }
+static inline void bs_realign( bs_t *s )
+{
+    int offset = ((intptr_t)s->p & 3);
+    if( offset )
+    {
+        s->p       = (uint8_t*)s->p - offset;
+        s->i_left  = (WORD_SIZE - offset)*8;
+        s->cur_bits = endian_fix32(*(uint32_t *)(s->p));
+        s->cur_bits >>= (4-offset)*8;
+    }
+}

 static inline void bs_write( bs_t *s, int i_count, uint32_t i_bits )
 {
diff --git a/encoder/encoder.c b/encoder/encoder.c
index a0af7ea..8afadb0 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1206,7 +1206,6 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
     x264_pps_write( &h->out.bs, h->pps );
     if( x264_nal_end( h ) )
         return -1;
-    bs_flush( &h->out.bs );

     frame_size = x264_encoder_encapsulate_nals( h );

@@ -1657,6 +1656,7 @@ static int x264_slice_write( x264_t *h )
     /* Assume no more than 3 bytes of NALU escaping. */
     int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-3-NALU_OVERHEAD)*8 : INT_MAX;
     int starting_bits = bs_pos(&h->out.bs);
+    bs_realign( &h->out.bs );

     /* Slice */
     x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
diff --git a/encoder/set.c b/encoder/set.c
index 641eae9..f79919b 100644
--- a/encoder/set.c
+++ b/encoder/set.c
@@ -210,6 +210,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )

 void x264_sps_write( bs_t *s, x264_sps_t *sps )
 {
+    bs_realign( s );
     bs_write( s, 8, sps->i_profile_idc );
     bs_write( s, 1, sps->b_constraint_set0 );
     bs_write( s, 1, sps->b_constraint_set1 );
@@ -359,6 +360,7 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
     }

     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps )
@@ -423,6 +425,7 @@ void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *

 void x264_pps_write( bs_t *s, x264_pps_t *pps )
 {
+    bs_realign( s );
     bs_write_ue( s, pps->i_id );
     bs_write_ue( s, pps->i_sps_id );

@@ -465,12 +468,14 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
     }

     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )
 {
     int payload_size;

+    bs_realign( s );
     bs_write( s, 8, 0x06 ); // payload_type = Recovery Point
     payload_size = bs_size_ue( recovery_frame_cnt ) + 4;

@@ -482,6 +487,7 @@ void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )

     bs_align_10( s );
     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 int x264_sei_version_write( x264_t *h, bs_t *s )
@@ -505,6 +511,7 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
              X264_BUILD, X264_VERSION, opts );
     length = strlen(version)+1+16;

+    bs_realign( s );
     bs_write( s, 8, 0x5 ); // payload_type = user_data_unregistered
     // payload_size
     for( i = 0; i <= length-255; i += 255 )
@@ -517,6 +524,7 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
         bs_write( s, 8, version[i] );

     bs_rbsp_trailing( s );
+    bs_flush( s );

     x264_free( opts );
     x264_free( version );
--
1.6.1.2


From c19bfc3d3111129f572e1b6091f4919d6f6c1d96 Mon Sep 17 00:00:00 2001
From: David Conrad <lessen42@gmail.com>
Date: Sat, 23 Jan 2010 18:05:25 -0800
Subject: [PATCH 03/11] Fix lavf input with pipes and image sequences
 x264 should now be able to encode from an image sequence using an image2-style formatted string (e.g. file%02d.jpg).

---
 x264.c |   10 ++++------
 1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/x264.c b/x264.c
index 3a07bb3..db33536 100644
--- a/x264.c
+++ b/x264.c
@@ -719,13 +719,11 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
     if( b_regular )
     {
         FILE *f = fopen( filename, "r" );
-        if( !f )
+        if( f )
         {
-            fprintf( stderr, "x264 [error]: could not open input file `%s'\n", filename );
-            return -1;
+            b_regular = x264_is_regular_file( f );
+            fclose( f );
         }
-        b_regular = x264_is_regular_file( f );
-        fclose( f );
     }
     const char *module = b_auto ? ext : demuxer;

@@ -756,7 +754,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
 #endif
 #ifdef LAVF_INPUT
         if( (b_auto || !strcasecmp( demuxer, "lavf" )) &&
-            (!b_regular || !lavf_input.open_file( filename, p_handle, info, opt )) )
+            !lavf_input.open_file( filename, p_handle, info, opt ) )
         {
             module = "lavf";
             b_auto = 0;
--
1.6.1.2


From 3b0dd65039d8ffe694ac005c25468fe0b7cfd764 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Mon, 25 Jan 2010 11:23:55 -0800
Subject: [PATCH 04/11] Hardcode the bs_t in cavlc.c; passing it around is a waste.

Saves ~1.5kb of code size, very slight speed boost.
---
 encoder/cavlc.c      |  143 ++++++++++++++++++++++++++------------------------
 encoder/encoder.c    |    2 +-
 encoder/macroblock.h |    2 +-
 encoder/rdo.c        |    6 +--
 4 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index 59f362a..c65c9bd 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -61,8 +61,9 @@ static const uint8_t sub_mb_type_b_to_golomb[13]=
 /****************************************************************************
  * block_residual_write_cavlc:
  ****************************************************************************/
-static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_suffix_length, int level )
+static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_length, int level )
 {
+    bs_t *s = &h->out.bs;
     static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
     int i_level_prefix = 15;
     int mask = level >> 15;
@@ -112,8 +113,9 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_s
     return i_suffix_length;
 }

-static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int16_t *l, int nC )
+static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l, int nC )
 {
+    bs_t *s = &h->out.bs;
     static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
     static const int count_cat[5] = {16, 15, 16, 4, 15};
     x264_run_level_t runlevel;
@@ -157,7 +159,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
             i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
         }
         else
-            i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+            i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         for( i = i_trailing+1; i < i_total; i++ )
         {
             val = runlevel.level[i] + LEVEL_TABLE_SIZE/2;
@@ -167,7 +169,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
                 i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
             }
             else
-                i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+                i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         }
     }

@@ -191,18 +193,19 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in

 static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};

-#define block_residual_write_cavlc(h,s,cat,idx,l)\
+#define block_residual_write_cavlc(h,cat,idx,l)\
 {\
     int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? 0 : idx )];\
     uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\
     if( !*nnz )\
-        bs_write_vlc( s, x264_coeff0_token[nC] );\
+        bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
     else\
-        *nnz = block_residual_write_cavlc(h,s,cat,l,nC);\
+        *nnz = block_residual_write_cavlc(h,cat,l,nC);\
 }

-static void cavlc_qp_delta( x264_t *h, bs_t *s )
+static void cavlc_qp_delta( x264_t *h )
 {
+    bs_t *s = &h->out.bs;
     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;

     /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
@@ -225,39 +228,40 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
     bs_write_se( s, i_dqp );
 }

-static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
+static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
 {
+    bs_t *s = &h->out.bs;
     ALIGNED_4( int16_t mvp[2] );
     x264_mb_predict_mv( h, i_list, idx, width, mvp );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
 }

-static inline void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i )
+static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
 {
     switch( h->mb.i_sub_partition[i] )
     {
         case D_L0_8x8:
-            cavlc_mb_mvd( h, s, 0, 4*i, 2 );
+            cavlc_mb_mvd( h, 0, 4*i, 2 );
             break;
         case D_L0_8x4:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 2 );
-            cavlc_mb_mvd( h, s, 0, 4*i+2, 2 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 2 );
+            cavlc_mb_mvd( h, 0, 4*i+2, 2 );
             break;
         case D_L0_4x8:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+1, 1 );
             break;
         case D_L0_4x4:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+2, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+3, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+1, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+2, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+3, 1 );
             break;
     }
 }

-static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
+static inline void x264_macroblock_luma_write_cavlc( x264_t *h, int i8start, int i8end )
 {
     int i8, i4;
     if( h->mb.b_transform_8x8 )
@@ -271,20 +275,23 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s
     for( i8 = i8start; i8 <= i8end; i8++ )
         if( h->mb.i_cbp_luma & (1 << i8) )
             for( i4 = 0; i4 < 4; i4++ )
-                block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
+                block_residual_write_cavlc( h, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
 }

 /*****************************************************************************
  * x264_macroblock_write:
  *****************************************************************************/
-void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
+void x264_macroblock_write_cavlc( x264_t *h )
 {
+    bs_t *s = &h->out.bs;
     const int i_mb_type = h->mb.i_type;
     static const uint8_t i_offsets[3] = {5,23,0};
     int i_mb_i_offset = i_offsets[h->sh.i_type];
     int i;

-#if !RDO_SKIP_BS
+#if RDO_SKIP_BS
+    s->i_bits_encoded = 0;
+#else
     const int i_mb_pos_start = bs_pos( s );
     int       i_mb_pos_tex;
 #endif
@@ -365,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )

             if( h->mb.pic.i_fref[0] > 1 )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
-            cavlc_mb_mvd( h, s, 0, 0, 4 );
+            cavlc_mb_mvd( h, 0, 0, 4 );
         }
         else if( h->mb.i_partition == D_16x8 )
         {
@@ -375,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
             }
-            cavlc_mb_mvd( h, s, 0, 0, 4 );
-            cavlc_mb_mvd( h, s, 0, 8, 4 );
+            cavlc_mb_mvd( h, 0, 0, 4 );
+            cavlc_mb_mvd( h, 0, 8, 4 );
         }
         else if( h->mb.i_partition == D_8x16 )
         {
@@ -386,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
             }
-            cavlc_mb_mvd( h, s, 0, 0, 2 );
-            cavlc_mb_mvd( h, s, 0, 4, 2 );
+            cavlc_mb_mvd( h, 0, 0, 2 );
+            cavlc_mb_mvd( h, 0, 4, 2 );
         }
     }
     else if( i_mb_type == P_8x8 )
@@ -422,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         }

         for( i = 0; i < 4; i++ )
-            cavlc_mb8x8_mvd( h, s, i );
+            cavlc_mb8x8_mvd( h, i );
     }
     else if( i_mb_type == B_8x8 )
     {
@@ -445,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         /* mvd */
         for( i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
-                cavlc_mb_mvd( h, s, 0, 4*i, 2 );
+                cavlc_mb_mvd( h, 0, 4*i, 2 );
         for( i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
-                cavlc_mb_mvd( h, s, 1, 4*i, 2 );
+                cavlc_mb_mvd( h, 1, 4*i, 2 );
     }
     else if( i_mb_type != B_DIRECT )
     {
@@ -463,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         {
             if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
             if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
-            if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
-            if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
+            if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
+            if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
         }
         else
         {
@@ -474,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
             if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
             if( h->mb.i_partition == D_16x8 )
             {
-                if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
-                if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 8, 4 );
-                if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
-                if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 8, 4 );
+                if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
+                if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
+                if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
+                if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
             }
             else //if( h->mb.i_partition == D_8x16 )
             {
-                if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 2 );
-                if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 4, 2 );
-                if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 2 );
-                if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 4, 2 );
+                if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
+                if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
+                if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
+                if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
             }
         }
     }
@@ -509,29 +516,29 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
     /* write residual */
     if( i_mb_type == I_16x16 )
     {
-        cavlc_qp_delta( h, s );
+        cavlc_qp_delta( h );

         /* DC Luma */
-        block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
+        block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );

         /* AC Luma */
         if( h->mb.i_cbp_luma )
             for( i = 0; i < 16; i++ )
-                block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
     }
     else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
     {
-        cavlc_qp_delta( h, s );
-        x264_macroblock_luma_write_cavlc( h, s, 0, 3 );
+        cavlc_qp_delta( h );
+        x264_macroblock_luma_write_cavlc( h, 0, 3 );
     }
     if( h->mb.i_cbp_chroma )
     {
         /* Chroma DC residual present */
-        block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
-        block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
         if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
     }

 #if !RDO_SKIP_BS
@@ -549,36 +556,36 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
  *****************************************************************************/
 static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
 {
+    bs_t *s = &h->out.bs;
     const int i_mb_type = h->mb.i_type;
     int b_8x16 = h->mb.i_partition == D_8x16;
     int j;
-    h->out.bs.i_bits_encoded = 0;

     if( i_mb_type == P_8x8 )
     {
-        cavlc_mb8x8_mvd( h, &h->out.bs, i8 );
-        bs_write_ue( &h->out.bs, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
+        cavlc_mb8x8_mvd( h, i8 );
+        bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
     }
     else if( i_mb_type == P_L0 )
-        cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
+        cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
     {
-        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
-        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
     }
     else //if( i_mb_type == B_8x8 )
     {
         if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
-            cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 2 );
+            cavlc_mb_mvd( h, 0, 4*i8, 2 );
         if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
-            cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 2 );
+            cavlc_mb_mvd( h, 1, 4*i8, 2 );
     }

     for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
     {
-        x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
+        x264_macroblock_luma_write_cavlc( h, i8, i8 );
+        block_residual_write_cavlc( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
+        block_residual_write_cavlc( h, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
         i8 += x264_pixel_size[i_pixel].h >> 3;
     }

@@ -589,12 +596,12 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
 {
     int b_8x4 = i_pixel == PIXEL_8x4;
     h->out.bs.i_bits_encoded = 0;
-    cavlc_mb_mvd( h, &h->out.bs, 0, i4, 1+b_8x4 );
-    block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+    cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
+    block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     if( i_pixel != PIXEL_4x4 )
     {
         i4 += 2-b_8x4;
-        block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+        block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     }

     return h->out.bs.i_bits_encoded;
@@ -612,14 +619,14 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
 {
     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
     bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
-    x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
+    x264_macroblock_luma_write_cavlc( h, i8, i8 );
     return h->out.bs.i_bits_encoded;
 }

 static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
 {
     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
-    block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+    block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     return h->out.bs.i_bits_encoded;
 }

@@ -628,14 +635,14 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h )
     h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
     if( h->mb.i_cbp_chroma )
     {
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );

         if( h->mb.i_cbp_chroma == 2 )
         {
             int i;
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
         }
     }
     return h->out.bs.i_bits_encoded;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 8afadb0..5dc281c 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1741,7 +1741,7 @@ static int x264_slice_write( x264_t *h )
                     bs_write_ue( &h->out.bs, i_skip );  /* skip run */
                     i_skip = 0;
                 }
-                x264_macroblock_write_cavlc( h, &h->out.bs );
+                x264_macroblock_write_cavlc( h );
             }
         }

diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 24a43b1..25beb18 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -45,7 +45,7 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode );

 void x264_macroblock_encode      ( x264_t *h );
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
-void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
+void x264_macroblock_write_cavlc ( x264_t *h );

 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
 void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 9dee56d..3ed4a47 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -159,10 +159,8 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
     }
     else
     {
-        bs_t bs_tmp = h->out.bs;
-        bs_tmp.i_bits_encoded = 0;
-        x264_macroblock_size_cavlc( h, &bs_tmp );
-        i_bits = ( bs_tmp.i_bits_encoded * i_lambda2 + 128 ) >> 8;
+        x264_macroblock_size_cavlc( h );
+        i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
     }

     h->mb.b_transform_8x8 = b_transform_bak;
--
1.6.1.2


From 32be11b2df49dc74f2ab6601e482197e27e2d637 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Tue, 26 Jan 2010 11:41:18 -0800
Subject: [PATCH 05/11] Various threading-related cosmetics
 Simplify a lot of code and remove some unnecessary variables.

---
 common/common.h       |   13 +++------
 encoder/analyse.c     |   12 ++++----
 encoder/encoder.c     |   68 ++++++++++++++++++++++++------------------------
 encoder/ratecontrol.c |   22 ++++++++--------
 4 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/common/common.h b/common/common.h
index df39f26..2205047 100644
--- a/common/common.h
+++ b/common/common.h
@@ -362,16 +362,11 @@ struct x264_t

     /* frame number/poc */
     int             i_frame;
+    int             i_frame_num;

-    int             i_frame_offset; /* decoding only */
-    int             i_frame_num;    /* decoding only */
-    int             i_poc_msb;      /* decoding only */
-    int             i_poc_lsb;      /* decoding only */
-    int             i_poc;          /* decoding only */
-
-    int             i_thread_num;   /* threads only */
-    int             i_nal_type;     /* threads only */
-    int             i_nal_ref_idc;  /* threads only */
+    int             i_thread_frames;
+    int             i_nal_type;
+    int             i_nal_ref_idc;

     /* We use only one SPS and one PPS */
     x264_sps_t      sps_array[1];
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 37d7fd9..666596b 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -413,7 +413,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
             int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
             int thread_mvy_range = i_fmv_range;

-            if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+            if( h->i_thread_frames > 1 )
             {
                 int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
                 int thresh = pix_y + h->param.analyse.i_mv_range_thread;
@@ -1167,7 +1167,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
         {
             h->mb.i_type = P_SKIP;
             x264_analyse_update_cache( h, a );
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
             return;
         }

@@ -1183,7 +1183,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
     }

     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
-    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );

     h->mb.i_type = P_L0;
     if( a->i_mbrd )
@@ -2403,7 +2403,7 @@ intra_analysis:
             /* Fast P_SKIP detection */
             if( h->param.analyse.b_fast_pskip )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
+                if( h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
                     // FIXME don't need to check this if the reference frame is done
                     {}
                 else if( h->param.analyse.i_subpel_refine >= 3 )
@@ -2422,7 +2422,7 @@ intra_analysis:
         {
             h->mb.i_type = P_SKIP;
             h->mb.i_partition = D_16x16;
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
         }
         else
         {
@@ -3143,7 +3143,7 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
     }

 #ifndef NDEBUG
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads && !IS_INTRA(h->mb.i_type) )
+    if( h->i_thread_frames > 1 && !IS_INTRA(h->mb.i_type) )
     {
         int l;
         for( l=0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 5dc281c..5660286 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -421,6 +421,7 @@ static int x264_validate_parameters( x264_t *h )
     }
     else
         h->param.b_sliced_threads = 0;
+    h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;

     if( h->param.b_interlaced )
     {
@@ -589,8 +590,8 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_lookahead = 0;
 #ifdef HAVE_PTHREAD
     if( h->param.i_sync_lookahead )
-        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->param.i_threads + h->param.i_bframe, X264_LOOKAHEAD_MAX );
-    if( h->param.rc.b_stat_read || h->param.i_threads == 1 || h->param.b_sliced_threads )
+        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->i_thread_frames + h->param.i_bframe, X264_LOOKAHEAD_MAX );
+    if( h->param.rc.b_stat_read || h->i_thread_frames == 1 )
         h->param.i_sync_lookahead = 0;
 #else
     h->param.i_sync_lookahead = 0;
@@ -708,7 +709,7 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
         h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;

-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int r = h->param.analyse.i_mv_range_thread;
         int r2;
@@ -718,7 +719,7 @@ static int x264_validate_parameters( x264_t *h )
             // the rest is allocated to whichever thread is far enough ahead to use it.
             // reserving more space increases quality for some videos, but costs more time
             // in thread synchronization.
-            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->param.i_threads - X264_THREAD_HEIGHT;
+            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->i_thread_frames - X264_THREAD_HEIGHT;
             r = max_range / 2;
         }
         r = X264_MAX( r, h->param.analyse.i_me_range );
@@ -886,8 +887,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
         h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
     i_slicetype_length = h->frames.i_delay;
-    if( !h->param.b_sliced_threads )
-        h->frames.i_delay += h->param.i_threads - 1;
+    h->frames.i_delay += h->i_thread_frames - 1;
     h->frames.i_delay = X264_MIN( h->frames.i_delay, X264_LOOKAHEAD_MAX );
     h->frames.i_delay += h->param.i_sync_lookahead;
     h->frames.i_bframe_delay = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 2 : 1) : 0;
@@ -910,11 +910,11 @@ x264_t *x264_encoder_open( x264_param_t *param )

     CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
     /* Allocate room for max refs plus a few extra just in case. */
-    CHECKED_MALLOCZERO( h->frames.unused[1], (h->param.i_threads + 20) * sizeof(x264_frame_t *) );
+    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + 20) * sizeof(x264_frame_t *) );
     CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
-                        + h->param.i_threads + 3) * sizeof(x264_frame_t *) );
+                        + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
     if( h->param.analyse.i_weighted_pred > 0 )
-        CHECKED_MALLOCZERO( h->frames.blank_unused, h->param.i_threads * 4 * sizeof(x264_frame_t *) );
+        CHECKED_MALLOCZERO( h->frames.blank_unused, h->i_thread_frames * 4 * sizeof(x264_frame_t *) );
     h->i_ref0 = 0;
     h->i_ref1 = 0;

@@ -977,7 +977,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;

     h->thread[0] = h;
-    h->i_thread_num = 0;
     for( i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
         CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );

@@ -1501,7 +1500,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
         }
     }

-    if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
         x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );

     min_y = X264_MAX( min_y*16-8, 0 );
@@ -1537,7 +1536,7 @@ static inline int x264_reference_update( x264_t *h )
     int i, j;
     if( !h->fdec->b_kept_as_ref )
     {
-        if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+        if( h->i_thread_frames > 1 )
         {
             x264_frame_push_unused( h, h->fdec );
             h->fdec = x264_frame_pop_unused( h, 1 );
@@ -2036,12 +2035,12 @@ int     x264_encoder_encode( x264_t *h,
     x264_t *thread_current, *thread_prev, *thread_oldest;
     int i_nal_type, i_nal_ref_idc, i_global_qp, i;

-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         thread_prev    = h->thread[ h->i_thread_phase ];
-        h->i_thread_phase = (h->i_thread_phase + 1) % h->param.i_threads;
+        h->i_thread_phase = (h->i_thread_phase + 1) % h->i_thread_frames;
         thread_current = h->thread[ h->i_thread_phase ];
-        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->param.i_threads ];
+        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->i_thread_frames ];
         x264_thread_sync_context( thread_current, thread_prev );
         x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest );
         h = thread_current;
@@ -2100,7 +2099,7 @@ int     x264_encoder_encode( x264_t *h,
         /* 2: Place the frame into the queue for its slice type decision */
         x264_lookahead_put_frame( h, fenc );

-        if( h->frames.i_input <= h->frames.i_delay + (h->param.b_sliced_threads ? 0 : 1 - h->param.i_threads) )
+        if( h->frames.i_input <= h->frames.i_delay + 1 - h->i_thread_frames )
         {
             /* Nothing yet to encode, waiting for filling of buffers */
             pic_out->i_type = X264_TYPE_AUTO;
@@ -2327,7 +2326,7 @@ int     x264_encoder_encode( x264_t *h,
     /* Write frame */
     h->i_threadslice_start = 0;
     h->i_threadslice_end = h->sps->i_mb_height;
-    if( !h->param.b_sliced_threads && h->param.i_threads > 1 )
+    if( h->i_thread_frames > 1 )
     {
         if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
             return -1;
@@ -2564,25 +2563,23 @@ void    x264_encoder_close  ( x264_t *h )

     x264_lookahead_delete( h );

-    for( i = 0; i < h->param.i_threads; i++ )
+    if( h->i_thread_frames > 1 )
     {
-        // don't strictly have to wait for the other threads, but it's simpler than canceling them
-        if( h->thread[i]->b_thread_active )
+        for( i = 0; i < h->i_thread_frames; i++ )
         {
-            x264_pthread_join( h->thread[i]->thread_handle, NULL );
-            assert( h->thread[i]->fenc->i_reference_count == 1 );
-            x264_frame_delete( h->thread[i]->fenc );
+            // don't strictly have to wait for the other threads, but it's simpler than canceling them
+            if( h->thread[i]->b_thread_active )
+            {
+                x264_pthread_join( h->thread[i]->thread_handle, NULL );
+                assert( h->thread[i]->fenc->i_reference_count == 1 );
+                x264_frame_delete( h->thread[i]->fenc );
+            }
         }
-    }
-
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
-    {
-        x264_t *thread_prev;

-        thread_prev = h->thread[h->i_thread_phase];
+        x264_t *thread_prev = h->thread[h->i_thread_phase];
         x264_thread_sync_ratecontrol( h, thread_prev, h );
         x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
-        h->i_frame = thread_prev->i_frame + 1 - h->param.i_threads;
+        h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
     }
     h->i_frame++;

@@ -2833,7 +2830,7 @@ void    x264_encoder_close  ( x264_t *h )
     x264_free( h->nal_buffer );
     x264_analyse_free_costs( h );

-    if( h->param.i_threads > 1)
+    if( h->i_thread_frames > 1)
         h = h->thread[h->i_thread_phase];

     /* frames */
@@ -2878,9 +2875,12 @@ int x264_encoder_delayed_frames( x264_t *h )
 {
     int delayed_frames = 0;
     int i;
-    for( i=0; i<h->param.i_threads; i++ )
-        delayed_frames += h->thread[i]->b_thread_active;
-    h = h->thread[h->i_thread_phase];
+    if( h->i_thread_frames > 1 )
+    {
+        for( i=0; i<h->i_thread_frames; i++ )
+            delayed_frames += h->thread[i]->b_thread_active;
+        h = h->thread[h->i_thread_phase];
+    }
     for( i=0; h->frames.current[i]; i++ )
         delayed_frames++;
     x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 761ff2c..746b17a 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1578,13 +1578,13 @@ static void update_vbv_plan( x264_t *h, int overhead )
 {
     x264_ratecontrol_t *rcc = h->rc;
     rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int j = h->rc - h->thread[0]->rc;
         int i;
-        for( i=1; i<h->param.i_threads; i++ )
+        for( i=1; i<h->i_thread_frames; i++ )
         {
-            x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+            x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
             double bits = t->rc->frame_size_planned;
             if( !t->b_thread_active )
                 continue;
@@ -1794,7 +1794,7 @@ static float rate_estimate_qscale( x264_t *h )
     }
     else
     {
-        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * (h->param.b_sliced_threads?1:h->param.i_threads);
+        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * h->i_thread_frames;

         if( rcc->b_2pass )
         {
@@ -1804,13 +1804,13 @@ static float rate_estimate_qscale( x264_t *h )

             if( rcc->b_vbv )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+                if( h->i_thread_frames > 1 )
                 {
                     int j = h->rc - h->thread[0]->rc;
                     int i;
-                    for( i=1; i<h->param.i_threads; i++ )
+                    for( i=1; i<h->i_thread_frames; i++ )
                     {
-                        x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+                        x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
                         double bits = t->rc->frame_size_planned;
                         if( !t->b_thread_active )
                             continue;
@@ -1821,16 +1821,16 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                if( h->fenc->i_frame < h->param.i_threads )
+                if( h->fenc->i_frame < h->i_thread_frames )
                     predicted_bits += (int64_t)h->fenc->i_frame * rcc->bitrate / rcc->fps;
                 else
-                    predicted_bits += (int64_t)(h->param.i_threads - 1) * rcc->bitrate / rcc->fps;
+                    predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
             }

             diff = predicted_bits - (int64_t)rce.expected_bits;
             q = rce.new_qscale;
             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
-            if( ((h->fenc->i_frame + 1 - h->param.i_threads) >= rcc->fps) &&
+            if( ((h->fenc->i_frame + 1 - h->i_thread_frames) >= rcc->fps) &&
                 (rcc->expected_bits_sum > 0))
             {
                 /* Adjust quant based on the difference between
@@ -1897,7 +1897,7 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
+                int i_frame_done = h->fenc->i_frame + 1 - h->i_thread_frames;

                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );

--
1.6.1.2


From 25f3f396e7b7710fb575be6c5484c102dca860f2 Mon Sep 17 00:00:00 2001
From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
Date: Tue, 26 Jan 2010 16:01:54 -0800
Subject: [PATCH 06/11] Improve DTS generation, move DTS compression into libx264
 This change fixes some cases in which PTS could be less than DTS.

Additionally, a new parameter, b_dts_compress, enables DTS compression.
DTS compression eliminates negative DTS (i.e. initial delay) due to B-frames.
The algorithm changes timebase in order to avoid duplicating DTS.
Currently, in x264cli, only the FLV muxer uses it.  The MP4 muxer doesn't need it, as it uses an EditBox instead.
---
 common/common.c   |    1 +
 common/common.h   |    5 ++++
 encoder/encoder.c |   40 ++++++++++++++++++++++++++++++++++-
 output/flv.c      |   58 +++++++++++++++++-----------------------------------
 output/mp4.c      |   28 ++----------------------
 x264.c            |    6 ++++-
 x264.h            |    5 +++-
 7 files changed, 75 insertions(+), 68 deletions(-)

diff --git a/common/common.c b/common/common.c
index 9eed5c3..b454e37 100644
--- a/common/common.c
+++ b/common/common.c
@@ -157,6 +157,7 @@ void    x264_param_default( x264_param_t *param )
     param->b_annexb = 1;
     param->b_aud = 0;
     param->b_vfr_input = 1;
+    param->b_dts_compress = 0;
 }

 static int parse_enum( const char *arg, const char * const *names, int *dst )
diff --git a/common/common.h b/common/common.h
index 2205047..12d282a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -375,6 +375,9 @@ struct x264_t
     x264_pps_t      *pps;
     int             i_idr_pic_id;

+    /* multiplier for DTS compression */
+    int             i_dts_compress_multiplier;
+
     /* quantization matrix for decoding, [cqm][qp%6][coef] */
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[2])[64];   /* [2][6][64] */
@@ -428,6 +431,8 @@ struct x264_t
         int i_delay;    /* Number of frames buffered for B reordering */
         int     i_bframe_delay;
         int64_t i_bframe_delay_time;
+        int64_t i_init_delta;
+        int64_t i_prev_dts[2];
         int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
         int b_have_sub8x8_esa;
     } frames;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 5660286..9ccc628 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -863,6 +863,18 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->i_frame = -1;
     h->i_frame_num = 0;
     h->i_idr_pic_id = 0;
+    if( h->param.b_dts_compress )
+    {
+        /* h->i_dts_compress_multiplier == h->frames.i_bframe_delay + 1 */
+        h->i_dts_compress_multiplier = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 3 : 2) : 1;
+        if( h->i_dts_compress_multiplier != 1 )
+            x264_log( h, X264_LOG_DEBUG, "DTS compresion changed timebase: %d/%d -> %d/%d\n",
+                      h->param.i_timebase_num, h->param.i_timebase_den,
+                      h->param.i_timebase_num, h->param.i_timebase_den * h->i_dts_compress_multiplier );
+        h->param.i_timebase_den *= h->i_dts_compress_multiplier;
+    }
+    else
+        h->i_dts_compress_multiplier = 1;

     h->sps = &h->sps_array[0];
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
@@ -2384,8 +2396,32 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         pic_out->i_type = X264_TYPE_B;

     pic_out->b_keyframe = h->fenc->b_keyframe;
-    pic_out->i_pts = h->fenc->i_pts;
-    pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+
+    pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
+    if( h->frames.i_bframe_delay )
+    {
+        int64_t *i_prev_dts = thread_current->frames.i_prev_dts;
+        if( h->i_frame <= h->frames.i_bframe_delay )
+        {
+            if( h->i_dts_compress_multiplier == 1 )
+                pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+            else
+            {
+                /* DTS compression */
+                if( h->i_frame == 1 )
+                    thread_current->frames.i_init_delta = h->fenc->i_dts * h->i_dts_compress_multiplier;
+                pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
+            }
+        }
+        else
+            pic_out->i_dts = i_prev_dts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
+        i_prev_dts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_dts * h->i_dts_compress_multiplier;
+        h->fenc->i_dts = pic_out->i_dts;
+    }
+    else
+        pic_out->i_dts = h->fenc->i_dts;
+    assert( pic_out->i_pts >= pic_out->i_dts );
+
     pic_out->img.i_plane = h->fdec->i_plane;
     for(i = 0; i < 3; i++)
     {
diff --git a/output/flv.c b/output/flv.c
index 8a937cf..5ef5b0f 100644
--- a/output/flv.c
+++ b/output/flv.c
@@ -37,8 +37,6 @@ typedef struct
     int64_t i_fps_num;
     int64_t i_fps_den;
     int64_t i_framenum;
-    int     i_init_delay;
-    int     i_delay_time;

     uint64_t i_framerate_pos;
     uint64_t i_duration_pos;
@@ -46,8 +44,8 @@ typedef struct
     uint64_t i_bitrate_pos;

     uint8_t b_write_length;
-    int64_t i_init_delta;
-    int64_t i_prev_timestamps[2];
+    int64_t i_prev_dts;
+    int64_t i_prev_pts;

     int i_timebase_num;
     int i_timebase_den;
@@ -146,10 +144,8 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_flv->i_fps_den = p_param->i_fps_den;
     p_flv->i_timebase_num = p_param->i_timebase_num;
     p_flv->i_timebase_den = p_param->i_timebase_den;
-    p_flv->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
     p_flv->b_vfr_input = p_param->b_vfr_input;

-
     return 0;
 }

@@ -216,45 +212,29 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     flv_hnd_t *p_flv = handle;
     flv_buffer *c = p_flv->c;

-    int64_t dts;
-    int64_t cts;
-    int64_t offset;
-
-    if( !p_flv->i_framenum )
-        p_flv->i_delay_time = p_picture->i_dts;
+    int64_t dts = (int64_t)( (p_picture->i_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t cts = (int64_t)( (p_picture->i_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t offset = cts - dts;

-    if( !p_flv->i_init_delay )
-        dts = cts = (int64_t)((p_picture->i_pts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-    else
+    if( p_flv->i_framenum )
     {
-        // Use DTS compression
-        dts = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum == 1 )
-            p_flv->i_init_delta = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum > p_flv->i_init_delay )
+        int64_t prev_dts = (int64_t)( (p_flv->i_prev_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        int64_t prev_cts = (int64_t)( (p_flv->i_prev_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        if( prev_dts == dts )
         {
-            dts = p_flv->i_prev_timestamps[ (p_flv->i_framenum - p_flv->i_init_delay) % p_flv->i_init_delay ];
-            dts = (int64_t)((dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
+            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
+                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
         }
-        else if( p_flv->i_init_delta )
+        if( prev_cts == cts )
         {
-            // Compressed DTSs might not fit in input timescale
-            double compressed_dts;
-            compressed_dts = (p_flv->i_framenum * ((double)p_flv->i_init_delta / (2 * p_flv->i_init_delay)));
-            dts = (int64_t)((compressed_dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
+            fprintf( stderr, "flv [warning]: duplicating CTS %"PRId64" is generated by rounding\n"
+                             "               current internal composition framerate: %.6f fps\n", cts, fps );
         }
-
-        p_flv->i_prev_timestamps[ p_flv->i_framenum % p_flv->i_init_delay ] = p_picture->i_dts - p_flv->i_delay_time;
-
-        cts = p_picture->i_pts;
-        cts = (int64_t)((cts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-     }
-
-    offset = cts - dts;
-
-    assert( cts >= dts );
+    }
+    p_flv->i_prev_dts = p_picture->i_dts;
+    p_flv->i_prev_pts = p_picture->i_pts;

     // A new frame - write packet header
     x264_put_byte( c, FLV_TAG_TYPE_VIDEO );
diff --git a/output/mp4.c b/output/mp4.c
index 7889e4f..e3ad9c6 100644
--- a/output/mp4.c
+++ b/output/mp4.c
@@ -34,11 +34,7 @@ typedef struct
     int i_time_res;
     int64_t i_time_inc;
     int i_numframe;
-    int i_init_delay;
     int i_delay_time;
-
-    int64_t i_prev_timestamps[2];
-    int64_t i_init_delta;
 } mp4_hnd_t;

 static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
@@ -195,8 +191,6 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_mp4->i_time_res = p_param->i_timebase_den;
     p_mp4->i_time_inc = p_param->i_timebase_num;

-    p_mp4->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
-
     p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL,
                                         p_mp4->i_time_res );

@@ -282,7 +276,6 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     mp4_hnd_t *p_mp4 = handle;
     int64_t dts;
     int64_t cts;
-    int32_t offset = 0;

     memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size );
     p_mp4->p_sample->dataLength += i_size;
@@ -290,27 +283,12 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     if( !p_mp4->i_numframe )
         p_mp4->i_delay_time = p_picture->i_dts * -1;

-    if( !p_mp4->i_init_delay )
-        dts = cts = p_picture->i_pts * p_mp4->i_time_inc;
-    else
-    {
-        if( p_mp4->i_numframe <= p_mp4->i_init_delay )
-            dts = p_picture->i_dts + p_mp4->i_delay_time;
-        else
-            dts = p_mp4->i_prev_timestamps[ (p_mp4->i_numframe - p_mp4->i_init_delay) % p_mp4->i_init_delay ] + p_mp4->i_delay_time;
-
-        // unordered pts
-        p_mp4->i_prev_timestamps[ p_mp4->i_numframe % p_mp4->i_init_delay ] = p_picture->i_dts + p_mp4->i_delay_time;
-
-        dts *= p_mp4->i_time_inc;
-        cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
-
-        offset = cts - dts;
-    }
+    dts = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
+    cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;

     p_mp4->p_sample->IsRAP = p_picture->b_keyframe;
     p_mp4->p_sample->DTS = dts;
-    p_mp4->p_sample->CTS_Offset = offset;
+    p_mp4->p_sample->CTS_Offset = (uint32_t)(cts - dts);
     gf_isom_add_sample( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_mp4->p_sample );

     p_mp4->p_sample->dataLength = 0;
diff --git a/x264.c b/x264.c
index db33536..d77fa47 100644
--- a/x264.c
+++ b/x264.c
@@ -683,6 +683,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mp4_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
 #else
         fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
@@ -694,6 +695,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mkv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
     }
     else if( !strcasecmp( ext, "flv" ) )
@@ -701,6 +703,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = flv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 1;
         param->b_repeat_headers = 0;
     }
     else
@@ -1528,7 +1531,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
             {
                 if( h->param.i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
-                             i_frame, pic.i_pts, largest_pts );
+                             i_frame, pic.i_pts * h->i_dts_compress_multiplier, largest_pts * h->i_dts_compress_multiplier );
                 else if( pts_warning_cnt == MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
                 pts_warning_cnt++;
@@ -1583,6 +1586,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         duration = (double)param->i_fps_den / param->i_fps_num;
     else
         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
+    duration *= h->i_dts_compress_multiplier;

     i_end = x264_mdate();
     input.picture_clean( &pic );
diff --git a/x264.h b/x264.h
index 1223df7..2550864 100644
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@

 #include <stdarg.h>

-#define X264_BUILD 83
+#define X264_BUILD 84

 /* x264_t:
  *      opaque handler for encoder */
@@ -316,6 +316,9 @@ typedef struct x264_param_t
     int b_vfr_input;            /* VFR input */
     int i_timebase_num;         /* Timebase numerator */
     int i_timebase_den;         /* Timebase denominator */
+    int b_dts_compress;         /* DTS compression: this algorithm eliminates negative DTS
+                                 * by compressing them to be less than the second PTS.
+                                 * Warning: this will change the timebase! */

     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
--
1.6.1.2


From 1590d5060503c837af1866057f00d44a666a1b24 Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 09:26:35 -0800
Subject: [PATCH 07/11] Fix cross-compiling with lavf, add support for ffms2.pc
 Also update configure script to work with newest ffms.

---
 configure |   52 ++++++++++++++++++++++++++++++----------------------
 1 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/configure b/configure
index 9f04a18..133a569 100755
--- a/configure
+++ b/configure
@@ -416,25 +416,23 @@ fi

 if [ "$lavf_input" = "auto" ] ; then
     lavf_input="no"
-    if [ `${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL` ] ; then
-        LAVF_LDFLAGS="$LAVF_LDFLAGS $(pkg-config --libs libavformat libavcodec libswscale)"
-        LAVF_CFLAGS="$LAVF_CFLAGS $(pkg-config --cflags libavformat libavcodec libswscale)"
+    if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL; then
+        LAVF_LIBS="$LAVF_LIBS $(${cross_prefix}pkg-config --libs libavformat libavcodec libswscale)"
+        LAVF_CFLAGS="$LAVF_CFLAGS $(${cross_prefix}pkg-config --cflags libavformat libavcodec libswscale)"
     fi
-    if [ -z "$LAVF_LDFLAGS" -a -z "$LAVF_CFLAGS" ]; then
-        LAVF_LDFLAGS="-lavformat -lswscale"
+    if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
+        LAVF_LIBS="-lavformat -lswscale"
         for lib in -lpostproc -lavcodec -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
-            cc_check "" $lib && LAVF_LDFLAGS="$LAVF_LDFLAGS $lib"
+            cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
         done
     fi
-    LAVF_LDFLAGS="-L. $LAVF_LDFLAGS"
-    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LDFLAGS" && \
-       cc_check libswscale/swscale.h "$LAVF_CFLAGS $LAVF_LDFLAGS" ; then
+    LAVF_LIBS="-L. $LAVF_LIBS"
+    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" && \
+       cc_check libswscale/swscale.h "$LAVF_CFLAGS $LAVF_LIBS" ; then
         # avcodec_decode_video2 is currently the most recently added function that we use; it was added in r18351
-        if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LDFLAGS" "avcodec_decode_video2( NULL, NULL, NULL, NULL );" ; then
+        if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_decode_video2( NULL, NULL, NULL, NULL );" ; then
             lavf_input="yes"
             echo "#define LAVF_INPUT" >> config.h
-            LDFLAGSCLI="$LDFLAGSCLI $LAVF_LDFLAGS"
-            [ -n "$LAVF_CFLAGS" ] && CFLAGS="$CFLAGS $LAVF_CFLAGS"
         else
             echo "Warning: libavformat is too old, update to ffmpeg r18351+"
         fi
@@ -443,19 +441,29 @@ fi

 if [ "$ffms_input" = "auto" ] ; then
     ffms_input="no"
-    if [ "$lavf_input" = "yes" ] ; then
-        if cc_check ffms.h -lFFMS2 "FFMS_DestroyVideoSource(0);" ; then
-           ffms_input="yes"
-           echo "#define FFMS_INPUT" >> config.h
-           LDFLAGSCLI="$LDFLAGSCLI -lFFMS2"
-        elif cc_check ffms.h "-lFFMS2 $LAVF_LDFLAGS -lstdc++" "FFMS_DestroyVideoSource(0);" ; then
-           ffms_input="yes"
-           echo "#define FFMS_INPUT" >> config.h
-           LDFLAGSCLI="-lFFMS2 $LDFLAGSCLI -lstdc++"
-        fi
+    if ${cross_prefix}pkg-config --exists ffms2 2>$DEVNULL; then
+        FFMS2_LIBS="$FFMS2_LIBS $(${cross_prefix}pkg-config --libs ffms2)"
+        FFMS2_CFLAGS="$FFMS2_LIBS $(${cross_prefix}pkg-config --cflags ffms2)"
+    fi
+    [ -z "$FFMS2_LIBS" ] && FFMS2_LIBS="-lffms2"
+
+    if cc_check ffms.h "$FFMS2_CFLAGS $FFMS2_LIBS" "FFMS_DestroyVideoSource(0);" ; then
+        ffms_input="yes"
+    elif cc_check ffms.h "$FFMS2_CFLAGS $FFMS2_LIBS -lstdc++ $LAVF_LIBS" "FFMS_DestroyVideoSource(0);" ; then
+        ffms_input="yes"
+        FFMS2_LIBS="$FFMS2_LIBS -lstdc++ $LAVF_LIBS"
     fi
 fi

+if [ "$ffms_input" = "yes" ]; then
+    LDFLAGSCLI="$FFMS2_LIBS $LDFLAGSCLI"
+    [ -n "$FFMS2_CFLAGS" ] && CFLAGS="$CFLAGS $FFMS2_CFLAGS"
+    echo "#define FFMS_INPUT" >> config.h
+elif [ "$lavf_input" = "yes" ]; then
+    LDFLAGSCLI="$LAVF_LIBS $LDFLAGSCLI"
+    [ -n "$LAVF_CFLAGS" ] && CFLAGS="$CFLAGS $LAVF_CFLAGS"
+fi
+
 MP4_LDFLAGS="-lgpac_static"
 if [ $SYS = MINGW ]; then
     MP4_LDFLAGS="$MP4_LDFLAGS -lwinmm"
--
1.6.1.2


From 9c2216e94c2c001c5852fd2bfc398a0d17fe6d56 Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 10:12:42 -0800
Subject: [PATCH 08/11] Add config.log support
 Now, if configure fails, you'll be able to see why.

---
 .gitignore |    1 +
 configure  |  105 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/.gitignore b/.gitignore
index 308b793..9d8cb70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 .depend
 config.h
 config.mak
+config.log
 x264
 checkasm

diff --git a/configure b/configure
index 133a569..a532788 100755
--- a/configure
+++ b/configure
@@ -27,24 +27,77 @@ echo ""
 exit 1
 fi

+log_check() {
+    echo -n "checking $1... " >> config.log
+}
+
+log_ok() {
+    echo "yes" >> config.log
+}
+
+log_fail() {
+    echo "no" >> config.log
+}
+
+log_msg() {
+    echo "$1" >> config.log
+}
+
 cc_check() {
+    if [ -z "$3" ]; then
+        if [ -z "$1" ]; then
+            log_check "whether $CC works"
+        else
+            log_check "for $1"
+        fi
+    elif [ -z "$1" ]; then
+        log_check "whether $CC supports $3"
+    else
+        log_check "for $3 on $1";
+    fi
     rm -f conftest.c
     [ -n "$1" ] && echo "#include <$1>" > conftest.c
     echo "int main () { $3 return 0; }" >> conftest.c
-    $CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2 -o conftest 2>$DEVNULL
+    if $CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2 -o conftest >conftest.log 2>&1; then
+        res=$?
+        log_ok
+    else
+        res=$?
+        log_fail
+        log_msg "Failed commandline was:"
+        log_msg "--------------------------------------------------"
+        log_msg "$CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2"
+        cat conftest.log >> config.log
+        log_msg "--------------------------------------------------"
+    fi
+    return $res
 }

 as_check() {
+    log_check "whether $AS supports $1"
     echo "$1" > conftest.asm
-    $AS conftest.asm $ASFLAGS $2 -o conftest.o 2>$DEVNULL
+    if $AS conftest.asm $ASFLAGS $2 -o conftest.o >conftest.log 2>&1; then
+        res=$?
+        log_ok
+    else
+        res=$?
+        log_fail
+        log_msg "Failed commandline was:"
+        log_msg "--------------------------------------------------"
+        log_msg "$AS conftest.asm $ASFLAGS $2 -o conftest.o"
+        cat conftest.log >> config.log
+        log_msg "--------------------------------------------------"
+    fi
+    return $res
 }

 die() {
+    log_msg "DIED: $@"
     echo "$@"
     exit 1
 }

-rm -f config.h config.mak x264.pc conftest*
+rm -f config.h config.mak config.log x264.pc conftest*

 prefix='/usr/local'
 exec_prefix='${prefix}'
@@ -320,6 +373,16 @@ case $host_cpu in
     ;;
 esac

+log_msg "x264 configure script"
+if [ ! -z "$*" ]; then
+    msg="Command line options:"
+    for i in $@; do
+        msg="$msg \"$i\""
+    done
+    log_msg "$msg"
+fi
+log_msg ""
+
 # check requirements

 cc_check || die "No working C compiler found."
@@ -506,9 +569,9 @@ if [ "$debug" = "yes" ]; then
 elif [ $ARCH = ARM ]; then
     # arm-gcc-4.2 produces incorrect output with -ffast-math
     # and it doesn't save any speed anyway on 4.4, so disable it
-    CFLAGS="-O4 -fno-fast-math $CFLAGS"
+    CFLAGS="-O3 -fno-fast-math $CFLAGS"
 else
-    CFLAGS="-O4 -ffast-math $CFLAGS"
+    CFLAGS="-O3 -ffast-math $CFLAGS"
 fi

 if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
@@ -586,19 +649,25 @@ Cflags: -I$includedir
 EOF


-echo "Platform:   $ARCH"
-echo "System:     $SYS"
-echo "asm:        $asm"
-echo "avs input:  $avs_input"
-echo "lavf input: $lavf_input"
-echo "ffms input: $ffms_input"
-echo "mp4 output: $mp4_output"
-echo "pthread:    $pthread"
-echo "debug:      $debug"
-echo "gprof:      $gprof"
-echo "PIC:        $pic"
-echo "shared:     $shared"
-echo "visualize:  $vis"
+echo "Platform:   $ARCH" > conftest.log
+echo "System:     $SYS" >> conftest.log
+echo "asm:        $asm" >> conftest.log
+echo "avs input:  $avs_input" >> conftest.log
+echo "lavf input: $lavf_input" >> conftest.log
+echo "ffms input: $ffms_input" >> conftest.log
+echo "mp4 output: $mp4_output" >> conftest.log
+echo "pthread:    $pthread" >> conftest.log
+echo "debug:      $debug" >> conftest.log
+echo "gprof:      $gprof" >> conftest.log
+echo "PIC:        $pic" >> conftest.log
+echo "shared:     $shared" >> conftest.log
+echo "visualize:  $vis" >> conftest.log
+
+echo >> config.log
+cat conftest.log >> config.log
+cat conftest.log
+rm conftest.log
+
 echo
 echo "You can run 'make' or 'make fprofiled' now."

--
1.6.1.2


From a7c86d0ce68d0835df10b1153134053d02affbbf Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 13:11:08 -0800
Subject: [PATCH 09/11] Add configure check for log2 support
 Some incredibly braindamaged operating systems, such as FreeBSD, blatantly ignore the C specification and omit certain functions that are required by ISO C.
 log2f is one of these functions that periodically goes missing in such operating systems.

---
 common/osdep.h |    4 ++++
 configure      |    4 ++++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/common/osdep.h b/common/osdep.h
index abae9ac..9988803 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -34,6 +34,10 @@
 #include <inttypes.h>
 #endif

+#ifndef HAVE_LOG2F
+#define log2f(x) (logf((x))/0.693147180559945f)
+#endif
+
 #ifdef _WIN32
 #include <io.h>    // _setmode()
 #include <fcntl.h> // _O_BINARY
diff --git a/configure b/configure
index a532788..f778233 100755
--- a/configure
+++ b/configure
@@ -477,6 +477,10 @@ if test "$pthread" = "yes" ; then
     LDFLAGS="$LDFLAGS $libpthread"
 fi

+if cc_check "math.h" "-Werror" "log2f(2);" ; then
+    CFLAGS="$CFLAGS -DHAVE_LOG2F"
+fi
+
 if [ "$lavf_input" = "auto" ] ; then
     lavf_input="no"
     if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL; then
--
1.6.1.2


From 664ac05833320b614f50ccff84306645b7f28620 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Wed, 27 Jan 2010 19:41:27 -0800
Subject: [PATCH 10/11] Fix implicit CBR message to only print when in ABR mode.
 Also make it print outside of debug mode.

---
 encoder/ratecontrol.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 746b17a..5304616 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -436,8 +436,16 @@ int x264_ratecontrol_new( x264_t *h )
         }
         else if( h->param.rc.i_vbv_max_bitrate == 0 )
         {
-            x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
-            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+            if( h->param.rc.i_rc_method == X264_RC_ABR )
+            {
+                x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
+                h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+            }
+            else
+            {
+                x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
+                h->param.rc.i_vbv_buffer_size = 0;
+            }
         }
     }
     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
--
1.6.1.2


From ba35cc56357f039a61db5c9e53cadd8bfbf8673e Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 20:29:50 -0800
Subject: [PATCH 11/11] Implement ffms2 version check
 Depends on ffms2 version 2.13.1 (r272).
 Tries pkg-config's built-in version checking first.
 Uses only the preprocessor to avoid cross-compilation issues.

---
 configure |   21 ++++++++++++++++++++-
 1 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/configure b/configure
index f778233..b04ec2a 100755
--- a/configure
+++ b/configure
@@ -507,10 +507,17 @@ if [ "$lavf_input" = "auto" ] ; then
 fi

 if [ "$ffms_input" = "auto" ] ; then
+    ffms_major="2"; ffms_minor="13"; ffms_micro="1"; ffms_bump="0"
+
     ffms_input="no"
-    if ${cross_prefix}pkg-config --exists ffms2 2>$DEVNULL; then
+    [ $ffms_micro -gt 0 -o $ffms_bump -gt 0 ] && vmicro=".$ffms_micro"
+    [ $ffms_bump -gt 0 ] && vbump=".$ffms_bump"
+    if ${cross_prefix}pkg-config --atleast-version="$ffms_major.$ffms_minor$vmicro$vbump" ffms2 2>$DEVNULL; then
         FFMS2_LIBS="$FFMS2_LIBS $(${cross_prefix}pkg-config --libs ffms2)"
         FFMS2_CFLAGS="$FFMS2_LIBS $(${cross_prefix}pkg-config --cflags ffms2)"
+        api_check="no"
+    else
+        api_check="yes"
     fi
     [ -z "$FFMS2_LIBS" ] && FFMS2_LIBS="-lffms2"

@@ -520,6 +527,18 @@ if [ "$ffms_input" = "auto" ] ; then
         ffms_input="yes"
         FFMS2_LIBS="$FFMS2_LIBS -lstdc++ $LAVF_LIBS"
     fi
+
+    if [ $api_check = "yes" -a $ffms_input = "yes" ]; then
+        log_check "whether ffms2 version is at least $ffms_major.$ffms_minor$vmicro$vbump"
+        $CC $CFLAGS $FFMS2_CFLAGS -c -o conftest -x c - >$DEVNULL 2>&1 <<EOF
+#include <ffms.h>
+#if FFMS_VERSION < (($ffms_major << 24) | ($ffms_minor << 16)| ($ffms_micro << 8) | $ffms_bump)
+#error Requires ffms2 version 2.13.1
+#endif
+int main() { return 0; }
+EOF
+        [ $? = 0 ] && log_ok || { ffms_input="no"; log_fail; }
+    fi
 fi

 if [ "$ffms_input" = "yes" ]; then
--
1.6.1.2