Untitled

From 90a249aa40deec9234f9ea7554deb8a418183094 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Thu, 21 Jan 2010 10:00:07 -0800
Subject: [PATCH 01/13] Merge nnz_backup with scratch buffer
 Slightly less memory usage.

---
 common/common.h     |    1 -
 common/frame.c      |    5 +++--
 common/macroblock.c |    2 --
 encoder/encoder.c   |    4 +++-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/common/common.h b/common/common.h
index 02d1748..df39f26 100644
--- a/common/common.h
+++ b/common/common.h
@@ -532,7 +532,6 @@ struct x264_t
         int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
         int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
         uint8_t *intra_border_backup[2][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
-        uint8_t (*nnz_backup)[16];          /* when using cavlc + 8x8dct, the deblocker uses a modified nnz */

          /* buffer for weighted versions of the reference frames */
         uint8_t *p_weight_buf[16];
diff --git a/common/frame.c b/common/frame.c
index 08ef87f..3ef303a 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -661,9 +661,10 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     int stride2y  = stridey << b_interlaced;
     int strideuv  = h->fdec->i_stride[1];
     int stride2uv = strideuv << b_interlaced;
+    uint8_t (*nnz_backup)[16] = h->scratch_buffer;

     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
+        munge_cavlc_nnz( h, mb_y, nnz_backup, munge_cavlc_nnz_row );

     for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
     {
@@ -823,7 +824,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     }

     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
+        munge_cavlc_nnz( h, mb_y, nnz_backup, restore_cavlc_nnz_row );
 }

 void x264_frame_deblock( x264_t *h )
diff --git a/common/macroblock.c b/common/macroblock.c
index 921d2b9..10f09ac 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -701,7 +701,6 @@ int x264_macroblock_cache_init( x264_t *h )

     /* all coeffs */
     CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 24 * sizeof(uint8_t) );
-    CHECKED_MALLOC( h->mb.nnz_backup, h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t) );

     if( h->param.b_cabac )
     {
@@ -797,7 +796,6 @@ void x264_macroblock_cache_end( x264_t *h )
     }
     x264_free( h->mb.intra4x4_pred_mode );
     x264_free( h->mb.non_zero_count );
-    x264_free( h->mb.nnz_backup );
     x264_free( h->mb.mb_transform_size );
     x264_free( h->mb.skipbp );
     x264_free( h->mb.cbp );
diff --git a/encoder/encoder.c b/encoder/encoder.c
index f97df1b..db2c861 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1018,7 +1018,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
         int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
             ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
         int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
-        CHECKED_MALLOC( h->thread[i]->scratch_buffer, X264_MAX4( buf_hpel, buf_ssim, buf_tesa, buf_mbtree ) );
+        int buf_nnz = !h->param.b_cabac * h->pps->b_transform_8x8_mode * (h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t));
+        int scratch_size = X264_MAX4( buf_hpel, buf_ssim, buf_tesa, X264_MAX( buf_mbtree, buf_nnz ) );
+        CHECKED_MALLOC( h->thread[i]->scratch_buffer, scratch_size );
     }

     if( x264_ratecontrol_new( h ) < 0 )
--
1.6.1.2


From 1a1b356a2996629d2c5ad2b568df8df9bff32954 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Thu, 21 Jan 2010 23:07:11 -0800
Subject: [PATCH 02/13] Fix bitstream alignment with multiple slices
 Misaligned bitstream writes broke multi-slice encoding on CPUs without unaligned access.
 The new system simply forces a bitstream realignment at the start of each writing function and flushes when it reaches the end.

---
 common/bs.h       |   14 +++++++++++++-
 encoder/encoder.c |    2 +-
 encoder/set.c     |    8 ++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/common/bs.h b/common/bs.h
index d13eb03..0773de6 100644
--- a/common/bs.h
+++ b/common/bs.h
@@ -77,7 +77,7 @@ static inline void bs_init( bs_t *s, void *p_data, int i_data )
     s->p       = s->p_start = (uint8_t*)p_data - offset;
     s->p_end   = (uint8_t*)p_data + i_data;
     s->i_left  = (WORD_SIZE - offset)*8;
-    s->cur_bits = endian_fix32(*(uint32_t *)(s->p));
+    s->cur_bits = endian_fix32( M32(s->p) );
     s->cur_bits >>= (4-offset)*8;
 }
 static inline int bs_pos( bs_t *s )
@@ -92,6 +92,18 @@ static inline void bs_flush( bs_t *s )
     s->p += WORD_SIZE - s->i_left / 8;
     s->i_left = WORD_SIZE*8;
 }
+/* The inverse of bs_flush: prepare the bitstream to be written to again. */
+static inline void bs_realign( bs_t *s )
+{
+    int offset = ((intptr_t)s->p & 3);
+    if( offset )
+    {
+        s->p       = (uint8_t*)s->p - offset;
+        s->i_left  = (WORD_SIZE - offset)*8;
+        s->cur_bits = endian_fix32( M32(s->p) );
+        s->cur_bits >>= (4-offset)*8;
+    }
+}

 static inline void bs_write( bs_t *s, int i_count, uint32_t i_bits )
 {
diff --git a/encoder/encoder.c b/encoder/encoder.c
index db2c861..c7065a2 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1206,7 +1206,6 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
     x264_pps_write( &h->out.bs, h->pps );
     if( x264_nal_end( h ) )
         return -1;
-    bs_flush( &h->out.bs );

     frame_size = x264_encoder_encapsulate_nals( h );

@@ -1657,6 +1656,7 @@ static int x264_slice_write( x264_t *h )
     /* Assume no more than 3 bytes of NALU escaping. */
     int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-3-NALU_OVERHEAD)*8 : INT_MAX;
     int starting_bits = bs_pos(&h->out.bs);
+    bs_realign( &h->out.bs );

     /* Slice */
     x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
diff --git a/encoder/set.c b/encoder/set.c
index 641eae9..f79919b 100644
--- a/encoder/set.c
+++ b/encoder/set.c
@@ -210,6 +210,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )

 void x264_sps_write( bs_t *s, x264_sps_t *sps )
 {
+    bs_realign( s );
     bs_write( s, 8, sps->i_profile_idc );
     bs_write( s, 1, sps->b_constraint_set0 );
     bs_write( s, 1, sps->b_constraint_set1 );
@@ -359,6 +360,7 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
     }

     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps )
@@ -423,6 +425,7 @@ void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *

 void x264_pps_write( bs_t *s, x264_pps_t *pps )
 {
+    bs_realign( s );
     bs_write_ue( s, pps->i_id );
     bs_write_ue( s, pps->i_sps_id );

@@ -465,12 +468,14 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
     }

     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )
 {
     int payload_size;

+    bs_realign( s );
     bs_write( s, 8, 0x06 ); // payload_type = Recovery Point
     payload_size = bs_size_ue( recovery_frame_cnt ) + 4;

@@ -482,6 +487,7 @@ void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )

     bs_align_10( s );
     bs_rbsp_trailing( s );
+    bs_flush( s );
 }

 int x264_sei_version_write( x264_t *h, bs_t *s )
@@ -505,6 +511,7 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
              X264_BUILD, X264_VERSION, opts );
     length = strlen(version)+1+16;

+    bs_realign( s );
     bs_write( s, 8, 0x5 ); // payload_type = user_data_unregistered
     // payload_size
     for( i = 0; i <= length-255; i += 255 )
@@ -517,6 +524,7 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
         bs_write( s, 8, version[i] );

     bs_rbsp_trailing( s );
+    bs_flush( s );

     x264_free( opts );
     x264_free( version );
--
1.6.1.2


From 00f933f05df722e4b133d2c57f1f45dba285430c Mon Sep 17 00:00:00 2001
From: David Conrad <lessen42@gmail.com>
Date: Sat, 23 Jan 2010 18:05:25 -0800
Subject: [PATCH 03/13] Fix lavf input with pipes and image sequences
 x264 should now be able to encode from an image sequence using an image2-style formatted string (e.g. file%02d.jpg).

---
 x264.c |   10 ++++------
 1 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/x264.c b/x264.c
index 3a07bb3..db33536 100644
--- a/x264.c
+++ b/x264.c
@@ -719,13 +719,11 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
     if( b_regular )
     {
         FILE *f = fopen( filename, "r" );
-        if( !f )
+        if( f )
         {
-            fprintf( stderr, "x264 [error]: could not open input file `%s'\n", filename );
-            return -1;
+            b_regular = x264_is_regular_file( f );
+            fclose( f );
         }
-        b_regular = x264_is_regular_file( f );
-        fclose( f );
     }
     const char *module = b_auto ? ext : demuxer;

@@ -756,7 +754,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
 #endif
 #ifdef LAVF_INPUT
         if( (b_auto || !strcasecmp( demuxer, "lavf" )) &&
-            (!b_regular || !lavf_input.open_file( filename, p_handle, info, opt )) )
+            !lavf_input.open_file( filename, p_handle, info, opt ) )
         {
             module = "lavf";
             b_auto = 0;
--
1.6.1.2


From 0f985245093047980c1b6148562222265b230dff Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Mon, 25 Jan 2010 11:23:55 -0800
Subject: [PATCH 04/13] Hardcode the bs_t in cavlc.c; passing it around is a waste

Saves ~1.5kb of code size, very slight speed boost.
---
 encoder/cavlc.c      |  143 ++++++++++++++++++++++++++------------------------
 encoder/encoder.c    |    2 +-
 encoder/macroblock.h |    2 +-
 encoder/rdo.c        |    6 +--
 4 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index 59f362a..c65c9bd 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -61,8 +61,9 @@ static const uint8_t sub_mb_type_b_to_golomb[13]=
 /****************************************************************************
  * block_residual_write_cavlc:
  ****************************************************************************/
-static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_suffix_length, int level )
+static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_length, int level )
 {
+    bs_t *s = &h->out.bs;
     static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
     int i_level_prefix = 15;
     int mask = level >> 15;
@@ -112,8 +113,9 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_s
     return i_suffix_length;
 }

-static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int16_t *l, int nC )
+static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l, int nC )
 {
+    bs_t *s = &h->out.bs;
     static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
     static const int count_cat[5] = {16, 15, 16, 4, 15};
     x264_run_level_t runlevel;
@@ -157,7 +159,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
             i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
         }
         else
-            i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+            i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         for( i = i_trailing+1; i < i_total; i++ )
         {
             val = runlevel.level[i] + LEVEL_TABLE_SIZE/2;
@@ -167,7 +169,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
                 i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
             }
             else
-                i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
+                i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
         }
     }

@@ -191,18 +193,19 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in

 static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};

-#define block_residual_write_cavlc(h,s,cat,idx,l)\
+#define block_residual_write_cavlc(h,cat,idx,l)\
 {\
     int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? 0 : idx )];\
     uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\
     if( !*nnz )\
-        bs_write_vlc( s, x264_coeff0_token[nC] );\
+        bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
     else\
-        *nnz = block_residual_write_cavlc(h,s,cat,l,nC);\
+        *nnz = block_residual_write_cavlc(h,cat,l,nC);\
 }

-static void cavlc_qp_delta( x264_t *h, bs_t *s )
+static void cavlc_qp_delta( x264_t *h )
 {
+    bs_t *s = &h->out.bs;
     int i_dqp = h->mb.i_qp - h->mb.i_last_qp;

     /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
@@ -225,39 +228,40 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
     bs_write_se( s, i_dqp );
 }

-static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
+static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
 {
+    bs_t *s = &h->out.bs;
     ALIGNED_4( int16_t mvp[2] );
     x264_mb_predict_mv( h, i_list, idx, width, mvp );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
     bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
 }

-static inline void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i )
+static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
 {
     switch( h->mb.i_sub_partition[i] )
     {
         case D_L0_8x8:
-            cavlc_mb_mvd( h, s, 0, 4*i, 2 );
+            cavlc_mb_mvd( h, 0, 4*i, 2 );
             break;
         case D_L0_8x4:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 2 );
-            cavlc_mb_mvd( h, s, 0, 4*i+2, 2 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 2 );
+            cavlc_mb_mvd( h, 0, 4*i+2, 2 );
             break;
         case D_L0_4x8:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+1, 1 );
             break;
         case D_L0_4x4:
-            cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+2, 1 );
-            cavlc_mb_mvd( h, s, 0, 4*i+3, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+0, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+1, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+2, 1 );
+            cavlc_mb_mvd( h, 0, 4*i+3, 1 );
             break;
     }
 }

-static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
+static inline void x264_macroblock_luma_write_cavlc( x264_t *h, int i8start, int i8end )
 {
     int i8, i4;
     if( h->mb.b_transform_8x8 )
@@ -271,20 +275,23 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s
     for( i8 = i8start; i8 <= i8end; i8++ )
         if( h->mb.i_cbp_luma & (1 << i8) )
             for( i4 = 0; i4 < 4; i4++ )
-                block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
+                block_residual_write_cavlc( h, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
 }

 /*****************************************************************************
  * x264_macroblock_write:
  *****************************************************************************/
-void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
+void x264_macroblock_write_cavlc( x264_t *h )
 {
+    bs_t *s = &h->out.bs;
     const int i_mb_type = h->mb.i_type;
     static const uint8_t i_offsets[3] = {5,23,0};
     int i_mb_i_offset = i_offsets[h->sh.i_type];
     int i;

-#if !RDO_SKIP_BS
+#if RDO_SKIP_BS
+    s->i_bits_encoded = 0;
+#else
     const int i_mb_pos_start = bs_pos( s );
     int       i_mb_pos_tex;
 #endif
@@ -365,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )

             if( h->mb.pic.i_fref[0] > 1 )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
-            cavlc_mb_mvd( h, s, 0, 0, 4 );
+            cavlc_mb_mvd( h, 0, 0, 4 );
         }
         else if( h->mb.i_partition == D_16x8 )
         {
@@ -375,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
             }
-            cavlc_mb_mvd( h, s, 0, 0, 4 );
-            cavlc_mb_mvd( h, s, 0, 8, 4 );
+            cavlc_mb_mvd( h, 0, 0, 4 );
+            cavlc_mb_mvd( h, 0, 8, 4 );
         }
         else if( h->mb.i_partition == D_8x16 )
         {
@@ -386,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                 bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
             }
-            cavlc_mb_mvd( h, s, 0, 0, 2 );
-            cavlc_mb_mvd( h, s, 0, 4, 2 );
+            cavlc_mb_mvd( h, 0, 0, 2 );
+            cavlc_mb_mvd( h, 0, 4, 2 );
         }
     }
     else if( i_mb_type == P_8x8 )
@@ -422,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         }

         for( i = 0; i < 4; i++ )
-            cavlc_mb8x8_mvd( h, s, i );
+            cavlc_mb8x8_mvd( h, i );
     }
     else if( i_mb_type == B_8x8 )
     {
@@ -445,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         /* mvd */
         for( i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
-                cavlc_mb_mvd( h, s, 0, 4*i, 2 );
+                cavlc_mb_mvd( h, 0, 4*i, 2 );
         for( i = 0; i < 4; i++ )
             if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
-                cavlc_mb_mvd( h, s, 1, 4*i, 2 );
+                cavlc_mb_mvd( h, 1, 4*i, 2 );
     }
     else if( i_mb_type != B_DIRECT )
     {
@@ -463,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
         {
             if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
             if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
-            if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
-            if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
+            if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
+            if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
         }
         else
         {
@@ -474,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
             if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
             if( h->mb.i_partition == D_16x8 )
             {
-                if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
-                if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 8, 4 );
-                if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
-                if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 8, 4 );
+                if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
+                if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
+                if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
+                if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
             }
             else //if( h->mb.i_partition == D_8x16 )
             {
-                if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 2 );
-                if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 4, 2 );
-                if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 2 );
-                if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 4, 2 );
+                if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
+                if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
+                if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
+                if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
             }
         }
     }
@@ -509,29 +516,29 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
     /* write residual */
     if( i_mb_type == I_16x16 )
     {
-        cavlc_qp_delta( h, s );
+        cavlc_qp_delta( h );

         /* DC Luma */
-        block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
+        block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );

         /* AC Luma */
         if( h->mb.i_cbp_luma )
             for( i = 0; i < 16; i++ )
-                block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
     }
     else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
     {
-        cavlc_qp_delta( h, s );
-        x264_macroblock_luma_write_cavlc( h, s, 0, 3 );
+        cavlc_qp_delta( h );
+        x264_macroblock_luma_write_cavlc( h, 0, 3 );
     }
     if( h->mb.i_cbp_chroma )
     {
         /* Chroma DC residual present */
-        block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
-        block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
         if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
     }

 #if !RDO_SKIP_BS
@@ -549,36 +556,36 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
  *****************************************************************************/
 static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
 {
     const int i_mb_type = h->mb.i_type;
     int b_8x16 = h->mb.i_partition == D_8x16;
     int j;
     h->out.bs.i_bits_encoded = 0;

     if( i_mb_type == P_8x8 )
     {
-        cavlc_mb8x8_mvd( h, &h->out.bs, i8 );
+        cavlc_mb8x8_mvd( h, i8 );
         bs_write_ue( &h->out.bs, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
     }
     else if( i_mb_type == P_L0 )
-        cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
+        cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
     else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
     {
-        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
-        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
+        if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
     }
     else //if( i_mb_type == B_8x8 )
     {
         if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
-            cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 2 );
+            cavlc_mb_mvd( h, 0, 4*i8, 2 );
         if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
-            cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 2 );
+            cavlc_mb_mvd( h, 1, 4*i8, 2 );
     }

     for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
     {
-        x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
+        x264_macroblock_luma_write_cavlc( h, i8, i8 );
+        block_residual_write_cavlc( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
+        block_residual_write_cavlc( h, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
         i8 += x264_pixel_size[i_pixel].h >> 3;
     }

@@ -589,12 +596,12 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
 {
     int b_8x4 = i_pixel == PIXEL_8x4;
     h->out.bs.i_bits_encoded = 0;
-    cavlc_mb_mvd( h, &h->out.bs, 0, i4, 1+b_8x4 );
-    block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+    cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
+    block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     if( i_pixel != PIXEL_4x4 )
     {
         i4 += 2-b_8x4;
-        block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+        block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     }

     return h->out.bs.i_bits_encoded;
@@ -612,14 +619,14 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
 {
     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
     bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
-    x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
+    x264_macroblock_luma_write_cavlc( h, i8, i8 );
     return h->out.bs.i_bits_encoded;
 }

 static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
 {
     h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
-    block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
+    block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
     return h->out.bs.i_bits_encoded;
 }

@@ -628,14 +635,14 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h )
     h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
     if( h->mb.i_cbp_chroma )
     {
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
-        block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
+        block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );

         if( h->mb.i_cbp_chroma == 2 )
         {
             int i;
             for( i = 16; i < 24; i++ )
-                block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
+                block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
         }
     }
     return h->out.bs.i_bits_encoded;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index c7065a2..15b373d 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1741,7 +1741,7 @@ static int x264_slice_write( x264_t *h )
                     bs_write_ue( &h->out.bs, i_skip );  /* skip run */
                     i_skip = 0;
                 }
-                x264_macroblock_write_cavlc( h, &h->out.bs );
+                x264_macroblock_write_cavlc( h );
             }
         }

diff --git a/encoder/macroblock.h b/encoder/macroblock.h
index 24a43b1..25beb18 100644
--- a/encoder/macroblock.h
+++ b/encoder/macroblock.h
@@ -45,7 +45,7 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode );

 void x264_macroblock_encode      ( x264_t *h );
 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
-void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
+void x264_macroblock_write_cavlc ( x264_t *h );

 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
 void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 9dee56d..3ed4a47 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -159,10 +159,8 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
     }
     else
     {
-        bs_t bs_tmp = h->out.bs;
-        bs_tmp.i_bits_encoded = 0;
-        x264_macroblock_size_cavlc( h, &bs_tmp );
-        i_bits = ( bs_tmp.i_bits_encoded * i_lambda2 + 128 ) >> 8;
+        x264_macroblock_size_cavlc( h );
+        i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
     }

     h->mb.b_transform_8x8 = b_transform_bak;
--
1.6.1.2


From 85056bb1c99a38a9a7c0a7642eb0ee03294a29c3 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Tue, 26 Jan 2010 11:41:18 -0800
Subject: [PATCH 05/13] Various threading-related cosmetics
 Simplify a lot of code and remove some unnecessary variables.

---
 common/common.h       |   14 +++------
 encoder/analyse.c     |   12 ++++----
 encoder/encoder.c     |   68 ++++++++++++++++++++++++------------------------
 encoder/ratecontrol.c |   22 ++++++++--------
 4 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/common/common.h b/common/common.h
index df39f26..0f16e0a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -362,16 +362,12 @@ struct x264_t

     /* frame number/poc */
     int             i_frame;
+    int             i_frame_num;

-    int             i_frame_offset; /* decoding only */
-    int             i_frame_num;    /* decoding only */
-    int             i_poc_msb;      /* decoding only */
-    int             i_poc_lsb;      /* decoding only */
-    int             i_poc;          /* decoding only */
-
-    int             i_thread_num;   /* threads only */
-    int             i_nal_type;     /* threads only */
-    int             i_nal_ref_idc;  /* threads only */
+    int             i_thread_frames; /* Number of different frames being encoded by threads;
+                                      * 1 when sliced-threads is on. */
+    int             i_nal_type;
+    int             i_nal_ref_idc;

     /* We use only one SPS and one PPS */
     x264_sps_t      sps_array[1];
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 37d7fd9..666596b 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -413,7 +413,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
             int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
             int thread_mvy_range = i_fmv_range;

-            if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+            if( h->i_thread_frames > 1 )
             {
                 int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
                 int thresh = pix_y + h->param.analyse.i_mv_range_thread;
@@ -1167,7 +1167,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
         {
             h->mb.i_type = P_SKIP;
             x264_analyse_update_cache( h, a );
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
             return;
         }

@@ -1183,7 +1183,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
     }

     x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
-    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+    assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );

     h->mb.i_type = P_L0;
     if( a->i_mbrd )
@@ -2403,7 +2403,7 @@ intra_analysis:
             /* Fast P_SKIP detection */
             if( h->param.analyse.b_fast_pskip )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
+                if( h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
                     // FIXME don't need to check this if the reference frame is done
                     {}
                 else if( h->param.analyse.i_subpel_refine >= 3 )
@@ -2422,7 +2422,7 @@ intra_analysis:
         {
             h->mb.i_type = P_SKIP;
             h->mb.i_partition = D_16x16;
-            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 || h->param.b_sliced_threads );
+            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
         }
         else
         {
@@ -3143,7 +3143,7 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a  )
     }

 #ifndef NDEBUG
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads && !IS_INTRA(h->mb.i_type) )
+    if( h->i_thread_frames > 1 && !IS_INTRA(h->mb.i_type) )
     {
         int l;
         for( l=0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 15b373d..570f1d0 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -421,6 +421,7 @@ static int x264_validate_parameters( x264_t *h )
     }
     else
         h->param.b_sliced_threads = 0;
+    h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;

     if( h->param.b_interlaced )
     {
@@ -589,8 +590,8 @@ static int x264_validate_parameters( x264_t *h )
         h->param.rc.i_lookahead = 0;
 #ifdef HAVE_PTHREAD
     if( h->param.i_sync_lookahead )
-        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->param.i_threads + h->param.i_bframe, X264_LOOKAHEAD_MAX );
-    if( h->param.rc.b_stat_read || h->param.i_threads == 1 || h->param.b_sliced_threads )
+        h->param.i_sync_lookahead = x264_clip3( h->param.i_sync_lookahead, h->i_thread_frames + h->param.i_bframe, X264_LOOKAHEAD_MAX );
+    if( h->param.rc.b_stat_read || h->i_thread_frames == 1 )
         h->param.i_sync_lookahead = 0;
 #else
     h->param.i_sync_lookahead = 0;
@@ -708,7 +709,7 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
         h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;

-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int r = h->param.analyse.i_mv_range_thread;
         int r2;
@@ -718,7 +719,7 @@ static int x264_validate_parameters( x264_t *h )
             // the rest is allocated to whichever thread is far enough ahead to use it.
             // reserving more space increases quality for some videos, but costs more time
             // in thread synchronization.
-            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->param.i_threads - X264_THREAD_HEIGHT;
+            int max_range = (h->param.i_height + X264_THREAD_HEIGHT) / h->i_thread_frames - X264_THREAD_HEIGHT;
             r = max_range / 2;
         }
         r = X264_MAX( r, h->param.analyse.i_me_range );
@@ -886,8 +887,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
     if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
         h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
     i_slicetype_length = h->frames.i_delay;
-    if( !h->param.b_sliced_threads )
-        h->frames.i_delay += h->param.i_threads - 1;
+    h->frames.i_delay += h->i_thread_frames - 1;
     h->frames.i_delay = X264_MIN( h->frames.i_delay, X264_LOOKAHEAD_MAX );
     h->frames.i_delay += h->param.i_sync_lookahead;
     h->frames.i_bframe_delay = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 2 : 1) : 0;
@@ -910,11 +910,11 @@ x264_t *x264_encoder_open( x264_param_t *param )

     CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
     /* Allocate room for max refs plus a few extra just in case. */
-    CHECKED_MALLOCZERO( h->frames.unused[1], (h->param.i_threads + 20) * sizeof(x264_frame_t *) );
+    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + 20) * sizeof(x264_frame_t *) );
     CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
-                        + h->param.i_threads + 3) * sizeof(x264_frame_t *) );
+                        + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
     if( h->param.analyse.i_weighted_pred > 0 )
-        CHECKED_MALLOCZERO( h->frames.blank_unused, h->param.i_threads * 4 * sizeof(x264_frame_t *) );
+        CHECKED_MALLOCZERO( h->frames.blank_unused, h->i_thread_frames * 4 * sizeof(x264_frame_t *) );
     h->i_ref0 = 0;
     h->i_ref1 = 0;

@@ -977,7 +977,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;

     h->thread[0] = h;
-    h->i_thread_num = 0;
     for( i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
         CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );

@@ -1501,7 +1500,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
         }
     }

-    if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
         x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );

     min_y = X264_MAX( min_y*16-8, 0 );
@@ -1537,7 +1536,7 @@ static inline int x264_reference_update( x264_t *h )
     int i, j;
     if( !h->fdec->b_kept_as_ref )
     {
-        if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+        if( h->i_thread_frames > 1 )
         {
             x264_frame_push_unused( h, h->fdec );
             h->fdec = x264_frame_pop_unused( h, 1 );
@@ -2036,12 +2035,12 @@ int     x264_encoder_encode( x264_t *h,
     x264_t *thread_current, *thread_prev, *thread_oldest;
     int i_nal_type, i_nal_ref_idc, i_global_qp, i;

-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         thread_prev    = h->thread[ h->i_thread_phase ];
-        h->i_thread_phase = (h->i_thread_phase + 1) % h->param.i_threads;
+        h->i_thread_phase = (h->i_thread_phase + 1) % h->i_thread_frames;
         thread_current = h->thread[ h->i_thread_phase ];
-        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->param.i_threads ];
+        thread_oldest  = h->thread[ (h->i_thread_phase + 1) % h->i_thread_frames ];
         x264_thread_sync_context( thread_current, thread_prev );
         x264_thread_sync_ratecontrol( thread_current, thread_prev, thread_oldest );
         h = thread_current;
@@ -2100,7 +2099,7 @@ int     x264_encoder_encode( x264_t *h,
         /* 2: Place the frame into the queue for its slice type decision */
         x264_lookahead_put_frame( h, fenc );

-        if( h->frames.i_input <= h->frames.i_delay + (h->param.b_sliced_threads ? 0 : 1 - h->param.i_threads) )
+        if( h->frames.i_input <= h->frames.i_delay + 1 - h->i_thread_frames )
         {
             /* Nothing yet to encode, waiting for filling of buffers */
             pic_out->i_type = X264_TYPE_AUTO;
@@ -2327,7 +2326,7 @@ int     x264_encoder_encode( x264_t *h,
     /* Write frame */
     h->i_threadslice_start = 0;
     h->i_threadslice_end = h->sps->i_mb_height;
-    if( !h->param.b_sliced_threads && h->param.i_threads > 1 )
+    if( h->i_thread_frames > 1 )
     {
         if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
             return -1;
@@ -2564,25 +2563,23 @@ void    x264_encoder_close  ( x264_t *h )

     x264_lookahead_delete( h );

-    for( i = 0; i < h->param.i_threads; i++ )
+    if( h->i_thread_frames > 1 )
     {
-        // don't strictly have to wait for the other threads, but it's simpler than canceling them
-        if( h->thread[i]->b_thread_active )
+        for( i = 0; i < h->i_thread_frames; i++ )
         {
-            x264_pthread_join( h->thread[i]->thread_handle, NULL );
-            assert( h->thread[i]->fenc->i_reference_count == 1 );
-            x264_frame_delete( h->thread[i]->fenc );
+            // don't strictly have to wait for the other threads, but it's simpler than canceling them
+            if( h->thread[i]->b_thread_active )
+            {
+                x264_pthread_join( h->thread[i]->thread_handle, NULL );
+                assert( h->thread[i]->fenc->i_reference_count == 1 );
+                x264_frame_delete( h->thread[i]->fenc );
+            }
         }
-    }
-
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
-    {
-        x264_t *thread_prev;

-        thread_prev = h->thread[h->i_thread_phase];
+        x264_t *thread_prev = h->thread[h->i_thread_phase];
         x264_thread_sync_ratecontrol( h, thread_prev, h );
         x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
-        h->i_frame = thread_prev->i_frame + 1 - h->param.i_threads;
+        h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
     }
     h->i_frame++;

@@ -2833,7 +2830,7 @@ void    x264_encoder_close  ( x264_t *h )
     x264_free( h->nal_buffer );
     x264_analyse_free_costs( h );

-    if( h->param.i_threads > 1)
+    if( h->i_thread_frames > 1)
         h = h->thread[h->i_thread_phase];

     /* frames */
@@ -2878,9 +2875,12 @@ int x264_encoder_delayed_frames( x264_t *h )
 {
     int delayed_frames = 0;
     int i;
-    for( i=0; i<h->param.i_threads; i++ )
-        delayed_frames += h->thread[i]->b_thread_active;
-    h = h->thread[h->i_thread_phase];
+    if( h->i_thread_frames > 1 )
+    {
+        for( i=0; i<h->i_thread_frames; i++ )
+            delayed_frames += h->thread[i]->b_thread_active;
+        h = h->thread[h->i_thread_phase];
+    }
     for( i=0; h->frames.current[i]; i++ )
         delayed_frames++;
     x264_pthread_mutex_lock( &h->lookahead->ofbuf.mutex );
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 761ff2c..746b17a 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -1578,13 +1578,13 @@ static void update_vbv_plan( x264_t *h, int overhead )
 {
     x264_ratecontrol_t *rcc = h->rc;
     rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final - overhead;
-    if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+    if( h->i_thread_frames > 1 )
     {
         int j = h->rc - h->thread[0]->rc;
         int i;
-        for( i=1; i<h->param.i_threads; i++ )
+        for( i=1; i<h->i_thread_frames; i++ )
         {
-            x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+            x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
             double bits = t->rc->frame_size_planned;
             if( !t->b_thread_active )
                 continue;
@@ -1794,7 +1794,7 @@ static float rate_estimate_qscale( x264_t *h )
     }
     else
     {
-        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * (h->param.b_sliced_threads?1:h->param.i_threads);
+        double abr_buffer = 2 * rcc->rate_tolerance * rcc->bitrate * h->i_thread_frames;

         if( rcc->b_2pass )
         {
@@ -1804,13 +1804,13 @@ static float rate_estimate_qscale( x264_t *h )

             if( rcc->b_vbv )
             {
-                if( h->param.i_threads > 1 && !h->param.b_sliced_threads )
+                if( h->i_thread_frames > 1 )
                 {
                     int j = h->rc - h->thread[0]->rc;
                     int i;
-                    for( i=1; i<h->param.i_threads; i++ )
+                    for( i=1; i<h->i_thread_frames; i++ )
                     {
-                        x264_t *t = h->thread[ (j+i)%h->param.i_threads ];
+                        x264_t *t = h->thread[ (j+i)%h->i_thread_frames ];
                         double bits = t->rc->frame_size_planned;
                         if( !t->b_thread_active )
                             continue;
@@ -1821,16 +1821,16 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                if( h->fenc->i_frame < h->param.i_threads )
+                if( h->fenc->i_frame < h->i_thread_frames )
                     predicted_bits += (int64_t)h->fenc->i_frame * rcc->bitrate / rcc->fps;
                 else
-                    predicted_bits += (int64_t)(h->param.i_threads - 1) * rcc->bitrate / rcc->fps;
+                    predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
             }

             diff = predicted_bits - (int64_t)rce.expected_bits;
             q = rce.new_qscale;
             q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
-            if( ((h->fenc->i_frame + 1 - h->param.i_threads) >= rcc->fps) &&
+            if( ((h->fenc->i_frame + 1 - h->i_thread_frames) >= rcc->fps) &&
                 (rcc->expected_bits_sum > 0))
             {
                 /* Adjust quant based on the difference between
@@ -1897,7 +1897,7 @@ static float rate_estimate_qscale( x264_t *h )
             }
             else
             {
-                int i_frame_done = h->fenc->i_frame + 1 - h->param.i_threads;
+                int i_frame_done = h->fenc->i_frame + 1 - h->i_thread_frames;

                 q = get_qscale( h, &rce, rcc->wanted_bits_window / rcc->cplxr_sum, h->fenc->i_frame );

--
1.6.1.2


From 6d2cc932df587a109aac8c0b5a9370fe0a326baa Mon Sep 17 00:00:00 2001
From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
Date: Tue, 26 Jan 2010 16:01:54 -0800
Subject: [PATCH 06/13] Improve DTS generation, move DTS compression into libx264
 This change fixes some cases in which PTS could be less than DTS.

Additionally, a new parameter, b_dts_compress, enables DTS compression.
DTS compression eliminates negative DTS (i.e. initial delay) due to B-frames.
The algorithm changes timebase in order to avoid duplicating DTS.
Currently, in x264cli, only the FLV muxer uses it.  The MP4 muxer doesn't need it, as it uses an EditBox instead.
---
 common/common.c   |    1 +
 common/common.h   |    5 ++++
 encoder/encoder.c |   40 ++++++++++++++++++++++++++++++++++-
 output/flv.c      |   58 +++++++++++++++++-----------------------------------
 output/mp4.c      |   28 ++----------------------
 x264.c            |    6 ++++-
 x264.h            |    5 +++-
 7 files changed, 75 insertions(+), 68 deletions(-)

diff --git a/common/common.c b/common/common.c
index 9eed5c3..b454e37 100644
--- a/common/common.c
+++ b/common/common.c
@@ -157,6 +157,7 @@ void    x264_param_default( x264_param_t *param )
     param->b_annexb = 1;
     param->b_aud = 0;
     param->b_vfr_input = 1;
+    param->b_dts_compress = 0;
 }

 static int parse_enum( const char *arg, const char * const *names, int *dst )
diff --git a/common/common.h b/common/common.h
index 0f16e0a..ca15330 100644
--- a/common/common.h
+++ b/common/common.h
@@ -376,6 +376,9 @@ struct x264_t
     x264_pps_t      *pps;
     int             i_idr_pic_id;

+    /* Timebase multiplier for DTS compression */
+    int             i_dts_compress_multiplier;
+
     /* quantization matrix for decoding, [cqm][qp%6][coef] */
     int             (*dequant4_mf[4])[16];   /* [4][6][16] */
     int             (*dequant8_mf[2])[64];   /* [2][6][64] */
@@ -429,6 +432,8 @@ struct x264_t
         int i_delay;    /* Number of frames buffered for B reordering */
         int     i_bframe_delay;
         int64_t i_bframe_delay_time;
+        int64_t i_init_delta;
+        int64_t i_prev_dts[2];
         int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
         int b_have_sub8x8_esa;
     } frames;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 570f1d0..9524c10 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -863,6 +863,18 @@ x264_t *x264_encoder_open( x264_param_t *param )
     h->i_frame = -1;
     h->i_frame_num = 0;
     h->i_idr_pic_id = 0;
+    if( h->param.b_dts_compress )
+    {
+        /* Multiplier is the B-frame reordering delay plus one: h->i_dts_compress_multiplier == h->frames.i_bframe_delay + 1 */
+        h->i_dts_compress_multiplier = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 3 : 2) : 1;
+        if( h->i_dts_compress_multiplier != 1 )
+            x264_log( h, X264_LOG_DEBUG, "DTS compression changed timebase: %d/%d -> %d/%d\n",
+                      h->param.i_timebase_num, h->param.i_timebase_den,
+                      h->param.i_timebase_num, h->param.i_timebase_den * h->i_dts_compress_multiplier );
+        h->param.i_timebase_den *= h->i_dts_compress_multiplier; /* finer timebase; output PTS/DTS are scaled by the same factor */
+    }
+    else
+        h->i_dts_compress_multiplier = 1; /* no compression: timestamps and timebase are left untouched */

     h->sps = &h->sps_array[0];
     x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
@@ -2384,8 +2396,32 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
         pic_out->i_type = X264_TYPE_B;

     pic_out->b_keyframe = h->fenc->b_keyframe;
-    pic_out->i_pts = h->fenc->i_pts;
-    pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+
+    pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
+    if( h->frames.i_bframe_delay )
+    {
+        int64_t *i_prev_dts = thread_current->frames.i_prev_dts;
+        if( h->i_frame <= h->frames.i_bframe_delay )
+        {
+            if( h->i_dts_compress_multiplier == 1 )
+                pic_out->i_dts = h->fenc->i_dts - h->frames.i_bframe_delay_time;
+            else
+            {
+                /* DTS compression */
+                if( h->i_frame == 1 )
+                    thread_current->frames.i_init_delta = h->fenc->i_dts * h->i_dts_compress_multiplier;
+                pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
+            }
+        }
+        else
+            pic_out->i_dts = i_prev_dts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
+        i_prev_dts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_dts * h->i_dts_compress_multiplier;
+        h->fenc->i_dts = pic_out->i_dts;
+    }
+    else
+        pic_out->i_dts = h->fenc->i_dts;
+    assert( pic_out->i_pts >= pic_out->i_dts );
+
     pic_out->img.i_plane = h->fdec->i_plane;
     for(i = 0; i < 3; i++)
     {
diff --git a/output/flv.c b/output/flv.c
index 8a937cf..5ef5b0f 100644
--- a/output/flv.c
+++ b/output/flv.c
@@ -37,8 +37,6 @@ typedef struct
     int64_t i_fps_num;
     int64_t i_fps_den;
     int64_t i_framenum;
-    int     i_init_delay;
-    int     i_delay_time;

     uint64_t i_framerate_pos;
     uint64_t i_duration_pos;
@@ -46,8 +44,8 @@ typedef struct
     uint64_t i_bitrate_pos;

     uint8_t b_write_length;
-    int64_t i_init_delta;
-    int64_t i_prev_timestamps[2];
+    int64_t i_prev_dts;
+    int64_t i_prev_pts;

     int i_timebase_num;
     int i_timebase_den;
@@ -146,10 +144,8 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_flv->i_fps_den = p_param->i_fps_den;
     p_flv->i_timebase_num = p_param->i_timebase_num;
     p_flv->i_timebase_den = p_param->i_timebase_den;
-    p_flv->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
     p_flv->b_vfr_input = p_param->b_vfr_input;

-
     return 0;
 }

@@ -216,45 +212,29 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     flv_hnd_t *p_flv = handle;
     flv_buffer *c = p_flv->c;

-    int64_t dts;
-    int64_t cts;
-    int64_t offset;
-
-    if( !p_flv->i_framenum )
-        p_flv->i_delay_time = p_picture->i_dts;
+    int64_t dts = (int64_t)( (p_picture->i_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t cts = (int64_t)( (p_picture->i_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+    int64_t offset = cts - dts;

-    if( !p_flv->i_init_delay )
-        dts = cts = (int64_t)((p_picture->i_pts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-    else
+    if( p_flv->i_framenum )
     {
-        // Use DTS compression
-        dts = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum == 1 )
-            p_flv->i_init_delta = p_picture->i_dts - p_flv->i_delay_time;
-
-        if( p_flv->i_framenum > p_flv->i_init_delay )
+        int64_t prev_dts = (int64_t)( (p_flv->i_prev_dts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        int64_t prev_cts = (int64_t)( (p_flv->i_prev_pts * 1000 * ((double)p_flv->i_timebase_num / p_flv->i_timebase_den)) + 0.5 );
+        if( prev_dts == dts )
         {
-            dts = p_flv->i_prev_timestamps[ (p_flv->i_framenum - p_flv->i_init_delay) % p_flv->i_init_delay ];
-            dts = (int64_t)((dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
+            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
+                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
         }
-        else if( p_flv->i_init_delta )
+        if( prev_cts == cts )
         {
-            // Compressed DTSs might not fit in input timescale
-            double compressed_dts;
-            compressed_dts = (p_flv->i_framenum * ((double)p_flv->i_init_delta / (2 * p_flv->i_init_delay)));
-            dts = (int64_t)((compressed_dts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
+            double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
+            fprintf( stderr, "flv [warning]: duplicating CTS %"PRId64" is generated by rounding\n"
+                             "               current internal composition framerate: %.6f fps\n", cts, fps );
         }
-
-        p_flv->i_prev_timestamps[ p_flv->i_framenum % p_flv->i_init_delay ] = p_picture->i_dts - p_flv->i_delay_time;
-
-        cts = p_picture->i_pts;
-        cts = (int64_t)((cts * 1000 * p_flv->i_timebase_num / p_flv->i_timebase_den) + 0.5);
-     }
-
-    offset = cts - dts;
-
-    assert( cts >= dts );
+    }
+    p_flv->i_prev_dts = p_picture->i_dts;
+    p_flv->i_prev_pts = p_picture->i_pts;

     // A new frame - write packet header
     x264_put_byte( c, FLV_TAG_TYPE_VIDEO );
diff --git a/output/mp4.c b/output/mp4.c
index 7889e4f..e3ad9c6 100644
--- a/output/mp4.c
+++ b/output/mp4.c
@@ -34,11 +34,7 @@ typedef struct
     int i_time_res;
     int64_t i_time_inc;
     int i_numframe;
-    int i_init_delay;
     int i_delay_time;
-
-    int64_t i_prev_timestamps[2];
-    int64_t i_init_delta;
 } mp4_hnd_t;

 static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
@@ -195,8 +191,6 @@ static int set_param( hnd_t handle, x264_param_t *p_param )
     p_mp4->i_time_res = p_param->i_timebase_den;
     p_mp4->i_time_inc = p_param->i_timebase_num;

-    p_mp4->i_init_delay = p_param->i_bframe ? (p_param->i_bframe_pyramid ? 2 : 1) : 0;
-
     p_mp4->i_track = gf_isom_new_track( p_mp4->p_file, 0, GF_ISOM_MEDIA_VISUAL,
                                         p_mp4->i_time_res );

@@ -282,7 +276,6 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     mp4_hnd_t *p_mp4 = handle;
     int64_t dts;
     int64_t cts;
-    int32_t offset = 0;

     memcpy( p_mp4->p_sample->data + p_mp4->p_sample->dataLength, p_nalu, i_size );
     p_mp4->p_sample->dataLength += i_size;
@@ -290,27 +283,12 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
     if( !p_mp4->i_numframe )
         p_mp4->i_delay_time = p_picture->i_dts * -1;

-    if( !p_mp4->i_init_delay )
-        dts = cts = p_picture->i_pts * p_mp4->i_time_inc;
-    else
-    {
-        if( p_mp4->i_numframe <= p_mp4->i_init_delay )
-            dts = p_picture->i_dts + p_mp4->i_delay_time;
-        else
-            dts = p_mp4->i_prev_timestamps[ (p_mp4->i_numframe - p_mp4->i_init_delay) % p_mp4->i_init_delay ] + p_mp4->i_delay_time;
-
-        // unordered pts
-        p_mp4->i_prev_timestamps[ p_mp4->i_numframe % p_mp4->i_init_delay ] = p_picture->i_dts + p_mp4->i_delay_time;
-
-        dts *= p_mp4->i_time_inc;
-        cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
-
-        offset = cts - dts;
-    }
+    dts = (p_picture->i_dts + p_mp4->i_delay_time) * p_mp4->i_time_inc;
+    cts = (p_picture->i_pts + p_mp4->i_delay_time) * p_mp4->i_time_inc;

     p_mp4->p_sample->IsRAP = p_picture->b_keyframe;
     p_mp4->p_sample->DTS = dts;
-    p_mp4->p_sample->CTS_Offset = offset;
+    p_mp4->p_sample->CTS_Offset = (uint32_t)(cts - dts);
     gf_isom_add_sample( p_mp4->p_file, p_mp4->i_track, p_mp4->i_descidx, p_mp4->p_sample );

     p_mp4->p_sample->dataLength = 0;
diff --git a/x264.c b/x264.c
index db33536..d77fa47 100644
--- a/x264.c
+++ b/x264.c
@@ -683,6 +683,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mp4_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
 #else
         fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
@@ -694,6 +695,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = mkv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 0;
         param->b_repeat_headers = 0;
     }
     else if( !strcasecmp( ext, "flv" ) )
@@ -701,6 +703,7 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
         output = flv_output;
         param->b_annexb = 0;
         param->b_aud = 0;
+        param->b_dts_compress = 1;
         param->b_repeat_headers = 0;
     }
     else
@@ -1528,7 +1531,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
             {
                 if( h->param.i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
-                             i_frame, pic.i_pts, largest_pts );
+                             i_frame, pic.i_pts * h->i_dts_compress_multiplier, largest_pts * h->i_dts_compress_multiplier );
                 else if( pts_warning_cnt == MAX_PTS_WARNING )
                     fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
                 pts_warning_cnt++;
@@ -1583,6 +1586,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
         duration = (double)param->i_fps_den / param->i_fps_num;
     else
         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
+    duration *= h->i_dts_compress_multiplier;

     i_end = x264_mdate();
     input.picture_clean( &pic );
diff --git a/x264.h b/x264.h
index 1223df7..2550864 100644
--- a/x264.h
+++ b/x264.h
@@ -35,7 +35,7 @@

 #include <stdarg.h>

-#define X264_BUILD 83
+#define X264_BUILD 84

 /* x264_t:
  *      opaque handler for encoder */
@@ -316,6 +316,9 @@ typedef struct x264_param_t
     int b_vfr_input;            /* VFR input */
     int i_timebase_num;         /* Timebase numerator */
     int i_timebase_den;         /* Timebase denominator */
+    int b_dts_compress;         /* DTS compression: this algorithm eliminates negative DTS
+                                 * by compressing them to be less than the second PTS.
+                                 * Warning: this will change the timebase! */

     /* Slicing parameters */
     int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
--
1.6.1.2


From 8a28b47a896fbe43866d9e6d3481902210f4107c Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 09:26:35 -0800
Subject: [PATCH 07/13] Fix cross-compiling with lavf, add support for ffms2.pc
 Also update configure script to work with newest ffms.

---
 configure |   52 ++++++++++++++++++++++++++++++----------------------
 1 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/configure b/configure
index 9f04a18..133a569 100755
--- a/configure
+++ b/configure
@@ -416,25 +416,23 @@ fi

 if [ "$lavf_input" = "auto" ] ; then
     lavf_input="no"
-    if [ `${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL` ] ; then
-        LAVF_LDFLAGS="$LAVF_LDFLAGS $(pkg-config --libs libavformat libavcodec libswscale)"
-        LAVF_CFLAGS="$LAVF_CFLAGS $(pkg-config --cflags libavformat libavcodec libswscale)"
+    if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL; then
+        LAVF_LIBS="$LAVF_LIBS $(${cross_prefix}pkg-config --libs libavformat libavcodec libswscale)"
+        LAVF_CFLAGS="$LAVF_CFLAGS $(${cross_prefix}pkg-config --cflags libavformat libavcodec libswscale)"
     fi
-    if [ -z "$LAVF_LDFLAGS" -a -z "$LAVF_CFLAGS" ]; then
-        LAVF_LDFLAGS="-lavformat -lswscale"
+    if [ -z "$LAVF_LIBS" -a -z "$LAVF_CFLAGS" ]; then
+        LAVF_LIBS="-lavformat -lswscale"
         for lib in -lpostproc -lavcodec -lavutil -lm -lz -lbz2 $libpthread -lavifil32; do
-            cc_check "" $lib && LAVF_LDFLAGS="$LAVF_LDFLAGS $lib"
+            cc_check "" $lib && LAVF_LIBS="$LAVF_LIBS $lib"
         done
     fi
-    LAVF_LDFLAGS="-L. $LAVF_LDFLAGS"
-    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LDFLAGS" && \
-       cc_check libswscale/swscale.h "$LAVF_CFLAGS $LAVF_LDFLAGS" ; then
+    LAVF_LIBS="-L. $LAVF_LIBS"
+    if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" && \
+       cc_check libswscale/swscale.h "$LAVF_CFLAGS $LAVF_LIBS" ; then
         # avcodec_decode_video2 is currently the most recently added function that we use; it was added in r18351
-        if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LDFLAGS" "avcodec_decode_video2( NULL, NULL, NULL, NULL );" ; then
+        if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_decode_video2( NULL, NULL, NULL, NULL );" ; then
             lavf_input="yes"
             echo "#define LAVF_INPUT" >> config.h
-            LDFLAGSCLI="$LDFLAGSCLI $LAVF_LDFLAGS"
-            [ -n "$LAVF_CFLAGS" ] && CFLAGS="$CFLAGS $LAVF_CFLAGS"
         else
             echo "Warning: libavformat is too old, update to ffmpeg r18351+"
         fi
@@ -443,19 +441,29 @@ fi

 if [ "$ffms_input" = "auto" ] ; then
     ffms_input="no"
-    if [ "$lavf_input" = "yes" ] ; then
-        if cc_check ffms.h -lFFMS2 "FFMS_DestroyVideoSource(0);" ; then
-           ffms_input="yes"
-           echo "#define FFMS_INPUT" >> config.h
-           LDFLAGSCLI="$LDFLAGSCLI -lFFMS2"
-        elif cc_check ffms.h "-lFFMS2 $LAVF_LDFLAGS -lstdc++" "FFMS_DestroyVideoSource(0);" ; then
-           ffms_input="yes"
-           echo "#define FFMS_INPUT" >> config.h
-           LDFLAGSCLI="-lFFMS2 $LDFLAGSCLI -lstdc++"
-        fi
+    if ${cross_prefix}pkg-config --exists ffms2 2>$DEVNULL; then  # prefer ffms2.pc (cross-prefixed pkg-config) when available
+        FFMS2_LIBS="$FFMS2_LIBS $(${cross_prefix}pkg-config --libs ffms2)"
+        FFMS2_CFLAGS="$FFMS2_CFLAGS $(${cross_prefix}pkg-config --cflags ffms2)"  # append to any preset CFLAGS; linker flags do not belong here
+    fi
+    [ -z "$FFMS2_LIBS" ] && FFMS2_LIBS="-lffms2"
+
+    if cc_check ffms.h "$FFMS2_CFLAGS $FFMS2_LIBS" "FFMS_DestroyVideoSource(0);" ; then
+        ffms_input="yes"
+    elif cc_check ffms.h "$FFMS2_CFLAGS $FFMS2_LIBS -lstdc++ $LAVF_LIBS" "FFMS_DestroyVideoSource(0);" ; then
+        ffms_input="yes"
+        FFMS2_LIBS="$FFMS2_LIBS -lstdc++ $LAVF_LIBS"
     fi
 fi

+if [ "$ffms_input" = "yes" ]; then
+    LDFLAGSCLI="$FFMS2_LIBS $LDFLAGSCLI"
+    [ -n "$FFMS2_CFLAGS" ] && CFLAGS="$CFLAGS $FFMS2_CFLAGS"
+    echo "#define FFMS_INPUT" >> config.h
+elif [ "$lavf_input" = "yes" ]; then
+    LDFLAGSCLI="$LAVF_LIBS $LDFLAGSCLI"
+    [ -n "$LAVF_CFLAGS" ] && CFLAGS="$CFLAGS $LAVF_CFLAGS"
+fi
+
 MP4_LDFLAGS="-lgpac_static"
 if [ $SYS = MINGW ]; then
     MP4_LDFLAGS="$MP4_LDFLAGS -lwinmm"
--
1.6.1.2


From 6ec5692f70d45538e2ae7a76bd4656e5ca6ff918 Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 10:12:42 -0800
Subject: [PATCH 08/13] Add config.log support
 Now, if configure fails, you'll be able to see why.

---
 .gitignore |    1 +
 configure  |  106 +++++++++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 89 insertions(+), 18 deletions(-)

diff --git a/.gitignore b/.gitignore
index 308b793..9d8cb70 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 .depend
 config.h
 config.mak
+config.log
 x264
 checkasm

diff --git a/configure b/configure
index 133a569..7cb14ba 100755
--- a/configure
+++ b/configure
@@ -27,24 +27,77 @@ echo ""
 exit 1
 fi

+log_check() {  # open a "checking <what>... " entry in config.log; no newline, so the verdict lands on the same line
+    echo -n "checking $1... " >> config.log
+}
+
+log_ok() {  # record a positive verdict
+    echo "yes" >> config.log
+}
+
+log_fail() {  # record a negative verdict
+    echo "no" >> config.log
+}
+
+log_msg() {  # append one arbitrary line ($1) to config.log
+    echo "$1" >> config.log
+}
+
 cc_check() {
+    if [ -z "$3" ]; then  # $1 = header to include, $2 = extra compiler/linker flags, $3 = statement placed in main()
+        if [ -z "$1" ]; then
+            log_check "whether $CC works"
+        else
+            log_check "for $1"
+        fi
+    elif [ -z "$1" ]; then
+        log_check "whether $CC supports $3"
+    else
+        log_check "for $3 on $1"
+    fi
     rm -f conftest.c
     [ -n "$1" ] && echo "#include <$1>" > conftest.c
     echo "int main () { $3 return 0; }" >> conftest.c
-    $CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2 -o conftest 2>$DEVNULL
+    if $CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2 -o conftest >conftest.log 2>&1; then  # keep compiler output for config.log
+        res=$?
+        log_ok
+    else
+        res=$?  # exit status of the failed compiler run, returned to the caller below
+        log_fail
+        log_msg "Failed commandline was:"
+        log_msg "--------------------------------------------------"
+        log_msg "$CC conftest.c $CFLAGS $LDFLAGS $LDFLAGSCLI $2 -o conftest"  # must mirror the command actually run above
+        cat conftest.log >> config.log
+        log_msg "--------------------------------------------------"
+    fi
+    return $res
 }

 as_check() {
+    log_check "whether $AS supports $1"  # $1 = assembly snippet to try, $2 = extra assembler flags
     echo "$1" > conftest.asm
-    $AS conftest.asm $ASFLAGS $2 -o conftest.o 2>$DEVNULL
+    if $AS conftest.asm $ASFLAGS $2 -o conftest.o >conftest.log 2>&1; then  # keep assembler output for config.log
+        res=$?
+        log_ok
+    else
+        res=$?  # exit status of the failed assembler run, returned to the caller below
+        log_fail
+        log_msg "Failed commandline was:"
+        log_msg "--------------------------------------------------"
+        log_msg "$AS conftest.asm $ASFLAGS $2 -o conftest.o"
+        cat conftest.log >> config.log
+        log_msg "--------------------------------------------------"
+    fi
+    return $res
 }

 die() {
+    log_msg "DIED: $@"
     echo "$@"
     exit 1
 }

-rm -f config.h config.mak x264.pc conftest*
+rm -f config.h config.mak config.log x264.pc conftest*

 prefix='/usr/local'
 exec_prefix='${prefix}'
@@ -320,6 +373,16 @@ case $host_cpu in
     ;;
 esac

+log_msg "x264 configure script"  # first line of config.log
+if [ -n "$*" ]; then
+    msg="Command line options:"
+    for i in "$@"; do  # quoted: an option containing spaces must be logged as one item, not split into words
+        msg="$msg \"$i\""
+    done
+    log_msg "$msg"
+fi
+log_msg ""
+
 # check requirements

 cc_check || die "No working C compiler found."
@@ -506,9 +569,9 @@ if [ "$debug" = "yes" ]; then
 elif [ $ARCH = ARM ]; then
     # arm-gcc-4.2 produces incorrect output with -ffast-math
     # and it doesn't save any speed anyway on 4.4, so disable it
-    CFLAGS="-O4 -fno-fast-math $CFLAGS"
+    CFLAGS="-O3 -fno-fast-math $CFLAGS"
 else
-    CFLAGS="-O4 -ffast-math $CFLAGS"
+    CFLAGS="-O3 -ffast-math $CFLAGS"
 fi

 if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
@@ -585,20 +648,27 @@ Libs: $pclibs
 Cflags: -I$includedir
 EOF

+cat > conftest.log <<EOF
+Platform:   $ARCH
+System:     $SYS
+asm:        $asm
+avs input:  $avs_input
+lavf input: $lavf_input
+ffms input: $ffms_input
+mp4 output: $mp4_output
+pthread:    $pthread
+debug:      $debug
+gprof:      $gprof
+PIC:        $pic
+shared:     $shared
+visualize:  $vis
+EOF
+
+echo >> config.log  # blank separator line before the summary in the log
+cat conftest.log >> config.log  # the summary staged above is recorded in config.log ...
+cat conftest.log  # ... and printed to the terminal
+rm conftest.log

-echo "Platform:   $ARCH"
-echo "System:     $SYS"
-echo "asm:        $asm"
-echo "avs input:  $avs_input"
-echo "lavf input: $lavf_input"
-echo "ffms input: $ffms_input"
-echo "mp4 output: $mp4_output"
-echo "pthread:    $pthread"
-echo "debug:      $debug"
-echo "gprof:      $gprof"
-echo "PIC:        $pic"
-echo "shared:     $shared"
-echo "visualize:  $vis"
 echo
 echo "You can run 'make' or 'make fprofiled' now."

--
1.6.1.2


From 959e19ea08325dc7859b3dbc2c797e64e03899cc Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 13:11:08 -0800
Subject: [PATCH 09/13] Add configure check for log2f support
 Some incredibly braindamaged operating systems, such as FreeBSD, blatantly ignore the C specification and omit certain functions that are required by ISO C.
 log2f is one of these functions that periodically goes missing in such operating systems.

---
 common/osdep.h |    4 ++++
 configure      |    4 ++++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/common/osdep.h b/common/osdep.h
index abae9ac..9988803 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -34,6 +34,10 @@
 #include <inttypes.h>
 #endif

+#ifndef HAVE_LOG2F /* defined by configure only when a log2f() test program compiles */
+#define log2f(x) (logf((x))/0.693147180559945f) /* log2(x) = ln(x) / ln(2) */
+#endif
+
 #ifdef _WIN32
 #include <io.h>    // _setmode()
 #include <fcntl.h> // _O_BINARY
diff --git a/configure b/configure
index 7cb14ba..271d919 100755
--- a/configure
+++ b/configure
@@ -477,6 +477,10 @@ if test "$pthread" = "yes" ; then
     LDFLAGS="$LDFLAGS $libpthread"
 fi

+if cc_check "math.h" "-Werror" "log2f(2);" ; then
+    CFLAGS="$CFLAGS -DHAVE_LOG2F"
+fi
+
 if [ "$lavf_input" = "auto" ] ; then
     lavf_input="no"
     if ${cross_prefix}pkg-config --exists libavformat libavcodec libswscale 2>$DEVNULL; then
--
1.6.1.2


From bc027c79f3c75cbc89163443511a1009b46344b9 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Wed, 27 Jan 2010 19:41:27 -0800
Subject: [PATCH 10/13] Fix implicit CBR message to only print when in ABR mode
 Also make it print outside of debug mode.

---
 encoder/ratecontrol.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 746b17a..5304616 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -436,8 +436,16 @@ int x264_ratecontrol_new( x264_t *h )
         }
         else if( h->param.rc.i_vbv_max_bitrate == 0 )
         {
-            x264_log( h, X264_LOG_DEBUG, "VBV maxrate unspecified, assuming CBR\n" );
-            h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+            if( h->param.rc.i_rc_method == X264_RC_ABR ) /* ABR: reuse the target bitrate as the VBV maxrate */
+            {
+                x264_log( h, X264_LOG_INFO, "VBV maxrate unspecified, assuming CBR\n" );
+                h->param.rc.i_vbv_max_bitrate = h->param.rc.i_bitrate;
+            }
+            else /* any other rate-control method: drop the VBV buffer size instead */
+            {
+                x264_log( h, X264_LOG_INFO, "VBV bufsize set but maxrate unspecified, ignored\n" );
+                h->param.rc.i_vbv_buffer_size = 0;
+            }
         }
     }
     if( h->param.rc.i_vbv_max_bitrate < h->param.rc.i_bitrate &&
--
1.6.1.2


From 3d456de7c5d4ca0e031a310ba6566690f4ff82fc Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Wed, 27 Jan 2010 20:29:50 -0800
Subject: [PATCH 11/13] Implement ffms2 version check
 Depends on ffms2 version 2.13.1 (r272).
 Tries pkg-config's built-in version checking first.
 Uses only the preprocessor to avoid cross-compilation issues.

---
 configure |   22 ++++++++++++++++++++--
 1 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 271d919..adebdb8 100755
--- a/configure
+++ b/configure
@@ -507,10 +507,17 @@ if [ "$lavf_input" = "auto" ] ; then
 fi

 if [ "$ffms_input" = "auto" ] ; then
+    ffms_major="2"; ffms_minor="13"; ffms_micro="1"; ffms_bump="0"
+
     ffms_input="no"
-    if ${cross_prefix}pkg-config --exists ffms2 2>$DEVNULL; then
+    [ $ffms_micro -gt 0 -o $ffms_bump -gt 0 ] && vmicro=".$ffms_micro"
+    [ $ffms_bump -gt 0 ] && vbump=".$ffms_bump"
+    if ${cross_prefix}pkg-config --atleast-version="$ffms_major.$ffms_minor$vmicro$vbump" ffms2 2>$DEVNULL; then
         FFMS2_LIBS="$FFMS2_LIBS $(${cross_prefix}pkg-config --libs ffms2)"
-        FFMS2_CFLAGS="$FFMS2_LIBS $(${cross_prefix}pkg-config --cflags ffms2)"
+        FFMS2_CFLAGS="$FFMS2_CFLAGS $(${cross_prefix}pkg-config --cflags ffms2)"  # extend the compile flags, not the link flags
+        api_check="no"
+    else
+        api_check="yes"
     fi
     [ -z "$FFMS2_LIBS" ] && FFMS2_LIBS="-lffms2"

@@ -520,6 +527,17 @@ if [ "$ffms_input" = "auto" ] ; then
         ffms_input="yes"
         FFMS2_LIBS="$FFMS2_LIBS -lstdc++ $LAVF_LIBS"
     fi
+
+    if [ $api_check = "yes" -a $ffms_input = "yes" ]; then  # pkg-config did not confirm the version: test FFMS_VERSION from the header instead
+        log_check "whether ffms2 version is at least $ffms_major.$ffms_minor$vmicro$vbump"
+        $CC $CFLAGS $FFMS2_CFLAGS -c -o conftest -x c - >conftest.log 2>&1 <<EOF
+#include <ffms.h>
+#if FFMS_VERSION < (($ffms_major << 24) | ($ffms_minor << 16) | ($ffms_micro << 8) | $ffms_bump)
+#error Requires ffms2 version $ffms_major.$ffms_minor$vmicro$vbump
+#endif
+EOF
+        [ $? = 0 ] && log_ok || { ffms_input="no"; log_fail; cat conftest.log >> config.log; }  # on failure, keep the compiler's reason in config.log
+    fi
 fi

 if [ "$ffms_input" = "yes" ]; then
--
1.6.1.2


From c0e2d3d05b26e48ade557eface323c42649e1d83 Mon Sep 17 00:00:00 2001
From: Steven Walters <kemuri9@gmail.com>
Date: Thu, 28 Jan 2010 17:26:40 -0800
Subject: [PATCH 12/13] Fix stat with large file support

---
 common/common.h |    1 -
 common/osdep.h  |    1 +
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/common/common.h b/common/common.h
index ca15330..455bc61 100644
--- a/common/common.h
+++ b/common/common.h
@@ -70,7 +70,6 @@ do {\
 /****************************************************************************
  * Includes
  ****************************************************************************/
-#include <sys/stat.h>
 #include "osdep.h"
 #include <stdarg.h>
 #include <stddef.h>
diff --git a/common/osdep.h b/common/osdep.h
index 9988803..3d12072 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -27,6 +27,7 @@
 #define _LARGEFILE_SOURCE 1
 #define _FILE_OFFSET_BITS 64
 #include <stdio.h>
+#include <sys/stat.h>

 #ifdef HAVE_STDINT_H
 #include <stdint.h>
--
1.6.1.2


From b4276573f79fef3084e50b22a0ffad8bb0b8779c Mon Sep 17 00:00:00 2001
From: Diogo Franco <diogomfranco@gmail.com>
Date: Thu, 28 Jan 2010 17:28:03 -0800
Subject: [PATCH 13/13] Move -D CFLAGS to config.h

---
 Makefile        |   12 +++++-----
 common/common.h |    1 -
 common/osdep.h  |    2 +
 configure       |   60 ++++++++++++++++++++++++++++++------------------------
 4 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/Makefile b/Makefile
index f643228..cef8725 100644
--- a/Makefile
+++ b/Makefile
@@ -18,26 +18,26 @@ SRCCLI = x264.c input/yuv.c input/y4m.c output/raw.c \

 SRCSO =

-MUXERS := $(shell grep -E "(IN|OUT)PUT" config.h)
+CONFIG := $(shell cat config.h)

 # Optional muxer module sources
-ifneq ($(findstring AVS_INPUT, $(MUXERS)),)
+ifneq ($(findstring AVS_INPUT, $(CONFIG)),)
 SRCCLI += input/avs.c
 endif

-ifneq ($(findstring HAVE_PTHREAD, $(CFLAGS)),)
+ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
 SRCCLI += input/thread.c
 endif

-ifneq ($(findstring LAVF_INPUT, $(MUXERS)),)
+ifneq ($(findstring LAVF_INPUT, $(CONFIG)),)
 SRCCLI += input/lavf.c
 endif

-ifneq ($(findstring FFMS_INPUT, $(MUXERS)),)
+ifneq ($(findstring FFMS_INPUT, $(CONFIG)),)
 SRCCLI += input/ffms.c
 endif

-ifneq ($(findstring MP4_OUTPUT, $(MUXERS)),)
+ifneq ($(findstring MP4_OUTPUT, $(CONFIG)),)
 SRCCLI += output/mp4.c
 endif

diff --git a/common/common.h b/common/common.h
index 455bc61..a52e531 100644
--- a/common/common.h
+++ b/common/common.h
@@ -102,7 +102,6 @@ typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_AL
 #include "dct.h"
 #include "cabac.h"
 #include "quant.h"
-#include "config.h"

 /****************************************************************************
  * General functions
diff --git a/common/osdep.h b/common/osdep.h
index 3d12072..907bcee 100644
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -29,6 +29,8 @@
 #include <stdio.h>
 #include <sys/stat.h>

+#include "config.h"
+
 #ifdef HAVE_STDINT_H
 #include <stdint.h>
 #else
diff --git a/configure b/configure
index adebdb8..3bb5a40 100755
--- a/configure
+++ b/configure
@@ -91,6 +91,10 @@ as_check() {
     return $res
 }

+define() {  # append "#define $1" to config.h, followed by " $2" when a value is given
+    echo "#define $1${2:+ $2}" >> config.h
+}
+
 die() {
     log_msg "DIED: $@"
     echo "$@"
@@ -208,7 +212,7 @@ for opt do
             ;;
         --enable-visualize)
             LDFLAGS="$LDFLAGS -L/usr/X11R6/lib -lX11"
-            CFLAGS="$CFLAGS -DVISUALIZE=1"
+            define VISUALIZE
             vis="yes"
             ;;
         --host=*)
@@ -243,7 +247,7 @@ host_os="${host#*-}"
 case $host_os in
   beos*)
     SYS="BEOS"
-    CFLAGS="$CFLAGS -DHAVE_MALLOC_H"
+    define HAVE_MALLOC_H
     ;;
   darwin*)
     SYS="MACOSX"
@@ -259,7 +263,7 @@ case $host_os in
     ;;
   kfreebsd*-gnu)
     SYS="FREEBSD"
-    CFLAGS="$CFLAGS -DHAVE_MALLOC_H"
+    define HAVE_MALLOC_H
     LDFLAGS="$LDFLAGS -lm"
     ;;
   netbsd*)
@@ -273,7 +277,7 @@ case $host_os in
     ;;
   *linux*)
     SYS="LINUX"
-    CFLAGS="$CFLAGS -DHAVE_MALLOC_H"
+    define HAVE_MALLOC_H
     LDFLAGS="$LDFLAGS -lm"
     ;;
   cygwin*)
@@ -292,7 +296,7 @@ case $host_os in
     ;;
   sunos*|solaris*)
     SYS="SunOS"
-    CFLAGS="$CFLAGS -DHAVE_MALLOC_H"
+    define HAVE_MALLOC_H
     LDFLAGS="$LDFLAGS -lm"
     HAVE_GETOPT_LONG=0
     ;;
@@ -341,7 +345,8 @@ case $host_cpu in
     then
       CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4"
     else
-      CFLAGS="$CFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H"
+      CFLAGS="$CFLAGS -maltivec -mabi=altivec"
+      define HAVE_ALTIVEC_H
     fi
     ;;
   sparc)
@@ -407,17 +412,17 @@ if [ $asm = yes -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
         echo "If you really want to compile without asm, configure with --disable-asm."
         exit 1
     fi
-    CFLAGS="$CFLAGS -DHAVE_MMX"
+    define HAVE_MMX
 fi

 if [ $asm = yes -a $ARCH = ARM ] ; then
     # set flags so neon is built by default
     echo $CFLAGS | grep -Eq '(-mcpu|-march|-mfpu|-mfloat-abi)' || CFLAGS="$CFLAGS -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp"

-    if  cc_check '' '' 'asm("rev ip, ip");' ; then      CFLAGS="$CFLAGS -DHAVE_ARMV6"
-        cc_check '' '' 'asm("movt r0, #0");'         && CFLAGS="$CFLAGS -DHAVE_ARMV6T2"
-        cc_check '' '' 'asm("vadd.i16 q0, q0, q0");' && CFLAGS="$CFLAGS -DHAVE_NEON"
-        ASFLAGS="$ASFLAGS $CFLAGS -c"
+    if  cc_check '' '' 'asm("rev ip, ip");' ; then      define HAVE_ARMV6   && ASFLAGS="$ASFLAGS -DHAVE_ARMV6"
+        cc_check '' '' 'asm("movt r0, #0");'         && define HAVE_ARMV6T2 && ASFLAGS="$ASFLAGS -DHAVE_ARMV6T2"
+        cc_check '' '' 'asm("vadd.i16 q0, q0, q0");' && define HAVE_NEON    && ASFLAGS="$ASFLAGS -DHAVE_NEON"
+        ASFLAGS="$ASFLAGS -c"
     else
         echo "You specified a pre-ARMv6 or Thumb-1 CPU in your CFLAGS."
         echo "If you really want to run on such a CPU, configure with --disable-asm."
@@ -428,12 +433,13 @@ fi
 [ $asm = no ] && AS=""
 [ "x$AS" = x ] && asm="no"

-CFLAGS="$CFLAGS -DARCH_$ARCH -DSYS_$SYS"
+define ARCH_$ARCH
+define SYS_$SYS

 echo "int i = 0x42494745; double f = 0x1.0656e6469616ep+102;" > conftest.c
 $CC $CFLAGS conftest.c -c -o conftest.o 2>$DEVNULL || die "endian test failed"
 if grep -q BIGE conftest.o && grep -q FPendian conftest.o ; then
-    CFLAGS="$CFLAGS -DWORDS_BIGENDIAN"
+    define WORDS_BIGENDIAN
 elif !(grep -q EGIB conftest.o && grep -q naidnePF conftest.o) ; then
     die "endian test failed"
 fi
@@ -457,11 +463,11 @@ if test "$pthread" = "auto" ; then
             elif cc_check pthread.h "-lpthreadGC2 -lwsock32 -DPTW32_STATIC_LIB" "pthread_create(0,0,0,0);" ; then
                 pthread="yes"
                 libpthread="-lpthreadGC2 -lwsock32"
-                CFLAGS="$CFLAGS -DPTW32_STATIC_LIB"
+                define PTW32_STATIC_LIB
             elif cc_check pthread.h "-lpthreadGC2 -lws2_32 -DPTW32_STATIC_LIB" "pthread_create(0,0,0,0);" ; then
                 pthread="yes"
                 libpthread="-lpthreadGC2 -lws2_32"
-                CFLAGS="$CFLAGS -DPTW32_STATIC_LIB"
+                define PTW32_STATIC_LIB
             fi
             ;;
         OPENBSD)
@@ -473,12 +479,12 @@ if test "$pthread" = "auto" ; then
     esac
 fi
 if test "$pthread" = "yes" ; then
-    CFLAGS="$CFLAGS -DHAVE_PTHREAD"
+    define HAVE_PTHREAD
     LDFLAGS="$LDFLAGS $libpthread"
 fi

 if cc_check "math.h" "-Werror" "log2f(2);" ; then
-    CFLAGS="$CFLAGS -DHAVE_LOG2F"
+    define HAVE_LOG2F
 fi

 if [ "$lavf_input" = "auto" ] ; then
@@ -499,7 +505,7 @@ if [ "$lavf_input" = "auto" ] ; then
         # avcodec_decode_video2 is currently the most recently added function that we use; it was added in r18351
         if cc_check libavformat/avformat.h "$LAVF_CFLAGS $LAVF_LIBS" "avcodec_decode_video2( NULL, NULL, NULL, NULL );" ; then
             lavf_input="yes"
-            echo "#define LAVF_INPUT" >> config.h
+            define LAVF_INPUT
         else
             echo "Warning: libavformat is too old, update to ffmpeg r18351+"
         fi
@@ -543,7 +549,7 @@ fi
 if [ "$ffms_input" = "yes" ]; then
     LDFLAGSCLI="$FFMS2_LIBS $LDFLAGSCLI"
     [ -n "$FFMS2_CFLAGS" ] && CFLAGS="$CFLAGS $FFMS2_CFLAGS"
-    echo "#define FFMS_INPUT" >> config.h
+    define FFMS_INPUT
 elif [ "$lavf_input" = "yes" ]; then
     LDFLAGSCLI="$LAVF_LIBS $LDFLAGSCLI"
     [ -n "$LAVF_CFLAGS" ] && CFLAGS="$CFLAGS $LAVF_CFLAGS"
@@ -558,7 +564,7 @@ if [ "$mp4_output" = "auto" ] ; then
     cc_check gpac/isomedia.h "$MP4_LDFLAGS" && mp4_output="yes"
 fi
 if [ "$mp4_output" = "yes" ] ; then
-    echo "#define MP4_OUTPUT" >> config.h
+    define MP4_OUTPUT
     LDFLAGSCLI="$LDFLAGSCLI $MP4_LDFLAGS"
 fi

@@ -566,11 +572,11 @@ if [ "$avs_input" = "auto" ] ; then
     avs_input=no
     if [ $SYS = MINGW ] && cc_check avisynth_c.h ; then
         avs_input="yes"
-        echo "#define AVS_INPUT" >> config.h
-        echo "#define HAVE_AVISYNTH_C_H" >> config.h
+        define AVS_INPUT
+        define HAVE_AVISYNTH_C_H
     elif [ $SYS = MINGW ] && cc_check extras/avisynth_c.h ; then
         avs_input="yes"
-        echo "#define AVS_INPUT" >> config.h
+        define AVS_INPUT
     fi
 fi

@@ -597,11 +603,11 @@ else
 fi

 if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
-    echo "#define fseek fseeko" >> config.h
-    echo "#define ftell ftello" >> config.h
+    define fseek fseeko
+    define ftell ftello
 elif cc_check "stdio.h" "" "fseeko64(stdin,0,0);" ; then
-    echo "#define fseek fseeko64" >> config.h
-    echo "#define ftell ftello64" >> config.h
+    define fseek fseeko64
+    define ftell ftello64
 fi

 rm -f conftest*
--
1.6.1.2