Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 8th, 2012  |  syntax: Diff  |  size: 10.23 KB  |  hits: 44  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. diff --git a/common/common.h b/common/common.h
  2. index 5e34212..cf5c1e4 100644
  3. --- a/common/common.h
  4. +++ b/common/common.h
  5. @@ -56,6 +56,7 @@ do {\
  6.  #define X264_BFRAME_MAX 16
  7.  #define X264_REF_MAX 16
  8.  #define X264_THREAD_MAX 128
  9. +#define X264_LOOKAHEAD_THREAD_MAX 16
  10.  #define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16)
  11.  #define X264_LOOKAHEAD_MAX 250
  12.  #define QP_BD_OFFSET (6*(BIT_DEPTH-8))
  13. diff --git a/common/macroblock.c b/common/macroblock.c
  14. index 11c3e75..f175fef 100644
  15. --- a/common/macroblock.c
  16. +++ b/common/macroblock.c
  17. @@ -396,6 +396,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  18.      }
  19.      int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
  20.      scratch_size = X264_MAX( scratch_size, buf_mbtree );
  21. +    //int buf_lookahead_threads = ((h->mb.i_mb_height + h->param.i_lookahead_threads-1) / h->param.i_lookahead_threads) * sizeof(int);
  22.      if( scratch_size )
  23.          CHECKED_MALLOC( h->scratch_buffer, scratch_size );
  24.      else
  25. diff --git a/common/threadpool.c b/common/threadpool.c
  26. index f7a95fc..61e5b15 100644
  27. --- a/common/threadpool.c
  28. +++ b/common/threadpool.c
  29. @@ -83,7 +83,7 @@ int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
  30.  
  31.      pool->init_func = init_func;
  32.      pool->init_arg  = init_arg;
  33. -    pool->threads   = X264_MIN( threads, X264_THREAD_MAX );
  34. +    pool->threads   = threads;
  35.  
  36.      CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
  37.  
  38. diff --git a/encoder/encoder.c b/encoder/encoder.c
  39. index b42d5dc..f0217fc 100644
  40. --- a/encoder/encoder.c
  41. +++ b/encoder/encoder.c
  42. @@ -494,6 +494,8 @@ static int x264_validate_parameters( x264_t *h, int b_open )
  43.  
  44.      if( h->param.i_threads == X264_THREADS_AUTO )
  45.          h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
  46. +    if( h->param.i_lookahead_threads == X264_THREADS_AUTO )
  47. +        h->param.i_lookahead_threads = h->param.i_threads / (h->param.b_sliced_threads?1:4);
  48.      if( h->param.i_threads > 1 )
  49.      {
  50.  #if !HAVE_THREAD
  51. @@ -509,8 +511,12 @@ static int x264_validate_parameters( x264_t *h, int b_open )
  52.          }
  53.      }
  54.      h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREAD_MAX );
  55. +    h->param.i_lookahead_threads = x264_clip3( h->param.i_lookahead_threads, 1, X264_LOOKAHEAD_THREAD_MAX );
  56.      if( h->param.i_threads == 1 )
  57. +    {
  58.          h->param.b_sliced_threads = 0;
  59. +        h->param.i_lookahead_threads = 1;
  60. +    }
  61.      h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
  62.      if( h->i_thread_frames > 1 )
  63.          h->param.nalu_process = NULL;
  64. @@ -1268,8 +1274,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
  65.      h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
  66.      CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );
  67.  
  68. +    int total_threads = h->param.i_threads + h->param.i_lookahead_threads * (h->param.i_lookahead_threads > 1);
  69.      if( h->param.i_threads > 1 &&
  70. -        x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
  71. +        x264_threadpool_init( &h->threadpool, total_threads, (void*)x264_encoder_thread_init, h ) )
  72.          goto fail;
  73.  
  74.      h->thread[0] = h;
  75. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  76. index 1aa4891..495d094 100644
  77. --- a/encoder/slicetype.c
  78. +++ b/encoder/slicetype.c
  79. @@ -571,7 +571,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  80.  #define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
  81.              if( i_mb_x < h->mb.i_mb_width - 1 )
  82.                  MVC( fenc_mv[1] );
  83. -            if( i_mb_y < h->mb.i_mb_height - 1 )
  84. +            if( i_mb_y < h->i_threadslice_end - 1 )
  85.              {
  86.                  MVC( fenc_mv[i_mb_stride] );
  87.                  if( i_mb_x > 0 )
  88. @@ -701,6 +701,55 @@ lowres_intra_mb:
  89.     (h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2) :\
  90.      h->mb.i_mb_width * h->mb.i_mb_height)
  91.  
  92. +typedef struct /* argument bundle handed to x264_slicetype_slice_cost() as a single pointer */
  93. +{
  94. +    x264_t *h; /* encoder context for this slice; its i_threadslice_start/end bound the rows processed */
  95. +    x264_mb_analysis_t *a; /* forwarded unchanged to x264_slicetype_mb_cost() */
  96. +    x264_frame_t **frames; /* frame array; frames[b] supplies the i_row_satds tables */
  97. +    int p0; /* frame index; b-p0 is the first i_row_satds subscript (presumably the past reference -- confirm) */
  98. +    int p1; /* frame index; p1-b is the second i_row_satds subscript (presumably the future reference -- confirm) */
  99. +    int b; /* index into frames of the frame whose row entries are reset and costed */
  100. +    int dist_scale_factor; /* forwarded unchanged to x264_slicetype_mb_cost() */
  101. +    int *do_search; /* forwarded unchanged to x264_slicetype_mb_cost() */
  102. +    const x264_weight_t *w; /* forwarded unchanged to x264_slicetype_mb_cost() */
  103. +} x264_slicetype_slice_t;
  104. +
  105. +static void x264_slicetype_slice_cost( x264_slicetype_slice_t *s ) /* run x264_slicetype_mb_cost() over the rows s->h->i_threadslice_start .. i_threadslice_end-1 of frames[b] */
  106. +{
  107. +    x264_frame_t **frames = s->frames;
  108. +    x264_t *h = s->h;
  109. +    int p0 = s->p0;
  110. +    int p1 = s->p1;
  111. +    int b = s->b;
  112. +    /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
  113. +     * This considerably improves MV prediction overall. */
  114. +
  115. +    /* The edge mbs seem to reduce the predictive quality of the
  116. +     * whole frame's score, but are needed for a spatial distribution. */
  117. +    if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
  118. +        h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 ) /* edges included: mb-tree or VBV enabled, or the frame has no interior macroblocks */
  119. +    {
  120. +        int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
  121. +        int *row_satd_intra = frames[b]->i_row_satds[0][0];
  122. +        for( h->mb.i_mb_y = h->i_threadslice_end - 1; h->mb.i_mb_y >= h->i_threadslice_start; h->mb.i_mb_y-- ) /* rows of this slice, bottom to top */
  123. +        {
  124. +            row_satd[h->mb.i_mb_y] = 0; /* clear this row's entry before its macroblocks are costed */
  125. +            if( !frames[b]->b_intra_calculated ) /* the intra row entry is only cleared while b_intra_calculated is unset */
  126. +                row_satd_intra[h->mb.i_mb_y] = 0;
  127. +            for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- ) /* every column, right to left */
  128. +                x264_slicetype_mb_cost( h, s->a, frames, p0, p1, b, s->dist_scale_factor, s->do_search, s->w );
  129. +        }
  130. +    }
  131. +    else
  132. +    {
  133. +        int start_row = X264_MIN( h->i_threadslice_end - 1, h->mb.i_mb_height - 2 ); /* never start below row i_mb_height-2: the bottom edge row is skipped */
  134. +        int end_row = X264_MAX( h->i_threadslice_start, 1 ); /* never go above row 1: the top edge row is skipped */
  135. +        for( h->mb.i_mb_y = start_row; h->mb.i_mb_y >= end_row; h->mb.i_mb_y-- )
  136. +            for( h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- ) /* interior columns only, right to left */
  137. +                x264_slicetype_mb_cost( h, s->a, frames, p0, p1, b, s->dist_scale_factor, s->do_search, s->w );
  138. +    }
  139. +}
  140. +
  141.  static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  142.                                        x264_frame_t **frames, int p0, int p1, int b,
  143.                                        int b_intra_penalty )
  144. @@ -717,8 +766,6 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  145.      else
  146.      {
  147.          int dist_scale_factor = 128;
  148. -        int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
  149. -        int *row_satd_intra = frames[b]->i_row_satds[0][0];
  150.  
  151.          /* For each list, check to see whether we have lowres motion-searched this reference frame before. */
  152.          do_search[0] = b != p0 && frames[b]->lowres_mvs[0][b-p0-1][0][0] == 0x7FFF;
  153. @@ -748,28 +795,29 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  154.          frames[b]->i_cost_est[b-p0][p1-b] = 0;
  155.          frames[b]->i_cost_est_aq[b-p0][p1-b] = 0;
  156.  
  157. -        /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
  158. -         * This considerably improves MV prediction overall. */
  159. -
  160. -        /* The edge mbs seem to reduce the predictive quality of the
  161. -         * whole frame's score, but are needed for a spatial distribution. */
  162. -        if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
  163. -            h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
  164. +        if( h->param.i_lookahead_threads > 1 )
  165.          {
  166. -            for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
  167. +            x264_slicetype_slice_t s[X264_LOOKAHEAD_THREAD_MAX];
  168. +            ALIGNED_16( x264_t temp_struct[X264_LOOKAHEAD_THREAD_MAX] );
  169. +
  170. +            for( int i = 0; i < h->param.i_lookahead_threads; i++ )
  171.              {
  172. -                row_satd[h->mb.i_mb_y] = 0;
  173. -                if( !frames[b]->b_intra_calculated )
  174. -                    row_satd_intra[h->mb.i_mb_y] = 0;
  175. -                for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
  176. -                    x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
  177. +                x264_t *t = &temp_struct[i];
  178. +                memcpy( t, h, sizeof(x264_t) );
  179. +                s[i] = (x264_slicetype_slice_t){ t, a, frames, p0, p1, b, dist_scale_factor, do_search, w };
  180. +                t->i_threadslice_start = ((h->mb.i_mb_height *  i    + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
  181. +                t->i_threadslice_end   = ((h->mb.i_mb_height * (i+1) + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
  182. +                x264_threadpool_run( h->threadpool, (void*)x264_slicetype_slice_cost, &s[i] );
  183.              }
  184. +            for( int i = 0; i < h->param.i_lookahead_threads; i++ )
  185. +                x264_threadpool_wait( h->threadpool, &s[i] );
  186.          }
  187.          else
  188.          {
  189. -            for( h->mb.i_mb_y = h->mb.i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y-- )
  190. -                for( h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- )
  191. -                    x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
  192. +            h->i_threadslice_start = 0;
  193. +            h->i_threadslice_end = h->mb.i_mb_height;
  194. +            x264_slicetype_slice_t s = (x264_slicetype_slice_t){ h, a, frames, p0, p1, b, dist_scale_factor, do_search, w };
  195. +            x264_slicetype_slice_cost( &s );
  196.          }
  197.  
  198.          i_score = frames[b]->i_cost_est[b-p0][p1-b];
  199. diff --git a/x264.h b/x264.h
  200. index eb2b3b7..b6c258e 100644
  201. --- a/x264.h
  202. +++ b/x264.h
  203. @@ -41,7 +41,7 @@
  204.  
  205.  #include "x264_config.h"
  206.  
  207. -#define X264_BUILD 124
  208. +#define X264_BUILD 125
  209.  
  210.  /* Application developers planning to link against a shared library version of
  211.   * libx264 from a Microsoft Visual Studio or similar development environment
  212. @@ -254,7 +254,8 @@ typedef struct x264_param_t
  213.  {
  214.      /* CPU flags */
  215.      unsigned int cpu;
  216. -    int         i_threads;       /* encode multiple frames in parallel */
  217. +    int         i_threads;           /* encode multiple frames in parallel */
  218. +    int         i_lookahead_threads; /* multiple threads for lookahead analysis */
  219.      int         b_sliced_threads;  /* Whether to use slice-based threading. */
  220.      int         b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
  221.      int         b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms */