Advertisement
Guest User

Untitled

a guest
Jun 1st, 2017
562
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 79.74 KB | None | 0 0
  1. From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
  2. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  3. Date: Tue, 15 Jun 2010 05:15:42 -0700
  4. Subject: [PATCH 1/9] Fix compilation on ARM w/ Apple ABI
  5.  
  6. ---
  7. encoder/me.c |    2 +-
  8.  1 files changed, 1 insertions(+), 1 deletions(-)
  9.  
  10. diff --git a/encoder/me.c b/encoder/me.c
  11. index 2914eb3..291104a 100644
  12. --- a/encoder/me.c
  13. +++ b/encoder/me.c
  14. @@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
  15.          pmv = pack16to32_mask( bmx, bmy );
  16.          if( i_mvc > 0 )
  17.          {
  18. -            ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
  19. +            ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
  20.              x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
  21.              bcost <<= 4;
  22.              for( int i = 1; i <= i_mvc; i++ )
  23. --
  24. 1.7.0.4
  25.  
  26.  
  27. From 3f539defdc78eb77c90e0164e62a851a4bb42669 Mon Sep 17 00:00:00 2001
  28. From: Steven Walters <kemuri9@gmail.com>
  29. Date: Wed, 9 Jun 2010 18:14:52 -0400
  30. Subject: [PATCH 2/9] Use threadpools to avoid unnecessary thread creation
  31.  Tiny performance improvement with fast settings and lots of threads.
  32.  May help more on some OSs with slow thread creation, like OS X.
  33.  Unify the inconsistent "synch"/"sync" abbreviations of "synchronized" to "sync".
  34.  
  35. ---
  36. Makefile            |    3 +-
  37.  common/common.h     |   10 ++-
  38.  common/frame.c      |   19 +++++-
  39.  common/frame.h      |    9 ++-
  40.  common/threadpool.c |  163 +++++++++++++++++++++++++++++++++++++++++++++++++++
  41.  common/threadpool.h |   39 ++++++++++++
  42.  encoder/encoder.c   |   79 ++++++++++++-------------
  43.  encoder/lookahead.c |   22 ++++----
  44.  input/thread.c      |   17 ++---
  45.  9 files changed, 288 insertions(+), 73 deletions(-)
  46.  create mode 100644 common/threadpool.c
  47.  create mode 100644 common/threadpool.h
  48.  
  49. diff --git a/Makefile b/Makefile
  50. index 8074ce5..9837821 100644
  51. --- a/Makefile
  52. +++ b/Makefile
  53. @@ -22,13 +22,14 @@ SRCSO =
  54.  
  55.  CONFIG := $(shell cat config.h)
  56.  
  57. -# Optional muxer module sources
  58. +# Optional module sources
  59.  ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
  60.  SRCCLI += input/avs.c
  61.  endif
  62.  
  63.  ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
  64.  SRCCLI += input/thread.c
  65. +SRCS   += common/threadpool.c
  66.  endif
  67.  
  68.  ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
  69. diff --git a/common/common.h b/common/common.h
  70. index abb5db2..659c2a4 100644
  71. --- a/common/common.h
  72. +++ b/common/common.h
  73. @@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
  74.  #include "cabac.h"
  75.  #include "quant.h"
  76.  #include "cpu.h"
  77. +#include "threadpool.h"
  78.  
  79.  /****************************************************************************
  80.   * General functions
  81. @@ -364,9 +365,10 @@ typedef struct x264_lookahead_t
  82.      int                           i_last_keyframe;
  83.      int                           i_slicetype_length;
  84.      x264_frame_t                  *last_nonb;
  85. -    x264_synch_frame_list_t       ifbuf;
  86. -    x264_synch_frame_list_t       next;
  87. -    x264_synch_frame_list_t       ofbuf;
  88. +    x264_pthread_t                thread_handle;
  89. +    x264_sync_frame_list_t        ifbuf;
  90. +    x264_sync_frame_list_t        next;
  91. +    x264_sync_frame_list_t        ofbuf;
  92.  } x264_lookahead_t;
  93.  
  94.  typedef struct x264_ratecontrol_t   x264_ratecontrol_t;
  95. @@ -377,11 +379,11 @@ struct x264_t
  96.      x264_param_t    param;
  97.  
  98.      x264_t          *thread[X264_THREAD_MAX+1];
  99. -    x264_pthread_t  thread_handle;
  100.      int             b_thread_active;
  101.      int             i_thread_phase; /* which thread to use for the next frame */
  102.      int             i_threadslice_start; /* first row in this thread slice */
  103.      int             i_threadslice_end; /* row after the end of this thread slice */
  104. +    x264_threadpool_t *threadpool;
  105.  
  106.      /* bitstream output */
  107.      struct
  108. diff --git a/common/frame.c b/common/frame.c
  109. index c5c573f..7c2fce0 100644
  110. --- a/common/frame.c
  111. +++ b/common/frame.c
  112. @@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
  113.      x264_free( list );
  114.  }
  115.  
  116. -int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
  117. +int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
  118.  {
  119.      if( max_size < 0 )
  120.          return -1;
  121. @@ -533,7 +533,7 @@ fail:
  122.      return -1;
  123.  }
  124.  
  125. -void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
  126. +void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
  127.  {
  128.      x264_pthread_mutex_destroy( &slist->mutex );
  129.      x264_pthread_cond_destroy( &slist->cv_fill );
  130. @@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
  131.      x264_frame_delete_list( slist->list );
  132.  }
  133.  
  134. -void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
  135. +void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
  136.  {
  137.      x264_pthread_mutex_lock( &slist->mutex );
  138.      while( slist->i_size == slist->i_max_size )
  139. @@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
  140.      x264_pthread_mutex_unlock( &slist->mutex );
  141.      x264_pthread_cond_broadcast( &slist->cv_fill );
  142.  }
  143. +
  144. +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
  145. +{
  146. +    x264_frame_t *frame;
  147. +    x264_pthread_mutex_lock( &slist->mutex );
  148. +    while( !slist->i_size )
  149. +        x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
  150. +    frame = slist->list[ --slist->i_size ];
  151. +    slist->list[ slist->i_size ] = NULL;
  152. +    x264_pthread_cond_broadcast( &slist->cv_empty );
  153. +    x264_pthread_mutex_unlock( &slist->mutex );
  154. +    return frame;
  155. +}
  156. diff --git a/common/frame.h b/common/frame.h
  157. index 7d252c3..26529ce 100644
  158. --- a/common/frame.h
  159. +++ b/common/frame.h
  160. @@ -154,7 +154,7 @@ typedef struct
  161.     x264_pthread_mutex_t     mutex;
  162.     x264_pthread_cond_t      cv_fill;  /* event signaling that the list became fuller */
  163.     x264_pthread_cond_t      cv_empty; /* event signaling that the list became emptier */
  164. -} x264_synch_frame_list_t;
  165. +} x264_sync_frame_list_t;
  166.  
  167.  typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
  168.  typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
  169. @@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
  170.  void          x264_frame_sort( x264_frame_t **list, int b_dts );
  171.  void          x264_frame_delete_list( x264_frame_t **list );
  172.  
  173. -int           x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
  174. -void          x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
  175. -void          x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
  176. +int           x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
  177. +void          x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
  178. +void          x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
  179. +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
  180.  
  181.  #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
  182.  #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
  183. diff --git a/common/threadpool.c b/common/threadpool.c
  184. new file mode 100644
  185. index 0000000..4448ea2
  186. --- /dev/null
  187. +++ b/common/threadpool.c
  188. @@ -0,0 +1,163 @@
  189. +/*****************************************************************************
  190. + * threadpool.c: x264 threadpool module
  191. + *****************************************************************************
  192. + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
  193. + *
  194. + * This program is free software; you can redistribute it and/or modify
  195. + * it under the terms of the GNU General Public License as published by
  196. + * the Free Software Foundation; either version 2 of the License, or
  197. + * (at your option) any later version.
  198. + *
  199. + * This program is distributed in the hope that it will be useful,
  200. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  201. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  202. + * GNU General Public License for more details.
  203. + *
  204. + * You should have received a copy of the GNU General Public License
  205. + * along with this program; if not, write to the Free Software
  206. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  207. + *****************************************************************************/
  208. +
  209. +#include "common.h"
  210. +
  211. +typedef struct
  212. +{
  213. +    void *(*func)(void *);
  214. +    void *arg;
  215. +    void *ret;
  216. +} x264_threadpool_job_t;
  217. +
  218. +struct x264_threadpool_t
  219. +{
  220. +    int            exit;
  221. +    int            threads;
  222. +    x264_pthread_t *thread_handle;
  223. +    void           (*init_func)(void *);
  224. +    void           *init_arg;
  225. +
  226. +    /* requires a synchronized list structure and associated methods,
  227. +       so use what is already implemented for frames */
  228. +    x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
  229. +    x264_sync_frame_list_t run;    /* list of jobs that are queued for processing by the pool */
  230. +    x264_sync_frame_list_t done;   /* list of jobs that have finished processing */
  231. +};
  232. +
  233. +static void x264_threadpool_thread( x264_threadpool_t *pool )
  234. +{
  235. +    if( pool->init_func )
  236. +        pool->init_func( pool->init_arg );
  237. +
  238. +    while( !pool->exit )
  239. +    {
  240. +        x264_threadpool_job_t *job = NULL;
  241. +        x264_pthread_mutex_lock( &pool->run.mutex );
  242. +        while( !pool->exit && !pool->run.i_size )
  243. +            x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
  244. +        if( pool->run.i_size )
  245. +        {
  246. +            job = (void*)x264_frame_shift( pool->run.list );
  247. +            pool->run.i_size--;
  248. +        }
  249. +        x264_pthread_mutex_unlock( &pool->run.mutex );
  250. +        if( !job )
  251. +            continue;
  252. +        job->ret = job->func( job->arg ); /* execute the function */
  253. +        x264_sync_frame_list_push( &pool->done, (void*)job );
  254. +    }
  255. +}
  256. +
  257. +int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
  258. +                          void (*init_func)(void *), void *init_arg )
  259. +{
  260. +    if( threads <= 0 )
  261. +        return -1;
  262. +
  263. +    x264_threadpool_t *pool;
  264. +    CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
  265. +    *p_pool = pool;
  266. +
  267. +    pool->init_func = init_func;
  268. +    pool->init_arg  = init_arg;
  269. +    pool->threads   = X264_MIN( threads, X264_THREAD_MAX );
  270. +
  271. +    CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
  272. +
  273. +    if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
  274. +        x264_sync_frame_list_init( &pool->run, pool->threads ) ||
  275. +        x264_sync_frame_list_init( &pool->done, pool->threads ) )
  276. +        goto fail;
  277. +
  278. +    for( int i = 0; i < pool->threads; i++ )
  279. +    {
  280. +       x264_threadpool_job_t *job;
  281. +       CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
  282. +       x264_sync_frame_list_push( &pool->uninit, (void*)job );
  283. +    }
  284. +    for( int i = 0; i < pool->threads; i++ )
  285. +        if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
  286. +            goto fail;
  287. +
  288. +    return 0;
  289. +fail:
  290. +    return -1;
  291. +}
  292. +
  293. +void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg )
  294. +{
  295. +    x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit );
  296. +    job->func = func;
  297. +    job->arg  = arg;
  298. +    x264_sync_frame_list_push( &pool->run, (void*)job );
  299. +}
  300. +
  301. +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
  302. +{
  303. +    x264_threadpool_job_t *job = NULL;
  304. +
  305. +    x264_pthread_mutex_lock( &pool->done.mutex );
  306. +    while( !job )
  307. +    {
  308. +        for( int i = 0; i < pool->done.i_size; i++ )
  309. +        {
  310. +            x264_threadpool_job_t *t = (void*)pool->done.list[i];
  311. +            if( t->arg == arg )
  312. +            {
  313. +                job = (void*)x264_frame_shift( pool->done.list+i );
  314. +                pool->done.i_size--;
  315. +            }
  316. +        }
  317. +        if( !job )
  318. +            x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
  319. +    }
  320. +    x264_pthread_mutex_unlock( &pool->done.mutex );
  321. +
  322. +    void *ret = job->ret;
  323. +    x264_sync_frame_list_push( &pool->uninit, (void*)job );
  324. +    return ret;
  325. +}
  326. +
  327. +static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
  328. +{
  329. +    for( int i = 0; slist->list[i]; i++ )
  330. +    {
  331. +        x264_free( slist->list[i] );
  332. +        slist->list[i] = NULL;
  333. +    }
  334. +    x264_sync_frame_list_delete( slist );
  335. +}
  336. +
  337. +void x264_threadpool_delete( x264_threadpool_t *pool )
  338. +{
  339. +    x264_pthread_mutex_lock( &pool->run.mutex );
  340. +    pool->exit = 1;
  341. +    x264_pthread_cond_broadcast( &pool->run.cv_fill );
  342. +    x264_pthread_mutex_unlock( &pool->run.mutex );
  343. +    for( int i = 0; i < pool->threads; i++ )
  344. +        x264_pthread_join( pool->thread_handle[i], NULL );
  345. +
  346. +    x264_threadpool_list_delete( &pool->uninit );
  347. +    x264_threadpool_list_delete( &pool->run );
  348. +    x264_threadpool_list_delete( &pool->done );
  349. +    x264_free( pool->thread_handle );
  350. +    x264_free( pool );
  351. +}
  352. diff --git a/common/threadpool.h b/common/threadpool.h
  353. new file mode 100644
  354. index 0000000..519737c
  355. --- /dev/null
  356. +++ b/common/threadpool.h
  357. @@ -0,0 +1,39 @@
  358. +/*****************************************************************************
  359. + * threadpool.h: x264 threadpool module
  360. + *****************************************************************************
  361. + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
  362. + *
  363. + * This program is free software; you can redistribute it and/or modify
  364. + * it under the terms of the GNU General Public License as published by
  365. + * the Free Software Foundation; either version 2 of the License, or
  366. + * (at your option) any later version.
  367. + *
  368. + * This program is distributed in the hope that it will be useful,
  369. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  370. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  371. + * GNU General Public License for more details.
  372. + *
  373. + * You should have received a copy of the GNU General Public License
  374. + * along with this program; if not, write to the Free Software
  375. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  376. + *****************************************************************************/
  377. +
  378. +#ifndef X264_THREADPOOL_H
  379. +#define X264_THREADPOOL_H
  380. +
  381. +typedef struct x264_threadpool_t x264_threadpool_t;
  382. +
  383. +#if HAVE_PTHREAD
  384. +int   x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
  385. +                            void (*init_func)(void *), void *init_arg );
  386. +void  x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
  387. +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
  388. +void  x264_threadpool_delete( x264_threadpool_t *pool );
  389. +#else
  390. +#define x264_threadpool_init(p,t,f,a) -1
  391. +#define x264_threadpool_run(p,f,a)
  392. +#define x264_threadpool_wait(p,a)     NULL
  393. +#define x264_threadpool_delete(p)
  394. +#endif
  395. +
  396. +#endif
  397. diff --git a/encoder/encoder.c b/encoder/encoder.c
  398. index 08a28bd..0d33915 100644
  399. --- a/encoder/encoder.c
  400. +++ b/encoder/encoder.c
  401. @@ -349,6 +349,20 @@ fail:
  402.      return -1;
  403.  }
  404.  
  405. +#if HAVE_PTHREAD
  406. +static void x264_encoder_thread_init( x264_t *h )
  407. +{
  408. +    if( h->param.i_sync_lookahead )
  409. +        x264_lower_thread_priority( 10 );
  410. +
  411. +#if HAVE_MMX
  412. +    /* Misalign mask has to be set separately for each thread. */
  413. +    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
  414. +        x264_cpu_mask_misalign_sse();
  415. +#endif
  416. +}
  417. +#endif
  418. +
  419.  /****************************************************************************
  420.   *
  421.   ****************************************************************************
  422. @@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
  423.      CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
  424.      h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
  425.  
  426. +    if( h->param.i_threads > 1 &&
  427. +        x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
  428. +        goto fail;
  429. +
  430.      h->thread[0] = h;
  431.      for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
  432.          CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
  433. @@ -2044,14 +2062,6 @@ static void *x264_slices_write( x264_t *h )
  434.  {
  435.      int i_slice_num = 0;
  436.      int last_thread_mb = h->sh.i_last_mb;
  437. -    if( h->param.i_sync_lookahead )
  438. -        x264_lower_thread_priority( 10 );
  439. -
  440. -#if HAVE_MMX
  441. -    /* Misalign mask has to be set separately for each thread. */
  442. -    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
  443. -        x264_cpu_mask_misalign_sse();
  444. -#endif
  445.  
  446.  #if HAVE_VISUALIZE
  447.      if( h->param.b_visualize )
  448. @@ -2093,11 +2103,6 @@ static void *x264_slices_write( x264_t *h )
  449.  
  450.  static int x264_threaded_slices_write( x264_t *h )
  451.  {
  452. -    void *ret = NULL;
  453. -#if HAVE_MMX
  454. -    if( h->param.cpu&X264_CPU_SSE_MISALIGN )
  455. -        x264_cpu_mask_misalign_sse();
  456. -#endif
  457.      /* set first/last mb and sync contexts */
  458.      for( int i = 0; i < h->param.i_threads; i++ )
  459.      {
  460. @@ -2121,16 +2126,14 @@ static int x264_threaded_slices_write( x264_t *h )
  461.      /* dispatch */
  462.      for( int i = 0; i < h->param.i_threads; i++ )
  463.      {
  464. -        if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
  465. -            return -1;
  466. +        x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
  467.          h->thread[i]->b_thread_active = 1;
  468.      }
  469.      for( int i = 0; i < h->param.i_threads; i++ )
  470.      {
  471. -        x264_pthread_join( h->thread[i]->thread_handle, &ret );
  472.          h->thread[i]->b_thread_active = 0;
  473. -        if( (intptr_t)ret )
  474. -            return (intptr_t)ret;
  475. +        if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
  476. +            return -1;
  477.      }
  478.  
  479.      /* Go back and fix up the hpel on the borders between slices. */
  480. @@ -2206,6 +2209,10 @@ int     x264_encoder_encode( x264_t *h,
  481.          thread_current =
  482.          thread_oldest  = h;
  483.      }
  484. +#if HAVE_MMX
  485. +    if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
  486. +        x264_cpu_mask_misalign_sse();
  487. +#endif
  488.  
  489.      // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
  490.      if( x264_reference_update( h ) )
  491. @@ -2529,8 +2536,7 @@ int     x264_encoder_encode( x264_t *h,
  492.      h->i_threadslice_end = h->mb.i_mb_height;
  493.      if( h->i_thread_frames > 1 )
  494.      {
  495. -        if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
  496. -            return -1;
  497. +        x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
  498.          h->b_thread_active = 1;
  499.      }
  500.      else if( h->param.b_sliced_threads )
  501. @@ -2553,11 +2559,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  502.  
  503.      if( h->b_thread_active )
  504.      {
  505. -        void *ret = NULL;
  506. -        x264_pthread_join( h->thread_handle, &ret );
  507.          h->b_thread_active = 0;
  508. -        if( (intptr_t)ret )
  509. -            return (intptr_t)ret;
  510. +        if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
  511. +            return -1;
  512.      }
  513.      if( !h->out.i_nal )
  514.      {
  515. @@ -2822,25 +2826,20 @@ void    x264_encoder_close  ( x264_t *h )
  516.      x264_lookahead_delete( h );
  517.  
  518.      if( h->param.i_threads > 1 )
  519. +        x264_threadpool_delete( h->threadpool );
  520. +    if( h->i_thread_frames > 1 )
  521.      {
  522. -        // don't strictly have to wait for the other threads, but it's simpler than canceling them
  523. -        for( int i = 0; i < h->param.i_threads; i++ )
  524. +        for( int i = 0; i < h->i_thread_frames; i++ )
  525.              if( h->thread[i]->b_thread_active )
  526. -                x264_pthread_join( h->thread[i]->thread_handle, NULL );
  527. -        if( h->i_thread_frames > 1 )
  528. -        {
  529. -            for( int i = 0; i < h->i_thread_frames; i++ )
  530. -                if( h->thread[i]->b_thread_active )
  531. -                {
  532. -                    assert( h->thread[i]->fenc->i_reference_count == 1 );
  533. -                    x264_frame_delete( h->thread[i]->fenc );
  534. -                }
  535. +            {
  536. +                assert( h->thread[i]->fenc->i_reference_count == 1 );
  537. +                x264_frame_delete( h->thread[i]->fenc );
  538. +            }
  539.  
  540. -            x264_t *thread_prev = h->thread[h->i_thread_phase];
  541. -            x264_thread_sync_ratecontrol( h, thread_prev, h );
  542. -            x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
  543. -            h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
  544. -        }
  545. +        x264_t *thread_prev = h->thread[h->i_thread_phase];
  546. +        x264_thread_sync_ratecontrol( h, thread_prev, h );
  547. +        x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
  548. +        h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
  549.      }
  550.      h->i_frame++;
  551.  
  552. diff --git a/encoder/lookahead.c b/encoder/lookahead.c
  553. index a79d4b1..f0af216 100644
  554. --- a/encoder/lookahead.c
  555. +++ b/encoder/lookahead.c
  556. @@ -37,7 +37,7 @@
  557.  #include "common/common.h"
  558.  #include "analyse.h"
  559.  
  560. -static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
  561. +static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
  562.  {
  563.      int i = count;
  564.      while( i-- )
  565. @@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
  566.      look->i_slicetype_length = i_slicetype_length;
  567.  
  568.      /* init frame lists */
  569. -    if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
  570. -        x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
  571. -        x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
  572. +    if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
  573. +        x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
  574. +        x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
  575.          goto fail;
  576.  
  577.      if( !h->param.i_sync_lookahead )
  578. @@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
  579.      if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
  580.          goto fail;
  581.  
  582. -    if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
  583. +    if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
  584.          goto fail;
  585.      look->b_thread_active = 1;
  586.  
  587. @@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
  588.          h->lookahead->b_exit_thread = 1;
  589.          x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
  590.          x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
  591. -        x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
  592. +        x264_pthread_join( h->lookahead->thread_handle, NULL );
  593.          x264_macroblock_cache_free( h->thread[h->param.i_threads] );
  594.          x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
  595.          x264_free( h->thread[h->param.i_threads] );
  596.      }
  597. -    x264_synch_frame_list_delete( &h->lookahead->ifbuf );
  598. -    x264_synch_frame_list_delete( &h->lookahead->next );
  599. +    x264_sync_frame_list_delete( &h->lookahead->ifbuf );
  600. +    x264_sync_frame_list_delete( &h->lookahead->next );
  601.      if( h->lookahead->last_nonb )
  602.          x264_frame_push_unused( h, h->lookahead->last_nonb );
  603. -    x264_synch_frame_list_delete( &h->lookahead->ofbuf );
  604. +    x264_sync_frame_list_delete( &h->lookahead->ofbuf );
  605.      x264_free( h->lookahead );
  606.  }
  607.  
  608.  void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
  609.  {
  610.      if( h->param.i_sync_lookahead )
  611. -        x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
  612. +        x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
  613.      else
  614. -        x264_synch_frame_list_push( &h->lookahead->next, frame );
  615. +        x264_sync_frame_list_push( &h->lookahead->next, frame );
  616.  }
  617.  
  618.  int x264_lookahead_is_empty( x264_t *h )
  619. diff --git a/input/thread.c b/input/thread.c
  620. index a88cfae..c4b07fa 100644
  621. --- a/input/thread.c
  622. +++ b/input/thread.c
  623. @@ -30,10 +30,9 @@ typedef struct
  624.      cli_input_t input;
  625.      hnd_t p_handle;
  626.      x264_picture_t pic;
  627. -    x264_pthread_t tid;
  628. +    x264_threadpool_t *pool;
  629.      int next_frame;
  630.      int frame_total;
  631. -    int in_progress;
  632.      struct thread_input_arg_t *next_args;
  633.  } thread_hnd_t;
  634.  
  635. @@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  636.      }
  637.      h->input = input;
  638.      h->p_handle = *p_handle;
  639. -    h->in_progress = 0;
  640.      h->next_frame = -1;
  641.      h->next_args = malloc( sizeof(thread_input_arg_t) );
  642.      if( !h->next_args )
  643. @@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  644.      thread_input.picture_alloc = h->input.picture_alloc;
  645.      thread_input.picture_clean = h->input.picture_clean;
  646.  
  647. +    if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
  648. +        return -1;
  649. +
  650.      *p_handle = h;
  651.      return 0;
  652.  }
  653. @@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  654.  
  655.      if( h->next_frame >= 0 )
  656.      {
  657. -        x264_pthread_join( h->tid, NULL );
  658. +        x264_threadpool_wait( h->pool, h->next_args );
  659.          ret |= h->next_args->status;
  660. -        h->in_progress = 0;
  661.      }
  662.  
  663.      if( h->next_frame == i_frame )
  664. @@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  665.          h->next_frame =
  666.          h->next_args->i_frame = i_frame+1;
  667.          h->next_args->pic = &h->pic;
  668. -        if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
  669. -            return -1;
  670. -        h->in_progress = 1;
  671. +        x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
  672.      }
  673.      else
  674.          h->next_frame = -1;
  675. @@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
  676.  static int close_file( hnd_t handle )
  677.  {
  678.      thread_hnd_t *h = handle;
  679. -    if( h->in_progress )
  680. -        x264_pthread_join( h->tid, NULL );
  681. +    x264_threadpool_delete( h->pool );
  682.      h->input.close_file( h->p_handle );
  683.      h->input.picture_clean( &h->pic );
  684.      free( h->next_args );
  685. --
  686. 1.7.0.4
  687.  
  688.  
  689. From 0496fd76623fb8dd72eefd4b20719f27565913c3 Mon Sep 17 00:00:00 2001
  690. From: Lamont Alston <wewk584@gmail.com>
  691. Date: Wed, 16 Jun 2010 10:05:17 -0700
  692. Subject: [PATCH 3/9] Add open-GOP support
  693.  
  694. ---
  695. common/common.c       |    6 +++-
  696.  common/common.h       |    8 +++++-
  697.  encoder/encoder.c     |   48 +++++++++++++++++++++++++-------------
  698.  encoder/lookahead.c   |    2 +-
  699.  encoder/ratecontrol.c |    1 +
  700.  encoder/slicetype.c   |   61 ++++++++++++++++++++++++++++++++++--------------
  701.  x264.c                |    7 +++++-
  702.  x264.h                |    4 ++-
  703.  8 files changed, 95 insertions(+), 42 deletions(-)
  704.  
  705. diff --git a/common/common.c b/common/common.c
  706. index 4fa5e4b..5ccd541 100644
  707. --- a/common/common.c
  708. +++ b/common/common.c
  709. @@ -699,6 +699,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  710.          p->i_slice_max_mbs = atoi(value);
  711.      OPT("slices")
  712.          p->i_slice_count = atoi(value);
  713. +    OPT("open-gop")
  714. +        p->b_open_gop = atobool(value);
  715.      OPT("cabac")
  716.          p->b_cabac = atobool(value);
  717.      OPT("cabac-idc")
  718. @@ -1186,9 +1188,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
  719.      s += sprintf( s, " bframes=%d", p->i_bframe );
  720.      if( p->i_bframe )
  721.      {
  722. -        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
  723. +        s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
  724.                        p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
  725. -                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
  726. +                      p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
  727.      }
  728.      s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
  729.  
  730. diff --git a/common/common.h b/common/common.h
  731. index 659c2a4..19e5d32 100644
  732. --- a/common/common.h
  733. +++ b/common/common.h
  734. @@ -362,7 +362,7 @@ typedef struct x264_lookahead_t
  735.      volatile uint8_t              b_exit_thread;
  736.      uint8_t                       b_thread_active;
  737.      uint8_t                       b_analyse_keyframe;
  738. -    int                           i_last_keyframe;
  739. +    int                           i_last_coded_keyframe;
  740.      int                           i_slicetype_length;
  741.      x264_frame_t                  *last_nonb;
  742.      x264_pthread_t                thread_handle;
  743. @@ -470,7 +470,11 @@ struct x264_t
  744.          /* frames used for reference + sentinels */
  745.          x264_frame_t *reference[16+2];
  746.  
  747. -        int i_last_keyframe; /* Frame number of the last keyframe */
  748. +        int i_last_coded_keyframe; /* Frame number of the last keyframe in coding order */
  749. +        int i_last_idr;            /* Frame number of the last IDR (not RP)*/
  750. +        int i_poc_last_open_gop;   /* Poc of the I frame of the last open-gop. The value
  751. +                                    * is only assigned during the period between that
  752. +                                    * I frame and the next P or I frame, else -1 */
  753.  
  754.          int i_input;    /* Number of input frames already accepted */
  755.  
  756. diff --git a/encoder/encoder.c b/encoder/encoder.c
  757. index 0d33915..3e7f227 100644
  758. --- a/encoder/encoder.c
  759. +++ b/encoder/encoder.c
  760. @@ -573,12 +573,9 @@ static int x264_validate_parameters( x264_t *h )
  761.          x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
  762.          h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
  763.      }
  764. -    h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
  765. +    h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
  766.      if( h->param.i_keyint_max == 1 )
  767. -    {
  768. -        h->param.i_bframe = 0;
  769.          h->param.b_intra_refresh = 0;
  770. -    }
  771.      h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
  772.      if( h->param.i_bframe <= 1 )
  773.          h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
  774. @@ -588,6 +585,7 @@ static int x264_validate_parameters( x264_t *h )
  775.          h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
  776.          h->param.analyse.i_direct_mv_pred = 0;
  777.          h->param.analyse.b_weighted_bipred = 0;
  778. +        h->param.b_open_gop = 0;
  779.      }
  780.      if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
  781.      {
  782. @@ -599,6 +597,11 @@ static int x264_validate_parameters( x264_t *h )
  783.          x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
  784.          h->param.i_frame_reference = 1;
  785.      }
  786. +    if( h->param.b_intra_refresh && h->param.b_open_gop )
  787. +    {
  788. +        x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
  789. +        h->param.b_open_gop = 0;
  790. +    }
  791.      if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
  792.          h->param.i_keyint_min = h->param.i_keyint_max / 10;
  793.      h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
  794. @@ -978,9 +981,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
  795.      h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
  796.      h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  797.  
  798. -    h->frames.i_last_keyframe = - h->param.i_keyint_max;
  799. +    h->frames.i_last_idr =
  800. +    h->frames.i_last_coded_keyframe = - h->param.i_keyint_max;
  801.      h->frames.i_input    = 0;
  802.      h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
  803. +    h->frames.i_poc_last_open_gop = -1;
  804.  
  805.      CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
  806.      /* Allocate room for max refs plus a few extra just in case. */
  807. @@ -1688,35 +1693,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
  808.  {
  809.      int ref;
  810.      int b_hasdelayframe = 0;
  811. -    if( !h->param.i_bframe_pyramid )
  812. -        return;
  813.  
  814.      /* look for delay frames -- chain must only contain frames that are disposable */
  815.      for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
  816.          b_hasdelayframe |= h->frames.current[i]->i_coded
  817.                          != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
  818.  
  819. -    if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
  820. +    /* This function must handle b-pyramid and clear frames for open-gop */
  821. +    if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
  822.          return;
  823.  
  824.      /* Remove last BREF. There will never be old BREFs in the
  825.       * dpb during a BREF decode when pyramid == STRICT */
  826.      for( ref = 0; h->frames.reference[ref]; ref++ )
  827.      {
  828. -        if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
  829. +        if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
  830.              && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
  831. +            || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
  832. +            && h->sh.i_type != SLICE_TYPE_B ) )
  833.          {
  834.              int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
  835.              h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
  836.              h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
  837. -            x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
  838. +            x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
  839.              h->b_ref_reorder[0] = 1;
  840. -            break;
  841. +            ref--;
  842.          }
  843.      }
  844.  
  845. -    /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
  846. -    h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
  847. +    /* Prepare room in the dpb for the delayed display time of the later b-frames */
  848. +    if( h->param.i_bframe_pyramid )
  849. +        h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
  850.  }
  851.  
  852.  static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
  853. @@ -2319,14 +2326,19 @@ int     x264_encoder_encode( x264_t *h,
  854.  
  855.      if( h->fenc->b_keyframe )
  856.      {
  857. -        h->frames.i_last_keyframe = h->fenc->i_frame;
  858. +        h->frames.i_last_coded_keyframe = h->fenc->i_frame;
  859.          if( h->fenc->i_type == X264_TYPE_IDR )
  860. +        {
  861.              h->i_frame_num = 0;
  862. +            h->frames.i_last_idr = h->fenc->i_frame;
  863. +        }
  864.      }
  865.      h->sh.i_mmco_command_count =
  866.      h->sh.i_mmco_remove_from_end = 0;
  867.      h->b_ref_reorder[0] =
  868.      h->b_ref_reorder[1] = 0;
  869. +    h->fdec->i_poc =
  870. +    h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
  871.  
  872.      /* ------------------- Setup frame context ----------------------------- */
  873.      /* 5: Init data dependent of frame type */
  874. @@ -2337,6 +2349,7 @@ int     x264_encoder_encode( x264_t *h,
  875.          i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
  876.          h->sh.i_type = SLICE_TYPE_I;
  877.          x264_reference_reset( h );
  878. +        h->frames.i_poc_last_open_gop = -1;
  879.      }
  880.      else if( h->fenc->i_type == X264_TYPE_I )
  881.      {
  882. @@ -2344,6 +2357,8 @@ int     x264_encoder_encode( x264_t *h,
  883.          i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
  884.          h->sh.i_type = SLICE_TYPE_I;
  885.          x264_reference_hierarchy_reset( h );
  886. +        if( h->param.b_open_gop )
  887. +            h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
  888.      }
  889.      else if( h->fenc->i_type == X264_TYPE_P )
  890.      {
  891. @@ -2351,6 +2366,7 @@ int     x264_encoder_encode( x264_t *h,
  892.          i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
  893.          h->sh.i_type = SLICE_TYPE_P;
  894.          x264_reference_hierarchy_reset( h );
  895. +        h->frames.i_poc_last_open_gop = -1;
  896.      }
  897.      else if( h->fenc->i_type == X264_TYPE_BREF )
  898.      {
  899. @@ -2366,8 +2382,6 @@ int     x264_encoder_encode( x264_t *h,
  900.          h->sh.i_type = SLICE_TYPE_B;
  901.      }
  902.  
  903. -    h->fdec->i_poc =
  904. -    h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
  905.      h->fdec->i_type = h->fenc->i_type;
  906.      h->fdec->i_frame = h->fenc->i_frame;
  907.      h->fenc->b_kept_as_ref =
  908. @@ -2484,7 +2498,7 @@ int     x264_encoder_encode( x264_t *h,
  909.  
  910.          if( h->fenc->i_type != X264_TYPE_IDR )
  911.          {
  912. -            int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
  913. +            int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
  914.              x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
  915.              x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
  916.              x264_nal_end( h );
  917. diff --git a/encoder/lookahead.c b/encoder/lookahead.c
  918. index f0af216..6994829 100644
  919. --- a/encoder/lookahead.c
  920. +++ b/encoder/lookahead.c
  921. @@ -131,7 +131,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
  922.      for( int i = 0; i < h->param.i_threads; i++ )
  923.          h->thread[i]->lookahead = look;
  924.  
  925. -    look->i_last_keyframe = - h->param.i_keyint_max;
  926. +    look->i_last_coded_keyframe = - h->param.i_keyint_max;
  927.      look->b_analyse_keyframe = (h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead))
  928.                                 && !h->param.rc.b_stat_read;
  929.      look->i_slicetype_length = i_slicetype_length;
  930. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  931. index 2c05ad7..f30df22 100644
  932. --- a/encoder/ratecontrol.c
  933. +++ b/encoder/ratecontrol.c
  934. @@ -704,6 +704,7 @@ int x264_ratecontrol_new( x264_t *h )
  935.              CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
  936.              CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
  937.              CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
  938. +            CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
  939.  
  940.              if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
  941.                  x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
  942. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  943. index 60f3a24..0762c99 100644
  944. --- a/encoder/slicetype.c
  945. +++ b/encoder/slicetype.c
  946. @@ -981,7 +981,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
  947.      int icost = frame->i_cost_est[0][0];
  948.      int pcost = frame->i_cost_est[p1-p0][0];
  949.      float f_bias;
  950. -    int i_gop_size = frame->i_frame - h->lookahead->i_last_keyframe;
  951. +    int i_gop_size = frame->i_frame - h->lookahead->i_last_coded_keyframe;
  952.      float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
  953.      /* magic numbers pulled out of thin air */
  954.      float f_thresh_min = f_thresh_max * h->param.i_keyint_min
  955. @@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  956.  {
  957.      x264_mb_analysis_t a;
  958.      x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
  959. -    int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
  960. +    int num_frames, orig_num_frames, keyint_limit, framecnt;
  961.      int i_mb_count = NUM_MBS;
  962.      int cost1p0, cost2p0, cost1b1, cost2p1;
  963.      int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
  964. @@ -1076,11 +1076,10 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  965.      if( !framecnt )
  966.          return;
  967.  
  968. -    keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
  969. +    keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
  970.      orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
  971.  
  972.      x264_lowres_context_init( h, &a );
  973. -    idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
  974.  
  975.      /* This is important psy-wise: if we have a non-scenecut keyframe,
  976.       * there will be significant visual artifacts if the frames just before
  977. @@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  978.      {
  979.          frames[1]->i_type = X264_TYPE_P;
  980.          if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
  981. -            frames[1]->i_type = idr_frame_type;
  982. +            frames[1]->i_type = X264_TYPE_I;
  983.          return;
  984.      }
  985.      else if( num_frames == 0 )
  986.      {
  987. -        frames[1]->i_type = idr_frame_type;
  988. +        frames[1]->i_type = X264_TYPE_I;
  989.          return;
  990.      }
  991.  
  992. @@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  993.      int reset_start;
  994.      if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
  995.      {
  996. -        frames[1]->i_type = idr_frame_type;
  997. +        frames[1]->i_type = X264_TYPE_I;
  998.          return;
  999.      }
  1000.  
  1001. @@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  1002.  
  1003.      /* Enforce keyframe limit. */
  1004.      if( !h->param.b_intra_refresh )
  1005. -        for( int j = 0; j < num_frames; j++ )
  1006. +        for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
  1007.          {
  1008. -            if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
  1009. +            int j = i;
  1010. +            if( h->param.b_open_gop )
  1011.              {
  1012. -                if( j && h->param.i_keyint_max > 1 )
  1013. -                    frames[j]->i_type = X264_TYPE_P;
  1014. -                frames[j+1]->i_type = X264_TYPE_IDR;
  1015. -                reset_start = X264_MIN( reset_start, j+2 );
  1016. +                while( IS_X264_TYPE_B( frames[i]->i_type ) )
  1017. +                    i++;
  1018. +                while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
  1019. +                    j--;
  1020.              }
  1021. +            frames[i]->i_type = X264_TYPE_I;
  1022. +            reset_start = X264_MIN( reset_start, i+1 );
  1023. +            i = j;
  1024.          }
  1025.  
  1026.      if( h->param.rc.i_vbv_buffer_size )
  1027. @@ -1303,18 +1306,40 @@ void x264_slicetype_decide( x264_t *h )
  1028.                        frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
  1029.          }
  1030.  
  1031. +        if( frm->i_type == X264_TYPE_KEYFRAME )
  1032. +            frm->i_type = h->param.b_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
  1033. +
  1034.          /* Limit GOP size */
  1035. -        if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
  1036. +        if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_max )
  1037. +        {
  1038. +            if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
  1039. +                frm->i_type = h->param.b_open_gop && h->lookahead->i_last_coded_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
  1040. +            int warn = frm->i_type != X264_TYPE_IDR;
  1041. +            if( warn && h->param.b_open_gop )
  1042. +            {
  1043. +                /* if this minigop ends with i, it's not a violation */
  1044. +                int j = bframes;
  1045. +                while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
  1046. +                    j++;
  1047. +                warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
  1048. +            }
  1049. +            if( warn )
  1050. +                x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
  1051. +        }
  1052. +        if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_min )
  1053.          {
  1054. -            if( frm->i_type == X264_TYPE_AUTO )
  1055. +            if( h->param.b_open_gop )
  1056. +            {
  1057. +                h->lookahead->i_last_coded_keyframe = frm->i_frame - bframes;
  1058. +                frm->b_keyframe = 1;
  1059. +            }
  1060. +            else
  1061.                  frm->i_type = X264_TYPE_IDR;
  1062. -            if( frm->i_type != X264_TYPE_IDR )
  1063. -                x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
  1064.          }
  1065.          if( frm->i_type == X264_TYPE_IDR )
  1066.          {
  1067.              /* Close GOP */
  1068. -            h->lookahead->i_last_keyframe = frm->i_frame;
  1069. +            h->lookahead->i_last_coded_keyframe = frm->i_frame;
  1070.              frm->b_keyframe = 1;
  1071.              if( bframes > 0 )
  1072.              {
  1073. diff --git a/x264.c b/x264.c
  1074. index a124083..eba72c5 100644
  1075. --- a/x264.c
  1076. +++ b/x264.c
  1077. @@ -380,6 +380,8 @@ static void Help( x264_param_t *defaults, int longhelp )
  1078.          "                                  - strict: Strictly hierarchical pyramid\n"
  1079.          "                                  - normal: Non-strict (not Blu-ray compatible)\n",
  1080.          strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
  1081. +    H1( "      --open-gop              Use recovery points to close GOPs\n"
  1082. +        "                              Only available with b-frames\n" );
  1083.      H1( "      --no-cabac              Disable CABAC\n" );
  1084.      H1( "  -r, --ref <integer>         Number of reference frames [%d]\n", defaults->i_frame_reference );
  1085.      H1( "      --no-deblock            Disable loop filter\n" );
  1086. @@ -441,7 +443,8 @@ static void Help( x264_param_t *defaults, int longhelp )
  1087.          "                                  or  b=<float> (bitrate multiplier)\n" );
  1088.      H2( "      --qpfile <string>       Force frametypes and QPs for some or all frames\n"
  1089.          "                              Format of each line: framenumber frametype QP\n"
  1090. -        "                              QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
  1091. +        "                              QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
  1092. +        "                                  K=<I or i> depending on open-gop setting\n"
  1093.          "                              QPs are restricted by qpmin/qpmax.\n" );
  1094.      H1( "\n" );
  1095.      H1( "Analysis:\n" );
  1096. @@ -627,6 +630,7 @@ static struct option long_options[] =
  1097.      { "no-b-adapt",        no_argument, NULL, 0 },
  1098.      { "b-bias",      required_argument, NULL, 0 },
  1099.      { "b-pyramid",   required_argument, NULL, 0 },
  1100. +    { "open-gop",          no_argument, NULL, 0 },
  1101.      { "min-keyint",  required_argument, NULL, 'i' },
  1102.      { "keyint",      required_argument, NULL, 'I' },
  1103.      { "intra-refresh",     no_argument, NULL, 0 },
  1104. @@ -1304,6 +1308,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
  1105.          pic->i_qpplus1 = qp+1;
  1106.          if     ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
  1107.          else if( type == 'i' ) pic->i_type = X264_TYPE_I;
  1108. +        else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
  1109.          else if( type == 'P' ) pic->i_type = X264_TYPE_P;
  1110.          else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
  1111.          else if( type == 'b' ) pic->i_type = X264_TYPE_B;
  1112. diff --git a/x264.h b/x264.h
  1113. index 9cd4600..b1402c9 100644
  1114. --- a/x264.h
  1115. +++ b/x264.h
  1116. @@ -35,7 +35,7 @@
  1117.  
  1118.  #include <stdarg.h>
  1119.  
  1120. -#define X264_BUILD 98
  1121. +#define X264_BUILD 99
  1122.  
  1123.  /* x264_t:
  1124.   *      opaque handler for encoder */
  1125. @@ -138,6 +138,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
  1126.  #define X264_TYPE_P             0x0003
  1127.  #define X264_TYPE_BREF          0x0004  /* Non-disposable B-frame */
  1128.  #define X264_TYPE_B             0x0005
  1129. +#define X264_TYPE_KEYFRAME      0x0006  /* IDR or I depending on b_open_gop option */
  1130.  #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
  1131.  #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
  1132.  
  1133. @@ -221,6 +222,7 @@ typedef struct x264_param_t
  1134.      int         i_bframe_adaptive;
  1135.      int         i_bframe_bias;
  1136.      int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
  1137. +    int         b_open_gop;
  1138.  
  1139.      int         b_deblocking_filter;
  1140.      int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
  1141. --
  1142. 1.7.0.4
  1143.  
  1144.  
  1145. From 7ce61c25c289ef641349c2f4295a4f61dd173557 Mon Sep 17 00:00:00 2001
  1146. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1147. Date: Thu, 17 Jun 2010 14:50:07 -0700
  1148. Subject: [PATCH 4/9] Lookaheadless MB-tree support
  1149.  Uses past motion information instead of future data from the lookahead.
  1150.  Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
  1151.  Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
  1152.  
  1153. Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
  1154. Enable MB-tree in "veryfast", albeit with a very short lookahead.
  1155. ---
  1156. common/common.c     |    4 +++-
  1157.  encoder/encoder.c   |    7 ++++++-
  1158.  encoder/slicetype.c |   48 ++++++++++++++++++++++++++++++++++--------------
  1159.  x264.c              |   14 +++++++-------
  1160.  4 files changed, 50 insertions(+), 23 deletions(-)
  1161.  
  1162. diff --git a/common/common.c b/common/common.c
  1163. index 5ccd541..9e86f93 100644
  1164. --- a/common/common.c
  1165. +++ b/common/common.c
  1166. @@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1167.          param->rc.b_mb_tree = 0;
  1168.          param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1169.          param->analyse.b_weighted_bipred = 0;
  1170. +        param->rc.i_lookahead = 0;
  1171.      }
  1172.      else if( !strcasecmp( preset, "superfast" ) )
  1173.      {
  1174. @@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1175.          param->analyse.i_trellis = 0;
  1176.          param->rc.b_mb_tree = 0;
  1177.          param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1178. +        param->rc.i_lookahead = 0;
  1179.      }
  1180.      else if( !strcasecmp( preset, "veryfast" ) )
  1181.      {
  1182. @@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1183.          param->i_frame_reference = 1;
  1184.          param->analyse.b_mixed_references = 0;
  1185.          param->analyse.i_trellis = 0;
  1186. -        param->rc.b_mb_tree = 0;
  1187.          param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1188. +        param->rc.i_lookahead = 10;
  1189.      }
  1190.      else if( !strcasecmp( preset, "faster" ) )
  1191.      {
  1192. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1193. index 3e7f227..2b0e017 100644
  1194. --- a/encoder/encoder.c
  1195. +++ b/encoder/encoder.c
  1196. @@ -620,8 +620,13 @@ static int x264_validate_parameters( x264_t *h )
  1197.      }
  1198.  
  1199.      h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
  1200. -    if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
  1201. +    if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
  1202.          h->param.rc.b_mb_tree = 0;
  1203. +    if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
  1204. +    {
  1205. +        x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
  1206. +        h->param.rc.b_mb_tree = 0;
  1207. +    }
  1208.      if( h->param.rc.b_stat_read )
  1209.          h->param.rc.i_lookahead = 0;
  1210.  #if HAVE_PTHREAD
  1211. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  1212. index 0762c99..83948fc 100644
  1213. --- a/encoder/slicetype.c
  1214. +++ b/encoder/slicetype.c
  1215. @@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
  1216.          }
  1217.      }
  1218.  
  1219. -    if( h->param.rc.i_vbv_buffer_size && referenced )
  1220. +    if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
  1221.          x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
  1222.  }
  1223.  
  1224. @@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  1225.      int idx = !b_intra;
  1226.      int last_nonb, cur_nonb = 1;
  1227.      int bframes = 0;
  1228. -    int i = num_frames - 1;
  1229. +    int i = num_frames;
  1230. +
  1231.      if( b_intra )
  1232.          x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
  1233.  
  1234. @@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  1235.          i--;
  1236.      last_nonb = i;
  1237.  
  1238. -    if( last_nonb < idx )
  1239. -        return;
  1240.  
  1241. -    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
  1242. +    if( !h->param.rc.i_lookahead )
  1243. +    {
  1244. +        if( b_intra )
  1245. +        {
  1246. +            memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
  1247. +            memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
  1248. +            return;
  1249. +        }
  1250. +        XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
  1251. +        memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
  1252. +    }
  1253. +    else
  1254. +    {
  1255. +        if( last_nonb < idx )
  1256. +            return;
  1257. +        memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
  1258. +    }
  1259. +
  1260.      while( i-- > idx )
  1261.      {
  1262.          cur_nonb = i;
  1263. @@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
  1264.          last_nonb = cur_nonb;
  1265.      }
  1266.  
  1267. +    if( !h->param.rc.i_lookahead )
  1268. +    {
  1269. +        x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
  1270. +        XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
  1271. +    }
  1272. +
  1273.      x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
  1274.      if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
  1275.          x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
  1276. @@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  1277.      int i_mb_count = NUM_MBS;
  1278.      int cost1p0, cost2p0, cost1b1, cost2p1;
  1279.      int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
  1280. +    int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
  1281.      if( h->param.b_deterministic )
  1282.          i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
  1283.  
  1284. @@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  1285.          frames[framecnt+1] = h->lookahead->next.list[framecnt];
  1286.  
  1287.      if( !framecnt )
  1288. +    {
  1289. +        if( h->param.rc.b_mb_tree )
  1290. +            x264_macroblock_tree( h, &a, frames, 0, keyframe );
  1291.          return;
  1292. +    }
  1293.  
  1294.      keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
  1295.      orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
  1296. @@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  1297.       * there will be significant visual artifacts if the frames just before
  1298.       * go down in quality due to being referenced less, despite it being
  1299.       * more RD-optimal. */
  1300. -    if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
  1301. +    if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
  1302.          num_frames = framecnt;
  1303. -    else if( num_frames == 1 )
  1304. -    {
  1305. -        frames[1]->i_type = X264_TYPE_P;
  1306. -        if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
  1307. -            frames[1]->i_type = X264_TYPE_I;
  1308. -        return;
  1309. -    }
  1310.      else if( num_frames == 0 )
  1311.      {
  1312.          frames[1]->i_type = X264_TYPE_I;
  1313. @@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  1314.              i = j;
  1315.          }
  1316.  
  1317. -    if( h->param.rc.i_vbv_buffer_size )
  1318. +    if( vbv_lookahead )
  1319.          x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
  1320.  
  1321.      /* Restore frametypes for all frames that haven't actually been decided yet. */
  1322. diff --git a/x264.c b/x264.c
  1323. index eba72c5..4265a3b 100644
  1324. --- a/x264.c
  1325. +++ b/x264.c
  1326. @@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
  1327.          "                                    --no-8x8dct --aq-mode 0 --b-adapt 0\n"
  1328.          "                                    --bframes 0 --no-cabac --no-deblock\n"
  1329.          "                                    --no-mbtree --me dia --no-mixed-refs\n"
  1330. -        "                                    --partitions none --ref 1 --scenecut 0\n"
  1331. -        "                                    --subme 0 --trellis 0 --no-weightb\n"
  1332. -        "                                    --weightp 0\n"
  1333. +        "                                    --partitions none --rc-lookahead 0 --ref 1\n"
  1334. +        "                                    --scenecut 0 --subme 0 --trellis 0\n"
  1335. +        "                                    --no-weightb --weightp 0\n"
  1336.          "                                  - superfast:\n"
  1337.          "                                    --no-mbtree --me dia --no-mixed-refs\n"
  1338. -        "                                    --partitions i8x8,i4x4 --ref 1\n"
  1339. -        "                                    --subme 1 --trellis 0 --weightp 0\n"
  1340. +        "                                    --partitions i8x8,i4x4 --rc-lookahead 0\n"
  1341. +        "                                    --ref 1 --subme 1 --trellis 0 --weightp 0\n"
  1342.          "                                  - veryfast:\n"
  1343. -        "                                    --no-mbtree --no-mixed-refs --ref 1\n"
  1344. -        "                                    --subme 2 --trellis 0 --weightp 0\n"
  1345. +        "                                    --no-mixed-refs --rc-lookahead 10\n"
  1346. +        "                                    --ref 1 --subme 2 --trellis 0 --weightp 0\n"
  1347.          "                                  - faster:\n"
  1348.          "                                    --no-mixed-refs --rc-lookahead 20\n"
  1349.          "                                    --ref 2 --subme 4 --weightp 1\n"
  1350. --
  1351. 1.7.0.4
  1352.  
  1353.  
  1354. From f0505f9c3c9c4d6e7643cb878ea72192abef2420 Mon Sep 17 00:00:00 2001
  1355. From: Anton Mitrofanov <BugMaster@narod.ru>
  1356. Date: Sat, 19 Jun 2010 01:44:56 +0400
  1357. Subject: [PATCH 5/9] Fix SIGPIPEs caused by is_regular_file checks
  1358.  Check to see if input file is a pipe without opening it.
  1359.  
  1360. ---
  1361. common/osdep.h |   10 +++++++++-
  1362.  x264.c         |    1 +
  1363.  2 files changed, 10 insertions(+), 1 deletions(-)
  1364.  
  1365. diff --git a/common/osdep.h b/common/osdep.h
  1366. index b1b357c..b3a8cd6 100644
  1367. --- a/common/osdep.h
  1368. +++ b/common/osdep.h
  1369. @@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
  1370.  {
  1371.      struct stat file_stat;
  1372.      if( fstat( fileno( filehandle ), &file_stat ) )
  1373. -        return 0;
  1374. +        return -1;
  1375. +    return S_ISREG( file_stat.st_mode );
  1376. +}
  1377. +
  1378. +static inline uint8_t x264_is_regular_file_path( const char *filename )
  1379. +{
  1380. +    struct stat file_stat;
  1381. +    if( stat( filename, &file_stat ) )
  1382. +        return -1;
  1383.      return S_ISREG( file_stat.st_mode );
  1384.  }
  1385.  
  1386. diff --git a/x264.c b/x264.c
  1387. index 4265a3b..25609a3 100644
  1388. --- a/x264.c
  1389. +++ b/x264.c
  1390. @@ -810,6 +810,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
  1391.      int b_auto = !strcasecmp( demuxer, "auto" );
  1392.      if( !b_regular && b_auto )
  1393.          ext = "yuv";
  1394. +    b_regular = b_regular && x264_is_regular_file_path( filename );
  1395.      if( b_regular )
  1396.      {
  1397.          FILE *f = fopen( filename, "r" );
  1398. --
  1399. 1.7.0.4
  1400.  
  1401.  
  1402. From 99a6182a8232083a641cc0423f56407e0589c313 Mon Sep 17 00:00:00 2001
  1403. From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
  1404. Date: Fri, 18 Jun 2010 14:57:52 -0700
  1405. Subject: [PATCH 6/9] Properly close qpfile on errors
  1406.  
  1407. ---
  1408. encoder/encoder.c |    1 +
  1409.  input/avs.c       |    1 +
  1410.  output/mp4.c      |    1 +
  1411.  3 files changed, 3 insertions(+), 0 deletions(-)
  1412.  
  1413. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1414. index 2b0e017..cd65da2 100644
  1415. --- a/encoder/encoder.c
  1416. +++ b/encoder/encoder.c
  1417. @@ -1137,6 +1137,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  1418.          else if( !x264_is_regular_file( f ) )
  1419.          {
  1420.              x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv );
  1421. +            fclose( f );
  1422.              goto fail;
  1423.          }
  1424.          fclose( f );
  1425. diff --git a/input/avs.c b/input/avs.c
  1426. index 07add40..849c465 100644
  1427. --- a/input/avs.c
  1428. +++ b/input/avs.c
  1429. @@ -134,6 +134,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1430.      else if( !x264_is_regular_file( fh ) )
  1431.      {
  1432.          fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
  1433. +        fclose( fh );
  1434.          return -1;
  1435.      }
  1436.      fclose( fh );
  1437. diff --git a/output/mp4.c b/output/mp4.c
  1438. index 0e3c2fc..9b35a2f 100644
  1439. --- a/output/mp4.c
  1440. +++ b/output/mp4.c
  1441. @@ -166,6 +166,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
  1442.      else if( !x264_is_regular_file( fh ) )
  1443.      {
  1444.          fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
  1445. +        fclose( fh );
  1446.          return -1;
  1447.      }
  1448.      fclose( fh );
  1449. --
  1450. 1.7.0.4
  1451.  
  1452.  
  1453. From 9dda85ea508aa5f3e5d3d44c80fee7b33caaea49 Mon Sep 17 00:00:00 2001
  1454. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1455. Date: Fri, 18 Jun 2010 13:58:11 -0700
  1456. Subject: [PATCH 7/9] sse4 and ssse3 versions of some intra_sad functions
  1457.  
  1458. ---
  1459. common/pixel.c       |    5 ++
  1460.  common/x86/pixel.h   |    2 +
  1461.  common/x86/sad-a.asm |  107 ++++++++++++++++++++++++++++++++++++++++++++++++++
  1462.  3 files changed, 114 insertions(+), 0 deletions(-)
  1463.  
  1464. diff --git a/common/pixel.c b/common/pixel.c
  1465. index a8cb1df..7fcc91a 100644
  1466. --- a/common/pixel.c
  1467. +++ b/common/pixel.c
  1468. @@ -856,6 +856,11 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  1469.          }
  1470.          pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
  1471.          pixf->sa8d[PIXEL_8x8]  = x264_pixel_sa8d_8x8_sse4;
  1472. +        pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
  1473. +
  1474. +        /* Only faster on Nehalem */
  1475. +        if( cpu&X264_CPU_SSE42 )
  1476. +            pixf->intra_sad_x3_8x8    = x264_intra_sad_x3_8x8_ssse3;
  1477.      }
  1478.  #endif //HAVE_MMX
  1479.  
  1480. diff --git a/common/x86/pixel.h b/common/x86/pixel.h
  1481. index 9bba683..b1b916d 100644
  1482. --- a/common/x86/pixel.h
  1483. +++ b/common/x86/pixel.h
  1484. @@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4,   ( uint8_t *pix, int i_stride ))
  1485.  void x264_intra_satd_x3_4x4_mmxext  ( uint8_t *, uint8_t *, int * );
  1486.  void x264_intra_satd_x3_4x4_ssse3   ( uint8_t *, uint8_t *, int * );
  1487.  void x264_intra_sad_x3_4x4_mmxext   ( uint8_t *, uint8_t *, int * );
  1488. +void x264_intra_sad_x3_4x4_sse4     ( uint8_t *, uint8_t *, int * );
  1489.  void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
  1490.  void x264_intra_satd_x3_8x8c_ssse3  ( uint8_t *, uint8_t *, int * );
  1491.  void x264_intra_sad_x3_8x8c_mmxext  ( uint8_t *, uint8_t *, int * );
  1492. @@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext  ( uint8_t *, uint8_t *, int * );
  1493.  void x264_intra_sa8d_x3_8x8_sse2    ( uint8_t *, uint8_t *, int * );
  1494.  void x264_intra_sa8d_x3_8x8_ssse3   ( uint8_t *, uint8_t *, int * );
  1495.  void x264_intra_sad_x3_8x8_mmxext   ( uint8_t *, uint8_t *, int * );
  1496. +void x264_intra_sad_x3_8x8_ssse3    ( uint8_t *, uint8_t *, int * );
  1497.  void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
  1498.  void x264_intra_sa8d_x3_8x8_core_sse2  ( uint8_t *, int16_t [2][8], int * );
  1499.  void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
  1500. diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
  1501. index 72c1789..10a365c 100644
  1502. --- a/common/x86/sad-a.asm
  1503. +++ b/common/x86/sad-a.asm
  1504. @@ -26,6 +26,19 @@
  1505.  %include "x86inc.asm"
  1506.  %include "x86util.asm"
  1507.  
  1508. +SECTION_RODATA
  1509. +
  1510. +h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
  1511. +h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  1512. +h8x8_pred_shuf: times 8 db 1
  1513. +                times 8 db 0
  1514. +                times 8 db 3
  1515. +                times 8 db 2
  1516. +                times 8 db 5
  1517. +                times 8 db 4
  1518. +                times 8 db 7
  1519. +                times 8 db 6
  1520. +
  1521.  SECTION .text
  1522.  
  1523.  cextern pb_3
  1524. @@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
  1525.      movd   [r2+4], mm1 ;H prediction cost
  1526.      RET
  1527.  
  1528. +cglobal intra_sad_x3_4x4_sse4, 3,3
  1529. +    movd       xmm4, [r1+FDEC_STRIDE*0-4]
  1530. +    pinsrd     xmm4, [r1+FDEC_STRIDE*1-4], 1
  1531. +    pinsrd     xmm4, [r1+FDEC_STRIDE*2-4], 2
  1532. +    pinsrd     xmm4, [r1+FDEC_STRIDE*3-4], 3
  1533. +    movd       xmm2, [r1-FDEC_STRIDE]
  1534. +    pxor       xmm3, xmm3
  1535. +    movdqa     xmm5, xmm4
  1536. +    pshufb     xmm4, [h4x4_pred_shuf2] ; EFGH
  1537. +    pshufb     xmm5, [h4x4_pred_shuf]  ; EEEEFFFFGGGGHHHH
  1538. +    pshufd     xmm0, xmm2, 0           ; ABCDABCDABCDABCD
  1539. +    punpckldq  xmm2, xmm4              ; ABCDEFGH
  1540. +    psadbw     xmm2, xmm3
  1541. +    movd       xmm1, [r0+FENC_STRIDE*0]
  1542. +    pinsrd     xmm1, [r0+FENC_STRIDE*1], 1
  1543. +    pinsrd     xmm1, [r0+FENC_STRIDE*2], 2
  1544. +    pinsrd     xmm1, [r0+FENC_STRIDE*3], 3
  1545. +    psadbw     xmm0, xmm1
  1546. +    psadbw     xmm5, xmm1
  1547. +    psraw      xmm2, 2
  1548. +    pavgw      xmm2, xmm3
  1549. +    pshufb     xmm2, xmm3              ; DC prediction
  1550. +    movdqa     xmm3, xmm0
  1551. +    punpcklqdq xmm0, xmm5
  1552. +    punpckhqdq xmm3, xmm5
  1553. +    psadbw     xmm2, xmm1
  1554. +    paddw      xmm0, xmm3
  1555. +    movhlps    xmm4, xmm2
  1556. +    packusdw   xmm0, xmm0
  1557. +    paddw      xmm2, xmm4
  1558. +    movq       [r2], xmm0              ; V/H prediction costs
  1559. +    movd     [r2+8], xmm2              ; DC prediction cost
  1560. +    RET
  1561. +
  1562.  ;-----------------------------------------------------------------------------
  1563.  ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
  1564.  ;-----------------------------------------------------------------------------
  1565. @@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
  1566.      movd  [r2+8], m1
  1567.      RET
  1568.  
  1569. +INIT_XMM
  1570. +cglobal intra_sad_x3_8x8_ssse3, 3,4,9
  1571. +%ifdef PIC
  1572. +    lea        r11, [h8x8_pred_shuf]
  1573. +%define shuf r11
  1574. +%else
  1575. +%define shuf h8x8_pred_shuf
  1576. +%endif
  1577. +    movq       m0, [r1+7]   ; left pixels
  1578. +    movq       m1, [r1+16]  ; top pixels
  1579. +    pxor       m2, m2
  1580. +    pxor       m3, m3
  1581. +    psadbw     m2, m0
  1582. +    psadbw     m3, m1
  1583. +    paddw      m2, m3
  1584. +    pxor       m3, m3       ; V score accumulator
  1585. +    psraw      m2, 3
  1586. +    pavgw      m2, m3
  1587. +    punpcklqdq m1, m1       ; V prediction
  1588. +    pshufb     m2, m3       ; DC prediction
  1589. +    pxor       m4, m4       ; H score accumulator
  1590. +    pxor       m5, m5       ; DC score accumulator
  1591. +    mov       r3d, 6
  1592. +.loop:
  1593. +    movq        m6, [r0+FENC_STRIDE*0]
  1594. +    movhps      m6, [r0+FENC_STRIDE*1]
  1595. +    movdqa      m7, m0
  1596. +    pshufb      m7, [shuf+r3*8] ; H prediction
  1597. +%ifdef ARCH_X86_64
  1598. +    movdqa      m8, m1
  1599. +    psadbw      m7, m6
  1600. +    psadbw      m8, m6
  1601. +    psadbw      m6, m2
  1602. +    paddw       m4, m7
  1603. +    paddw       m3, m8
  1604. +    paddw       m5, m6
  1605. +%else
  1606. +    psadbw      m7, m6
  1607. +    paddw       m4, m7
  1608. +    movdqa      m7, m1
  1609. +    psadbw      m7, m6
  1610. +    psadbw      m6, m2
  1611. +    paddw       m3, m7
  1612. +    paddw       m5, m6
  1613. +%endif
  1614. +    add         r0, FENC_STRIDE*2
  1615. +    sub        r3d, 2
  1616. +    jge .loop
  1617. +
  1618. +    movhlps     m0, m3
  1619. +    movhlps     m1, m4
  1620. +    movhlps     m2, m5
  1621. +    paddw       m3, m0
  1622. +    paddw       m4, m1
  1623. +    paddw       m5, m2
  1624. +    movd    [r2+0], m3
  1625. +    movd    [r2+4], m4
  1626. +    movd    [r2+8], m5
  1627. +    RET
  1628. +
  1629.  ;-----------------------------------------------------------------------------
  1630.  ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
  1631.  ;-----------------------------------------------------------------------------
  1632. --
  1633. 1.7.0.4
  1634.  
  1635.  
  1636. From 4e190ca01a0717f9d4056213226b57eb4a6d1b69 Mon Sep 17 00:00:00 2001
  1637. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1638. Date: Sat, 19 Jun 2010 01:41:07 -0700
  1639. Subject: [PATCH 8/9] Improve 2-pass bitrate prediction
  1640.  Adapt based on distance to the end in bits, not in frames.
  1641.  Helps in videos with absurdly simple end sections, e.g. black frames.
  1642.  
  1643. ---
  1644. encoder/ratecontrol.c |   12 +++++++++---
  1645.  1 files changed, 9 insertions(+), 3 deletions(-)
  1646.  
  1647. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1648. index f30df22..6668c18 100644
  1649. --- a/encoder/ratecontrol.c
  1650. +++ b/encoder/ratecontrol.c
  1651. @@ -2014,9 +2014,6 @@ static float rate_estimate_qscale( x264_t *h )
  1652.              double lmax = rcc->lmax[pict_type];
  1653.              int64_t diff;
  1654.              int64_t predicted_bits = total_bits;
  1655. -            /* Adjust ABR buffer based on distance to the end of the video. */
  1656. -            if( rcc->num_entries > h->i_frame )
  1657. -                abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
  1658.  
  1659.              if( rcc->b_vbv )
  1660.              {
  1661. @@ -2042,6 +2039,15 @@ static float rate_estimate_qscale( x264_t *h )
  1662.                      predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
  1663.              }
  1664.  
  1665. +            /* Adjust ABR buffer based on distance to the end of the video. */
  1666. +            if( rcc->num_entries > h->i_frame )
  1667. +            {
  1668. +                double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
  1669. +                double video_pos = rce.expected_bits / final_bits;
  1670. +                double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
  1671. +                abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
  1672. +            }
  1673. +
  1674.              diff = predicted_bits - (int64_t)rce.expected_bits;
  1675.              q = rce.new_qscale;
  1676.              q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
  1677. --
  1678. 1.7.0.4
  1679.  
  1680.  
  1681. From 62e9a31ca2eca59f0d8f54c104d8f7229af4f78c Mon Sep 17 00:00:00 2001
  1682. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1683. Date: Sat, 19 Jun 2010 03:27:33 -0700
  1684. Subject: [PATCH 9/9] Attempt to fix rounding errors in HRD
  1685.  In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
  1686.  Accordingly, convert buffer management to work in units of timescale.
  1687.  This should fix problems with accumulating rounding errors in HRD information.
  1688.  
  1689. ---
  1690. common/common.c       |   36 ++++++++++++++-----------
  1691.  common/common.h       |    1 +
  1692.  encoder/encoder.c     |    3 +-
  1693.  encoder/ratecontrol.c |   67 ++++++++++++++++++++++++++++++++-----------------
  1694.  encoder/ratecontrol.h |    2 +-
  1695.  5 files changed, 67 insertions(+), 42 deletions(-)
  1696.  
  1697. diff --git a/common/common.c b/common/common.c
  1698. index 9e86f93..44fd68a 100644
  1699. --- a/common/common.c
  1700. +++ b/common/common.c
  1701. @@ -1084,24 +1084,28 @@ void x264_free( void *p )
  1702.  /****************************************************************************
  1703.   * x264_reduce_fraction:
  1704.   ****************************************************************************/
  1705. -void x264_reduce_fraction( uint32_t *n, uint32_t *d )
  1706. -{
  1707. -    uint32_t a = *n;
  1708. -    uint32_t b = *d;
  1709. -    uint32_t c;
  1710. -    if( !a || !b )
  1711. -        return;
  1712. -    c = a % b;
  1713. -    while(c)
  1714. -    {
  1715. -        a = b;
  1716. -        b = c;
  1717. -        c = a % b;
  1718. -    }
  1719. -    *n /= b;
  1720. -    *d /= b;
  1721. +#define REDUCE_FRACTION( name, type )\
  1722. +void name( type *n, type *d )\
  1723. +{                   \
  1724. +    type a = *n;    \
  1725. +    type b = *d;    \
  1726. +    type c;         \
  1727. +    if( !a || !b )  \
  1728. +        return;     \
  1729. +    c = a % b;      \
  1730. +    while( c )      \
  1731. +    {               \
  1732. +        a = b;      \
  1733. +        b = c;      \
  1734. +        c = a % b;  \
  1735. +    }               \
  1736. +    *n /= b;        \
  1737. +    *d /= b;        \
  1738.  }
  1739.  
  1740. +REDUCE_FRACTION(   x264_reduce_fraction, uint32_t )
  1741. +REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
  1742. +
  1743.  /****************************************************************************
  1744.   * x264_slurp_file:
  1745.   ****************************************************************************/
  1746. diff --git a/common/common.h b/common/common.h
  1747. index 19e5d32..fee9398 100644
  1748. --- a/common/common.h
  1749. +++ b/common/common.h
  1750. @@ -184,6 +184,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
  1751.  void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
  1752.  
  1753.  void x264_reduce_fraction( uint32_t *n, uint32_t *d );
  1754. +void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
  1755.  void x264_init_vlc_tables();
  1756.  
  1757.  static ALWAYS_INLINE pixel x264_clip_pixel( int x )
  1758. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1759. index cd65da2..c49ea1a 100644
  1760. --- a/encoder/encoder.c
  1761. +++ b/encoder/encoder.c
  1762. @@ -2593,8 +2593,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  1763.      /* generate sei buffering period and insert it into place */
  1764.      if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
  1765.      {
  1766. -        h->initial_cpb_removal_delay = x264_hrd_fullness( h );
  1767. -
  1768. +        x264_hrd_fullness( h );
  1769.          x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
  1770.          x264_sei_buffering_period_write( h, &h->out.bs );
  1771.          if( x264_nal_end( h ) )
  1772. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1773. index 6668c18..b37d9be 100644
  1774. --- a/encoder/ratecontrol.c
  1775. +++ b/encoder/ratecontrol.c
  1776. @@ -91,7 +91,7 @@ struct x264_ratecontrol_t
  1777.  
  1778.      /* VBV stuff */
  1779.      double buffer_size;
  1780. -    double buffer_fill_final;   /* real buffer as of the last finished frame */
  1781. +    int64_t buffer_fill_final;
  1782.      double buffer_fill;         /* planned buffer, if all in-progress frames hit their bit budget */
  1783.      double buffer_rate;         /* # of bits added to buffer_fill after each frame */
  1784.      double vbv_max_rate;        /* # of bits added to buffer_fill per second */
  1785. @@ -157,6 +157,7 @@ struct x264_ratecontrol_t
  1786.      int initial_cpb_removal_delay_offset;
  1787.      double nrt_first_access_unit; /* nominal removal time */
  1788.      double previous_cpb_final_arrival_time;
  1789. +    uint64_t hrd_multiply_denom;
  1790.  };
  1791.  
  1792.  
  1793. @@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  1794.          int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
  1795.  
  1796.          /* Init HRD */
  1797. +        h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
  1798. +        h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
  1799.          if( h->param.i_nal_hrd && b_init )
  1800.          {
  1801.              h->sps->vui.hrd.i_cpb_cnt = 1;
  1802. @@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  1803.  
  1804.              #undef MAX_DURATION
  1805.  
  1806. -            vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
  1807. -            vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
  1808. +            vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
  1809. +            vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
  1810.          }
  1811.          else if( h->param.i_nal_hrd && !b_init )
  1812.          {
  1813. @@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  1814.              if( h->param.rc.f_vbv_buffer_init > 1. )
  1815.                  h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
  1816.              h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
  1817. -            rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
  1818. +            rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
  1819.              rc->b_vbv = 1;
  1820.              rc->b_vbv_min_rate = !rc->b_2pass
  1821.                            && h->param.rc.i_rc_method == X264_RC_ABR
  1822. @@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
  1823.  
  1824.      x264_ratecontrol_init_reconfigurable( h, 1 );
  1825.  
  1826. +    if( h->param.i_nal_hrd )
  1827. +    {
  1828. +        uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
  1829. +        uint64_t num = 180000;
  1830. +        x264_reduce_fraction64( &num, &denom );
  1831. +        rc->hrd_multiply_denom = 180000 / num;
  1832. +
  1833. +        double bits_required = log2( 180000 / rc->hrd_multiply_denom )
  1834. +                             + log2( h->sps->vui.i_time_scale )
  1835. +                             + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
  1836. +        if( bits_required >= 63 )
  1837. +        {
  1838. +            x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
  1839. +            return -1;
  1840. +        }
  1841. +    }
  1842. +
  1843.      if( rc->rate_tolerance < 0.01 )
  1844.      {
  1845.          x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
  1846. @@ -1723,9 +1743,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
  1847.  static int update_vbv( x264_t *h, int bits )
  1848.  {
  1849.      int filler = 0;
  1850. -
  1851. +    int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
  1852.      x264_ratecontrol_t *rcc = h->rc;
  1853.      x264_ratecontrol_t *rct = h->thread[0]->rc;
  1854. +    uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
  1855.  
  1856.      if( rcc->last_satd >= h->mb.i_mb_count )
  1857.          update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
  1858. @@ -1733,48 +1754,48 @@ static int update_vbv( x264_t *h, int bits )
  1859.      if( !rcc->b_vbv )
  1860.          return filler;
  1861.  
  1862. -    rct->buffer_fill_final -= bits;
  1863. +    rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
  1864.  
  1865.      if( rct->buffer_fill_final < 0 )
  1866. -        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
  1867. +        x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
  1868.      rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
  1869. -    rct->buffer_fill_final += rcc->buffer_rate;
  1870. +    rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
  1871.  
  1872. -    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
  1873. +    if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
  1874.      {
  1875. -        filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
  1876. -        rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
  1877. +        filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
  1878. +        bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
  1879. +        rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
  1880.      }
  1881.      else
  1882. -        rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
  1883. +        rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
  1884.  
  1885.      return filler;
  1886.  }
  1887.  
  1888. -int x264_hrd_fullness( x264_t *h )
  1889. +void x264_hrd_fullness( x264_t *h )
  1890.  {
  1891.      x264_ratecontrol_t *rct = h->thread[0]->rc;
  1892. -    double cpb_bits = rct->buffer_fill_final;
  1893. -    double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
  1894. -    double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
  1895. -    double cpb_fullness = 90000.0*cpb_bits/bps;
  1896. +    uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
  1897. +    uint64_t cpb_state = rct->buffer_fill_final;
  1898. +    uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
  1899. +    uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
  1900.  
  1901. -    if( cpb_bits < 0 || cpb_bits > cpb_size )
  1902. +    if( rct->buffer_fill_final < 0 || cpb_state > cpb_size ) /* cpb_state is unsigned, so underflow must be tested on the signed field */
  1903.      {
  1904.           x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
  1905. -                   cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
  1906. +                   rct->buffer_fill_final < 0 ? "underflow" : "overflow", (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale );
  1907.      }
  1908.  
  1909. -    h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
  1910. -
  1911. -    return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
  1912. +    h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
  1913. +    h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay;
  1914.  }
  1915.  
  1916.  // provisionally update VBV according to the planned size of all frames currently in progress
  1917.  static void update_vbv_plan( x264_t *h, int overhead )
  1918.  {
  1919.      x264_ratecontrol_t *rcc = h->rc;
  1920. -    rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
  1921. +    rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
  1922.      if( h->i_thread_frames > 1 )
  1923.      {
  1924.          int j = h->rc - h->thread[0]->rc;
  1925. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  1926. index dd139eb..f39c070 100644
  1927. --- a/encoder/ratecontrol.h
  1928. +++ b/encoder/ratecontrol.h
  1929. @@ -47,6 +47,6 @@ int  x264_rc_analyse_slice( x264_t *h );
  1930.  int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
  1931.  void x264_threads_distribute_ratecontrol( x264_t *h );
  1932.  void x264_threads_merge_ratecontrol( x264_t *h );
  1933. -int x264_hrd_fullness( x264_t *h );
  1934. +void x264_hrd_fullness( x264_t *h );
  1935.  #endif
  1936.  
  1937. --
  1938. 1.7.0.4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement