Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 15 Jun 2010 05:15:42 -0700
- Subject: [PATCH 1/9] Fix compilation on ARM w/ Apple ABI
- ---
- encoder/me.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/encoder/me.c b/encoder/me.c
- index 2914eb3..291104a 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- pmv = pack16to32_mask( bmx, bmy );
- if( i_mvc > 0 )
- {
- - ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
- + ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
- x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
- bcost <<= 4;
- for( int i = 1; i <= i_mvc; i++ )
- --
- 1.7.0.4
- From 3f539defdc78eb77c90e0164e62a851a4bb42669 Mon Sep 17 00:00:00 2001
- From: Steven Walters <kemuri9@gmail.com>
- Date: Wed, 9 Jun 2010 18:14:52 -0400
- Subject: [PATCH 2/9] Use threadpools to avoid unnecessary thread creation
- Tiny performance improvement with fast settings and lots of threads.
- May help more on some OSs with slow thread creation, like OS X.
- Unify inconsistent synchronized abbreviations to sync.
- ---
- Makefile | 3 +-
- common/common.h | 10 ++-
- common/frame.c | 19 +++++-
- common/frame.h | 9 ++-
- common/threadpool.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
- common/threadpool.h | 39 ++++++++++++
- encoder/encoder.c | 79 ++++++++++++-------------
- encoder/lookahead.c | 22 ++++----
- input/thread.c | 17 ++---
- 9 files changed, 288 insertions(+), 73 deletions(-)
- create mode 100644 common/threadpool.c
- create mode 100644 common/threadpool.h
- diff --git a/Makefile b/Makefile
- index 8074ce5..9837821 100644
- --- a/Makefile
- +++ b/Makefile
- @@ -22,13 +22,14 @@ SRCSO =
- CONFIG := $(shell cat config.h)
- -# Optional muxer module sources
- +# Optional module sources
- ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
- SRCCLI += input/avs.c
- endif
- ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
- SRCCLI += input/thread.c
- +SRCS += common/threadpool.c
- endif
- ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
- diff --git a/common/common.h b/common/common.h
- index abb5db2..659c2a4 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
- #include "cabac.h"
- #include "quant.h"
- #include "cpu.h"
- +#include "threadpool.h"
- /****************************************************************************
- * General functions
- @@ -364,9 +365,10 @@ typedef struct x264_lookahead_t
- int i_last_keyframe;
- int i_slicetype_length;
- x264_frame_t *last_nonb;
- - x264_synch_frame_list_t ifbuf;
- - x264_synch_frame_list_t next;
- - x264_synch_frame_list_t ofbuf;
- + x264_pthread_t thread_handle;
- + x264_sync_frame_list_t ifbuf;
- + x264_sync_frame_list_t next;
- + x264_sync_frame_list_t ofbuf;
- } x264_lookahead_t;
- typedef struct x264_ratecontrol_t x264_ratecontrol_t;
- @@ -377,11 +379,11 @@ struct x264_t
- x264_param_t param;
- x264_t *thread[X264_THREAD_MAX+1];
- - x264_pthread_t thread_handle;
- int b_thread_active;
- int i_thread_phase; /* which thread to use for the next frame */
- int i_threadslice_start; /* first row in this thread slice */
- int i_threadslice_end; /* row after the end of this thread slice */
- + x264_threadpool_t *threadpool;
- /* bitstream output */
- struct
- diff --git a/common/frame.c b/common/frame.c
- index c5c573f..7c2fce0 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
- x264_free( list );
- }
- -int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
- +int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
- {
- if( max_size < 0 )
- return -1;
- @@ -533,7 +533,7 @@ fail:
- return -1;
- }
- -void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
- +void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
- {
- x264_pthread_mutex_destroy( &slist->mutex );
- x264_pthread_cond_destroy( &slist->cv_fill );
- @@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
- x264_frame_delete_list( slist->list );
- }
- -void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
- +void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
- {
- x264_pthread_mutex_lock( &slist->mutex );
- while( slist->i_size == slist->i_max_size )
- @@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
- x264_pthread_mutex_unlock( &slist->mutex );
- x264_pthread_cond_broadcast( &slist->cv_fill );
- }
- +/* Blocking pop: waits until slist is non-empty, then removes and returns its last entry. */
- +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
- +{
- + x264_frame_t *frame;
- + x264_pthread_mutex_lock( &slist->mutex );
- + while( !slist->i_size ) /* cv_fill is broadcast by x264_sync_frame_list_push after it adds an entry */
- + x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
- + frame = slist->list[ --slist->i_size ]; /* take the entry at the highest used index */
- + slist->list[ slist->i_size ] = NULL; /* clear the vacated slot */
- + x264_pthread_cond_broadcast( &slist->cv_empty ); /* wake pushers waiting for free space */
- + x264_pthread_mutex_unlock( &slist->mutex );
- + return frame;
- +}
- diff --git a/common/frame.h b/common/frame.h
- index 7d252c3..26529ce 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -154,7 +154,7 @@ typedef struct
- x264_pthread_mutex_t mutex;
- x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
- x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
- -} x264_synch_frame_list_t;
- +} x264_sync_frame_list_t;
- typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
- typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
- @@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
- void x264_frame_sort( x264_frame_t **list, int b_dts );
- void x264_frame_delete_list( x264_frame_t **list );
- -int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
- -void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
- -void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
- +int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
- +void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
- +void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
- +x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
- #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
- #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
- diff --git a/common/threadpool.c b/common/threadpool.c
- new file mode 100644
- index 0000000..4448ea2
- --- /dev/null
- +++ b/common/threadpool.c
- @@ -0,0 +1,163 @@
- +/*****************************************************************************
- + * threadpool.c: x264 threadpool module
- + *****************************************************************************
- + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#include "common.h"
- +/* One unit of work; slots are allocated in x264_threadpool_init and recycled through the uninit/run/done lists. */
- +typedef struct
- +{
- + void *(*func)(void *); /* function the worker thread calls */
- + void *arg; /* argument passed to func; x264_threadpool_wait also identifies the job by this pointer */
- + void *ret; /* value returned by func, stored by the worker thread */
- +} x264_threadpool_job_t;
- +/* Pool state shared between the caller and all worker threads. */
- +struct x264_threadpool_t
- +{
- + int exit; /* set to 1 by x264_threadpool_delete so the workers leave their loop */
- + int threads; /* number of worker threads; also the number of job slots */
- + x264_pthread_t *thread_handle; /* array of `threads` handles, joined in x264_threadpool_delete */
- + void (*init_func)(void *); /* optional hook each worker calls once when it starts */
- + void *init_arg; /* argument passed to init_func */
- +
- + /* requires a synchronized list structure and associated methods,
- + so use what is already implemented for frames */
- + x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
- + x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
- + x264_sync_frame_list_t done; /* list of jobs that have finished processing */
- +};
- +/* Worker thread body: runs the optional init hook once, then executes queued jobs until pool->exit is set. */
- +static void x264_threadpool_thread( x264_threadpool_t *pool )
- +{
- + if( pool->init_func )
- + pool->init_func( pool->init_arg );
- + /* main loop: take one job from the run list, execute it, publish it on the done list */
- + while( !pool->exit )
- + {
- + x264_threadpool_job_t *job = NULL;
- + x264_pthread_mutex_lock( &pool->run.mutex );
- + while( !pool->exit && !pool->run.i_size ) /* sleep until a job is queued or shutdown is requested */
- + x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
- + if( pool->run.i_size )
- + {
- + job = (void*)x264_frame_shift( pool->run.list ); /* presumably removes the head of the list -- confirm against x264_frame_shift */
- + pool->run.i_size--;
- + }
- + x264_pthread_mutex_unlock( &pool->run.mutex );
- + if( !job ) /* woken with nothing queued (exit was set): re-test the loop condition */
- + continue;
- + job->ret = job->func( job->arg ); /* execute the function */
- + x264_sync_frame_list_push( &pool->done, (void*)job ); /* makes the result visible to x264_threadpool_wait */
- + }
- +}
- +/* Create a pool of worker threads and store it in *p_pool; returns 0 on success, -1 on failure. */
- +int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
- + void (*init_func)(void *), void *init_arg )
- +{
- + if( threads <= 0 )
- + return -1;
- +
- + x264_threadpool_t *pool;
- + CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) ); /* presumably jumps to fail on allocation failure -- confirm against the macro */
- + *p_pool = pool; /* published before setup completes, so the caller sees it even if a later step fails */
- +
- + pool->init_func = init_func;
- + pool->init_arg = init_arg;
- + pool->threads = X264_MIN( threads, X264_THREAD_MAX ); /* requested count is capped at X264_THREAD_MAX */
- +
- + CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
- + /* each list is sized to hold one entry per worker thread */
- + if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
- + x264_sync_frame_list_init( &pool->run, pool->threads ) ||
- + x264_sync_frame_list_init( &pool->done, pool->threads ) )
- + goto fail;
- + /* pre-allocate one job slot per thread; all of them start out on the uninit list */
- + for( int i = 0; i < pool->threads; i++ )
- + {
- + x264_threadpool_job_t *job;
- + CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
- + x264_sync_frame_list_push( &pool->uninit, (void*)job );
- + }
- + for( int i = 0; i < pool->threads; i++ ) /* start the workers; each runs x264_threadpool_thread( pool ) */
- + if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
- + goto fail;
- +
- + return 0;
- +fail:
- + return -1; /* NOTE(review): nothing allocated or started above is released on this path */
- +}
- +/* Queue func( arg ) for execution by a worker thread; the result is collected later with x264_threadpool_wait( pool, arg ). */
- +void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg )
- +{
- + x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit ); /* blocks until a free job slot exists */
- + job->func = func;
- + job->arg = arg;
- + x264_sync_frame_list_push( &pool->run, (void*)job ); /* the push broadcasts run.cv_fill, waking idle workers */
- +}
- +/* Block until a finished job whose argument pointer equals arg is on the done list, recycle its slot, and return the job function's result. */
- +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
- +{
- + x264_threadpool_job_t *job = NULL;
- + /* done.mutex guards the scan; done.cv_fill is broadcast by x264_sync_frame_list_push when a worker queues a finished job */
- + x264_pthread_mutex_lock( &pool->done.mutex );
- + while( !job )
- + {
- + for( int i = 0; !job && i < pool->done.i_size; i++ ) /* stop at the first match so job is never overwritten and no entry is skipped after the removal */
- + {
- + x264_threadpool_job_t *t = (void*)pool->done.list[i];
- + if( t->arg == arg )
- + {
- + job = (void*)x264_frame_shift( pool->done.list+i ); /* presumably removes entry i and closes the gap -- confirm against x264_frame_shift */
- + pool->done.i_size--;
- + }
- + }
- + if( !job )
- + x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
- + }
- + x264_pthread_mutex_unlock( &pool->done.mutex );
- + /* read the result before the slot returns to the uninit list, where x264_threadpool_run may reuse it */
- + void *ret = job->ret;
- + x264_sync_frame_list_push( &pool->uninit, (void*)job );
- + return ret;
- +}
- +/* Free every job still stored in slist, then destroy the list itself. */
- +static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
- +{
- + for( int i = 0; slist->list[i]; i++ ) /* stops at the first NULL slot; assumes the array is NULL-terminated -- confirm against x264_sync_frame_list_init */
- + {
- + x264_free( slist->list[i] );
- + slist->list[i] = NULL; /* cleared so x264_sync_frame_list_delete does not see the freed pointer */
- + }
- + x264_sync_frame_list_delete( slist );
- +}
- +/* Shut the pool down: request exit, join every worker thread, then free all job slots, the lists, and the pool. */
- +void x264_threadpool_delete( x264_threadpool_t *pool )
- +{
- + x264_pthread_mutex_lock( &pool->run.mutex );
- + pool->exit = 1;
- + x264_pthread_cond_broadcast( &pool->run.cv_fill ); /* wake workers sleeping on an empty run list so they observe exit */
- + x264_pthread_mutex_unlock( &pool->run.mutex );
- + for( int i = 0; i < pool->threads; i++ )
- + x264_pthread_join( pool->thread_handle[i], NULL );
- + /* no worker is running past this point; jobs left on any list are freed without being executed */
- + x264_threadpool_list_delete( &pool->uninit );
- + x264_threadpool_list_delete( &pool->run );
- + x264_threadpool_list_delete( &pool->done );
- + x264_free( pool->thread_handle );
- + x264_free( pool );
- +}
- diff --git a/common/threadpool.h b/common/threadpool.h
- new file mode 100644
- index 0000000..519737c
- --- /dev/null
- +++ b/common/threadpool.h
- @@ -0,0 +1,39 @@
- +/*****************************************************************************
- + * threadpool.h: x264 threadpool module
- + *****************************************************************************
- + * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#ifndef X264_THREADPOOL_H
- +#define X264_THREADPOOL_H
- + /* opaque handle; the structure is defined in threadpool.c */
- +typedef struct x264_threadpool_t x264_threadpool_t;
- + /* init returns 0 on success and -1 on failure; wait returns the value produced by the job that was queued with the same arg */
- +#if HAVE_PTHREAD
- +int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
- + void (*init_func)(void *), void *init_arg );
- +void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
- +void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
- +void x264_threadpool_delete( x264_threadpool_t *pool );
- +#else /* built without pthreads: init always reports failure and the other calls expand to nothing / NULL */
- +#define x264_threadpool_init(p,t,f,a) -1
- +#define x264_threadpool_run(p,f,a)
- +#define x264_threadpool_wait(p,a) NULL
- +#define x264_threadpool_delete(p)
- +#endif /* HAVE_PTHREAD */
- +
- +#endif /* X264_THREADPOOL_H */
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 08a28bd..0d33915 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -349,6 +349,20 @@ fail:
- return -1;
- }
- +#if HAVE_PTHREAD
- +static void x264_encoder_thread_init( x264_t *h ) /* per-thread setup hook; passed as init_func to x264_threadpool_init in x264_encoder_open */
- +{
- + if( h->param.i_sync_lookahead ) /* only when a synchronous lookahead is configured */
- + x264_lower_thread_priority( 10 );
- +
- +#if HAVE_MMX
- + /* Misalign mask has to be set separately for each thread. */
- + if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- + x264_cpu_mask_misalign_sse();
- +#endif
- +}
- +#endif /* HAVE_PTHREAD */
- +
- /****************************************************************************
- *
- ****************************************************************************
- @@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
- CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
- h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
- + if( h->param.i_threads > 1 &&
- + x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
- + goto fail;
- +
- h->thread[0] = h;
- for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
- CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
- @@ -2044,14 +2062,6 @@ static void *x264_slices_write( x264_t *h )
- {
- int i_slice_num = 0;
- int last_thread_mb = h->sh.i_last_mb;
- - if( h->param.i_sync_lookahead )
- - x264_lower_thread_priority( 10 );
- -
- -#if HAVE_MMX
- - /* Misalign mask has to be set separately for each thread. */
- - if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- - x264_cpu_mask_misalign_sse();
- -#endif
- #if HAVE_VISUALIZE
- if( h->param.b_visualize )
- @@ -2093,11 +2103,6 @@ static void *x264_slices_write( x264_t *h )
- static int x264_threaded_slices_write( x264_t *h )
- {
- - void *ret = NULL;
- -#if HAVE_MMX
- - if( h->param.cpu&X264_CPU_SSE_MISALIGN )
- - x264_cpu_mask_misalign_sse();
- -#endif
- /* set first/last mb and sync contexts */
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- @@ -2121,16 +2126,14 @@ static int x264_threaded_slices_write( x264_t *h )
- /* dispatch */
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- - if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
- - return -1;
- + x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
- h->thread[i]->b_thread_active = 1;
- }
- for( int i = 0; i < h->param.i_threads; i++ )
- {
- - x264_pthread_join( h->thread[i]->thread_handle, &ret );
- h->thread[i]->b_thread_active = 0;
- - if( (intptr_t)ret )
- - return (intptr_t)ret;
- + if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
- + return -1;
- }
- /* Go back and fix up the hpel on the borders between slices. */
- @@ -2206,6 +2209,10 @@ int x264_encoder_encode( x264_t *h,
- thread_current =
- thread_oldest = h;
- }
- +#if HAVE_MMX
- + if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
- + x264_cpu_mask_misalign_sse();
- +#endif
- // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
- if( x264_reference_update( h ) )
- @@ -2529,8 +2536,7 @@ int x264_encoder_encode( x264_t *h,
- h->i_threadslice_end = h->mb.i_mb_height;
- if( h->i_thread_frames > 1 )
- {
- - if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
- - return -1;
- + x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
- h->b_thread_active = 1;
- }
- else if( h->param.b_sliced_threads )
- @@ -2553,11 +2559,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- if( h->b_thread_active )
- {
- - void *ret = NULL;
- - x264_pthread_join( h->thread_handle, &ret );
- h->b_thread_active = 0;
- - if( (intptr_t)ret )
- - return (intptr_t)ret;
- + if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
- + return -1;
- }
- if( !h->out.i_nal )
- {
- @@ -2822,25 +2826,20 @@ void x264_encoder_close ( x264_t *h )
- x264_lookahead_delete( h );
- if( h->param.i_threads > 1 )
- + x264_threadpool_delete( h->threadpool );
- + if( h->i_thread_frames > 1 )
- {
- - // don't strictly have to wait for the other threads, but it's simpler than canceling them
- - for( int i = 0; i < h->param.i_threads; i++ )
- + for( int i = 0; i < h->i_thread_frames; i++ )
- if( h->thread[i]->b_thread_active )
- - x264_pthread_join( h->thread[i]->thread_handle, NULL );
- - if( h->i_thread_frames > 1 )
- - {
- - for( int i = 0; i < h->i_thread_frames; i++ )
- - if( h->thread[i]->b_thread_active )
- - {
- - assert( h->thread[i]->fenc->i_reference_count == 1 );
- - x264_frame_delete( h->thread[i]->fenc );
- - }
- + {
- + assert( h->thread[i]->fenc->i_reference_count == 1 );
- + x264_frame_delete( h->thread[i]->fenc );
- + }
- - x264_t *thread_prev = h->thread[h->i_thread_phase];
- - x264_thread_sync_ratecontrol( h, thread_prev, h );
- - x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
- - h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
- - }
- + x264_t *thread_prev = h->thread[h->i_thread_phase];
- + x264_thread_sync_ratecontrol( h, thread_prev, h );
- + x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
- + h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
- }
- h->i_frame++;
- diff --git a/encoder/lookahead.c b/encoder/lookahead.c
- index a79d4b1..f0af216 100644
- --- a/encoder/lookahead.c
- +++ b/encoder/lookahead.c
- @@ -37,7 +37,7 @@
- #include "common/common.h"
- #include "analyse.h"
- -static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
- +static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
- {
- int i = count;
- while( i-- )
- @@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
- look->i_slicetype_length = i_slicetype_length;
- /* init frame lists */
- - if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
- - x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
- - x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
- + if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
- + x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
- + x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
- goto fail;
- if( !h->param.i_sync_lookahead )
- @@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
- if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
- goto fail;
- - if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
- + if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
- goto fail;
- look->b_thread_active = 1;
- @@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
- h->lookahead->b_exit_thread = 1;
- x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
- x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
- - x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
- + x264_pthread_join( h->lookahead->thread_handle, NULL );
- x264_macroblock_cache_free( h->thread[h->param.i_threads] );
- x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
- x264_free( h->thread[h->param.i_threads] );
- }
- - x264_synch_frame_list_delete( &h->lookahead->ifbuf );
- - x264_synch_frame_list_delete( &h->lookahead->next );
- + x264_sync_frame_list_delete( &h->lookahead->ifbuf );
- + x264_sync_frame_list_delete( &h->lookahead->next );
- if( h->lookahead->last_nonb )
- x264_frame_push_unused( h, h->lookahead->last_nonb );
- - x264_synch_frame_list_delete( &h->lookahead->ofbuf );
- + x264_sync_frame_list_delete( &h->lookahead->ofbuf );
- x264_free( h->lookahead );
- }
- void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
- {
- if( h->param.i_sync_lookahead )
- - x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
- + x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
- else
- - x264_synch_frame_list_push( &h->lookahead->next, frame );
- + x264_sync_frame_list_push( &h->lookahead->next, frame );
- }
- int x264_lookahead_is_empty( x264_t *h )
- diff --git a/input/thread.c b/input/thread.c
- index a88cfae..c4b07fa 100644
- --- a/input/thread.c
- +++ b/input/thread.c
- @@ -30,10 +30,9 @@ typedef struct
- cli_input_t input;
- hnd_t p_handle;
- x264_picture_t pic;
- - x264_pthread_t tid;
- + x264_threadpool_t *pool;
- int next_frame;
- int frame_total;
- - int in_progress;
- struct thread_input_arg_t *next_args;
- } thread_hnd_t;
- @@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- }
- h->input = input;
- h->p_handle = *p_handle;
- - h->in_progress = 0;
- h->next_frame = -1;
- h->next_args = malloc( sizeof(thread_input_arg_t) );
- if( !h->next_args )
- @@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- thread_input.picture_alloc = h->input.picture_alloc;
- thread_input.picture_clean = h->input.picture_clean;
- + if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
- + return -1;
- +
- *p_handle = h;
- return 0;
- }
- @@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- if( h->next_frame >= 0 )
- {
- - x264_pthread_join( h->tid, NULL );
- + x264_threadpool_wait( h->pool, h->next_args );
- ret |= h->next_args->status;
- - h->in_progress = 0;
- }
- if( h->next_frame == i_frame )
- @@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- h->next_frame =
- h->next_args->i_frame = i_frame+1;
- h->next_args->pic = &h->pic;
- - if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
- - return -1;
- - h->in_progress = 1;
- + x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
- }
- else
- h->next_frame = -1;
- @@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
- static int close_file( hnd_t handle )
- {
- thread_hnd_t *h = handle;
- - if( h->in_progress )
- - x264_pthread_join( h->tid, NULL );
- + x264_threadpool_delete( h->pool );
- h->input.close_file( h->p_handle );
- h->input.picture_clean( &h->pic );
- free( h->next_args );
- --
- 1.7.0.4
- From 0496fd76623fb8dd72eefd4b20719f27565913c3 Mon Sep 17 00:00:00 2001
- From: Lamont Alston <wewk584@gmail.com>
- Date: Wed, 16 Jun 2010 10:05:17 -0700
- Subject: [PATCH 3/9] Add open-GOP support
- ---
- common/common.c | 6 +++-
- common/common.h | 8 +++++-
- encoder/encoder.c | 48 +++++++++++++++++++++++++-------------
- encoder/lookahead.c | 2 +-
- encoder/ratecontrol.c | 1 +
- encoder/slicetype.c | 61 ++++++++++++++++++++++++++++++++++--------------
- x264.c | 7 +++++-
- x264.h | 4 ++-
- 8 files changed, 95 insertions(+), 42 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 4fa5e4b..5ccd541 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -699,6 +699,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- p->i_slice_max_mbs = atoi(value);
- OPT("slices")
- p->i_slice_count = atoi(value);
- + OPT("open-gop")
- + p->b_open_gop = atobool(value);
- OPT("cabac")
- p->b_cabac = atobool(value);
- OPT("cabac-idc")
- @@ -1186,9 +1188,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
- s += sprintf( s, " bframes=%d", p->i_bframe );
- if( p->i_bframe )
- {
- - s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
- + s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
- p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
- - p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
- + p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
- }
- s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
- diff --git a/common/common.h b/common/common.h
- index 659c2a4..19e5d32 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -362,7 +362,7 @@ typedef struct x264_lookahead_t
- volatile uint8_t b_exit_thread;
- uint8_t b_thread_active;
- uint8_t b_analyse_keyframe;
- - int i_last_keyframe;
- + int i_last_coded_keyframe;
- int i_slicetype_length;
- x264_frame_t *last_nonb;
- x264_pthread_t thread_handle;
- @@ -470,7 +470,11 @@ struct x264_t
- /* frames used for reference + sentinels */
- x264_frame_t *reference[16+2];
- - int i_last_keyframe; /* Frame number of the last keyframe */
- + int i_last_coded_keyframe; /* Frame number of the last keyframe coding order */
- + int i_last_idr; /* Frame number of the last IDR (not RP)*/
- + int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
- + * is only assigned during the period between that
- + * I frame and the next P or I frame, else -1 */
- int i_input; /* Number of input frames already accepted */
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 0d33915..3e7f227 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -573,12 +573,9 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
- h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
- }
- - h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
- + h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
- if( h->param.i_keyint_max == 1 )
- - {
- - h->param.i_bframe = 0;
- h->param.b_intra_refresh = 0;
- - }
- h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
- if( h->param.i_bframe <= 1 )
- h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
- @@ -588,6 +585,7 @@ static int x264_validate_parameters( x264_t *h )
- h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
- h->param.analyse.i_direct_mv_pred = 0;
- h->param.analyse.b_weighted_bipred = 0;
- + h->param.b_open_gop = 0;
- }
- if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
- {
- @@ -599,6 +597,11 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
- h->param.i_frame_reference = 1;
- }
- + if( h->param.b_intra_refresh && h->param.b_open_gop )
- + {
- + x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
- + h->param.b_open_gop = 0;
- + }
- if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
- h->param.i_keyint_min = h->param.i_keyint_max / 10;
- h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
- @@ -978,9 +981,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
- h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
- h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
- - h->frames.i_last_keyframe = - h->param.i_keyint_max;
- + h->frames.i_last_idr =
- + h->frames.i_last_coded_keyframe = - h->param.i_keyint_max;
- h->frames.i_input = 0;
- h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
- + h->frames.i_poc_last_open_gop = -1;
- CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
- /* Allocate room for max refs plus a few extra just in case. */
- @@ -1688,35 +1693,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
- {
- int ref;
- int b_hasdelayframe = 0;
- - if( !h->param.i_bframe_pyramid )
- - return;
- /* look for delay frames -- chain must only contain frames that are disposable */
- for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
- b_hasdelayframe |= h->frames.current[i]->i_coded
- != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
- - if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
- + /* This function must handle b-pyramid and clear frames for open-gop */
- + if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
- return;
- /* Remove last BREF. There will never be old BREFs in the
- * dpb during a BREF decode when pyramid == STRICT */
- for( ref = 0; h->frames.reference[ref]; ref++ )
- {
- - if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
- + if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
- && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
- + || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
- + && h->sh.i_type != SLICE_TYPE_B ) )
- {
- int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
- h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
- h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
- - x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
- + x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
- h->b_ref_reorder[0] = 1;
- - break;
- + ref--;
- }
- }
- - /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
- - h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
- + /* Prepare room in the dpb for the delayed display time of the later b-frame's */
- + if( h->param.i_bframe_pyramid )
- + h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
- }
- static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
- @@ -2319,14 +2326,19 @@ int x264_encoder_encode( x264_t *h,
- if( h->fenc->b_keyframe )
- {
- - h->frames.i_last_keyframe = h->fenc->i_frame;
- + h->frames.i_last_coded_keyframe = h->fenc->i_frame;
- if( h->fenc->i_type == X264_TYPE_IDR )
- + {
- h->i_frame_num = 0;
- + h->frames.i_last_idr = h->fenc->i_frame;
- + }
- }
- h->sh.i_mmco_command_count =
- h->sh.i_mmco_remove_from_end = 0;
- h->b_ref_reorder[0] =
- h->b_ref_reorder[1] = 0;
- + h->fdec->i_poc =
- + h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
- /* ------------------- Setup frame context ----------------------------- */
- /* 5: Init data dependent of frame type */
- @@ -2337,6 +2349,7 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
- h->sh.i_type = SLICE_TYPE_I;
- x264_reference_reset( h );
- + h->frames.i_poc_last_open_gop = -1;
- }
- else if( h->fenc->i_type == X264_TYPE_I )
- {
- @@ -2344,6 +2357,8 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
- h->sh.i_type = SLICE_TYPE_I;
- x264_reference_hierarchy_reset( h );
- + if( h->param.b_open_gop )
- + h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
- }
- else if( h->fenc->i_type == X264_TYPE_P )
- {
- @@ -2351,6 +2366,7 @@ int x264_encoder_encode( x264_t *h,
- i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
- h->sh.i_type = SLICE_TYPE_P;
- x264_reference_hierarchy_reset( h );
- + h->frames.i_poc_last_open_gop = -1;
- }
- else if( h->fenc->i_type == X264_TYPE_BREF )
- {
- @@ -2366,8 +2382,6 @@ int x264_encoder_encode( x264_t *h,
- h->sh.i_type = SLICE_TYPE_B;
- }
- - h->fdec->i_poc =
- - h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
- h->fdec->i_type = h->fenc->i_type;
- h->fdec->i_frame = h->fenc->i_frame;
- h->fenc->b_kept_as_ref =
- @@ -2484,7 +2498,7 @@ int x264_encoder_encode( x264_t *h,
- if( h->fenc->i_type != X264_TYPE_IDR )
- {
- - int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- + int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
- x264_nal_end( h );
- diff --git a/encoder/lookahead.c b/encoder/lookahead.c
- index f0af216..6994829 100644
- --- a/encoder/lookahead.c
- +++ b/encoder/lookahead.c
- @@ -131,7 +131,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
- for( int i = 0; i < h->param.i_threads; i++ )
- h->thread[i]->lookahead = look;
- - look->i_last_keyframe = - h->param.i_keyint_max;
- + look->i_last_coded_keyframe = - h->param.i_keyint_max;
- look->b_analyse_keyframe = (h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead))
- && !h->param.rc.b_stat_read;
- look->i_slicetype_length = i_slicetype_length;
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 2c05ad7..f30df22 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -704,6 +704,7 @@ int x264_ratecontrol_new( x264_t *h )
- CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
- CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
- CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
- + CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
- if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
- x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 60f3a24..0762c99 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -981,7 +981,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
- int icost = frame->i_cost_est[0][0];
- int pcost = frame->i_cost_est[p1-p0][0];
- float f_bias;
- - int i_gop_size = frame->i_frame - h->lookahead->i_last_keyframe;
- + int i_gop_size = frame->i_frame - h->lookahead->i_last_coded_keyframe;
- float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
- /* magic numbers pulled out of thin air */
- float f_thresh_min = f_thresh_max * h->param.i_keyint_min
- @@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- {
- x264_mb_analysis_t a;
- x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
- - int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
- + int num_frames, orig_num_frames, keyint_limit, framecnt;
- int i_mb_count = NUM_MBS;
- int cost1p0, cost2p0, cost1b1, cost2p1;
- int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
- @@ -1076,11 +1076,10 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- if( !framecnt )
- return;
- - keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
- + keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
- orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
- x264_lowres_context_init( h, &a );
- - idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
- /* This is important psy-wise: if we have a non-scenecut keyframe,
- * there will be significant visual artifacts if the frames just before
- @@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- {
- frames[1]->i_type = X264_TYPE_P;
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- else if( num_frames == 0 )
- {
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- @@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- int reset_start;
- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- {
- - frames[1]->i_type = idr_frame_type;
- + frames[1]->i_type = X264_TYPE_I;
- return;
- }
- @@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- /* Enforce keyframe limit. */
- if( !h->param.b_intra_refresh )
- - for( int j = 0; j < num_frames; j++ )
- + for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
- {
- - if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
- + int j = i;
- + if( h->param.b_open_gop )
- {
- - if( j && h->param.i_keyint_max > 1 )
- - frames[j]->i_type = X264_TYPE_P;
- - frames[j+1]->i_type = X264_TYPE_IDR;
- - reset_start = X264_MIN( reset_start, j+2 );
- + while( IS_X264_TYPE_B( frames[i]->i_type ) )
- + i++;
- + while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
- + j--;
- }
- + frames[i]->i_type = X264_TYPE_I;
- + reset_start = X264_MIN( reset_start, i+1 );
- + i = j;
- }
- if( h->param.rc.i_vbv_buffer_size )
- @@ -1303,18 +1306,40 @@ void x264_slicetype_decide( x264_t *h )
- frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
- }
- + if( frm->i_type == X264_TYPE_KEYFRAME )
- + frm->i_type = h->param.b_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
- +
- /* Limit GOP size */
- - if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
- + if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_max )
- + {
- + if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
- + frm->i_type = h->param.b_open_gop && h->lookahead->i_last_coded_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
- + int warn = frm->i_type != X264_TYPE_IDR;
- + if( warn && h->param.b_open_gop )
- + {
- + /* if this minigop ends with i, it's not a violation */
- + int j = bframes;
- + while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
- + j++;
- + warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
- + }
- + if( warn )
- + x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
- + }
- + if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_min )
- {
- - if( frm->i_type == X264_TYPE_AUTO )
- + if( h->param.b_open_gop )
- + {
- + h->lookahead->i_last_coded_keyframe = frm->i_frame - bframes;
- + frm->b_keyframe = 1;
- + }
- + else
- frm->i_type = X264_TYPE_IDR;
- - if( frm->i_type != X264_TYPE_IDR )
- - x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
- }
- if( frm->i_type == X264_TYPE_IDR )
- {
- /* Close GOP */
- - h->lookahead->i_last_keyframe = frm->i_frame;
- + h->lookahead->i_last_coded_keyframe = frm->i_frame;
- frm->b_keyframe = 1;
- if( bframes > 0 )
- {
- diff --git a/x264.c b/x264.c
- index a124083..eba72c5 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -380,6 +380,8 @@ static void Help( x264_param_t *defaults, int longhelp )
- " - strict: Strictly hierarchical pyramid\n"
- " - normal: Non-strict (not Blu-ray compatible)\n",
- strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
- + H1( " --open-gop Use recovery points to close GOPs\n"
- + " Only available with b-frames\n" );
- H1( " --no-cabac Disable CABAC\n" );
- H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
- H1( " --no-deblock Disable loop filter\n" );
- @@ -441,7 +443,8 @@ static void Help( x264_param_t *defaults, int longhelp )
- " or b=<float> (bitrate multiplier)\n" );
- H2( " --qpfile <string> Force frametypes and QPs for some or all frames\n"
- " Format of each line: framenumber frametype QP\n"
- - " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
- + " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
- + " K=<I or i> depending on open-gop setting\n"
- " QPs are restricted by qpmin/qpmax.\n" );
- H1( "\n" );
- H1( "Analysis:\n" );
- @@ -627,6 +630,7 @@ static struct option long_options[] =
- { "no-b-adapt", no_argument, NULL, 0 },
- { "b-bias", required_argument, NULL, 0 },
- { "b-pyramid", required_argument, NULL, 0 },
- + { "open-gop", no_argument, NULL, 0 },
- { "min-keyint", required_argument, NULL, 'i' },
- { "keyint", required_argument, NULL, 'I' },
- { "intra-refresh", no_argument, NULL, 0 },
- @@ -1304,6 +1308,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
- pic->i_qpplus1 = qp+1;
- if ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
- else if( type == 'i' ) pic->i_type = X264_TYPE_I;
- + else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
- else if( type == 'P' ) pic->i_type = X264_TYPE_P;
- else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
- else if( type == 'b' ) pic->i_type = X264_TYPE_B;
- diff --git a/x264.h b/x264.h
- index 9cd4600..b1402c9 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 98
- +#define X264_BUILD 99
- /* x264_t:
- * opaque handler for encoder */
- @@ -138,6 +138,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
- #define X264_TYPE_P 0x0003
- #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
- #define X264_TYPE_B 0x0005
- +#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on b_open_gop option */
- #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
- #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
- @@ -221,6 +222,7 @@ typedef struct x264_param_t
- int i_bframe_adaptive;
- int i_bframe_bias;
- int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
- + int b_open_gop;
- int b_deblocking_filter;
- int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
- --
- 1.7.0.4
- From 7ce61c25c289ef641349c2f4295a4f61dd173557 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Thu, 17 Jun 2010 14:50:07 -0700
- Subject: [PATCH 4/9] Lookaheadless MB-tree support
- Uses past motion information instead of future data from the lookahead.
- Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
- Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
- Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
- Enable MB-tree in "veryfast", albeit with a very short lookahead.
- ---
- common/common.c | 4 +++-
- encoder/encoder.c | 7 ++++++-
- encoder/slicetype.c | 48 ++++++++++++++++++++++++++++++++++--------------
- x264.c | 14 +++++++-------
- 4 files changed, 50 insertions(+), 23 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 5ccd541..9e86f93 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- param->analyse.b_weighted_bipred = 0;
- + param->rc.i_lookahead = 0;
- }
- else if( !strcasecmp( preset, "superfast" ) )
- {
- @@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->analyse.i_trellis = 0;
- param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + param->rc.i_lookahead = 0;
- }
- else if( !strcasecmp( preset, "veryfast" ) )
- {
- @@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- param->i_frame_reference = 1;
- param->analyse.b_mixed_references = 0;
- param->analyse.i_trellis = 0;
- - param->rc.b_mb_tree = 0;
- param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + param->rc.i_lookahead = 10;
- }
- else if( !strcasecmp( preset, "faster" ) )
- {
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 3e7f227..2b0e017 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -620,8 +620,13 @@ static int x264_validate_parameters( x264_t *h )
- }
- h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
- - if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
- + if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
- h->param.rc.b_mb_tree = 0;
- + if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
- + {
- + x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
- + h->param.rc.b_mb_tree = 0;
- + }
- if( h->param.rc.b_stat_read )
- h->param.rc.i_lookahead = 0;
- #if HAVE_PTHREAD
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 0762c99..83948fc 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
- }
- }
- - if( h->param.rc.i_vbv_buffer_size && referenced )
- + if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
- x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
- }
- @@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- int idx = !b_intra;
- int last_nonb, cur_nonb = 1;
- int bframes = 0;
- - int i = num_frames - 1;
- + int i = num_frames;
- +
- if( b_intra )
- x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
- @@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- i--;
- last_nonb = i;
- - if( last_nonb < idx )
- - return;
- - memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + if( !h->param.rc.i_lookahead )
- + {
- + if( b_intra )
- + {
- + memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
- + return;
- + }
- + XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
- + memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + }
- + else
- + {
- + if( last_nonb < idx )
- + return;
- + memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
- + }
- +
- while( i-- > idx )
- {
- cur_nonb = i;
- @@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
- last_nonb = cur_nonb;
- }
- + if( !h->param.rc.i_lookahead )
- + {
- + x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
- + XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
- + }
- +
- x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
- if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
- x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
- @@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- int i_mb_count = NUM_MBS;
- int cost1p0, cost2p0, cost1b1, cost2p1;
- int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
- + int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
- if( h->param.b_deterministic )
- i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
- @@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- frames[framecnt+1] = h->lookahead->next.list[framecnt];
- if( !framecnt )
- + {
- + if( h->param.rc.b_mb_tree )
- + x264_macroblock_tree( h, &a, frames, 0, keyframe );
- return;
- + }
- keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
- orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
- @@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- * there will be significant visual artifacts if the frames just before
- * go down in quality due to being referenced less, despite it being
- * more RD-optimal. */
- - if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
- + if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
- num_frames = framecnt;
- - else if( num_frames == 1 )
- - {
- - frames[1]->i_type = X264_TYPE_P;
- - if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
- - frames[1]->i_type = X264_TYPE_I;
- - return;
- - }
- else if( num_frames == 0 )
- {
- frames[1]->i_type = X264_TYPE_I;
- @@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- i = j;
- }
- - if( h->param.rc.i_vbv_buffer_size )
- + if( vbv_lookahead )
- x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
- /* Restore frametypes for all frames that haven't actually been decided yet. */
- diff --git a/x264.c b/x264.c
- index eba72c5..4265a3b 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
- " --no-8x8dct --aq-mode 0 --b-adapt 0\n"
- " --bframes 0 --no-cabac --no-deblock\n"
- " --no-mbtree --me dia --no-mixed-refs\n"
- - " --partitions none --ref 1 --scenecut 0\n"
- - " --subme 0 --trellis 0 --no-weightb\n"
- - " --weightp 0\n"
- + " --partitions none --rc-lookahead 0 --ref 1\n"
- + " --scenecut 0 --subme 0 --trellis 0\n"
- + " --no-weightb --weightp 0\n"
- " - superfast:\n"
- " --no-mbtree --me dia --no-mixed-refs\n"
- - " --partitions i8x8,i4x4 --ref 1\n"
- - " --subme 1 --trellis 0 --weightp 0\n"
- + " --partitions i8x8,i4x4 --rc-lookahead 0\n"
- + " --ref 1 --subme 1 --trellis 0 --weightp 0\n"
- " - veryfast:\n"
- - " --no-mbtree --no-mixed-refs --ref 1\n"
- - " --subme 2 --trellis 0 --weightp 0\n"
- + " --no-mixed-refs --rc-lookahead 10\n"
- + " --ref 1 --subme 2 --trellis 0 --weightp 0\n"
- " - faster:\n"
- " --no-mixed-refs --rc-lookahead 20\n"
- " --ref 2 --subme 4 --weightp 1\n"
- --
- 1.7.0.4
- From f0505f9c3c9c4d6e7643cb878ea72192abef2420 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Sat, 19 Jun 2010 01:44:56 +0400
- Subject: [PATCH 5/9] Fix SIGPIPEs caused by is_regular_file checks
- Check to see if input file is a pipe without opening it.
- ---
- common/osdep.h | 10 +++++++++-
- x264.c | 1 +
- 2 files changed, 10 insertions(+), 1 deletions(-)
- diff --git a/common/osdep.h b/common/osdep.h
- index b1b357c..b3a8cd6 100644
- --- a/common/osdep.h
- +++ b/common/osdep.h
- @@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
- {
- struct stat file_stat;
- if( fstat( fileno( filehandle ), &file_stat ) )
- - return 0;
- + return 1; /* fstat failed: report nonzero; -1 would wrap to 255 in a uint8_t return */
- + return S_ISREG( file_stat.st_mode );
- +}
- +
- +static inline uint8_t x264_is_regular_file_path( const char *filename )
- +{
- + struct stat file_stat;
- + if( stat( filename, &file_stat ) )
- + return 1; /* stat failed: report nonzero; -1 would wrap to 255 in a uint8_t return */
- return S_ISREG( file_stat.st_mode );
- }
- diff --git a/x264.c b/x264.c
- index 4265a3b..25609a3 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -810,6 +810,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
- int b_auto = !strcasecmp( demuxer, "auto" );
- if( !b_regular && b_auto )
- ext = "yuv";
- + b_regular = b_regular && x264_is_regular_file_path( filename );
- if( b_regular )
- {
- FILE *f = fopen( filename, "r" );
- --
- 1.7.0.4
- From 99a6182a8232083a641cc0423f56407e0589c313 Mon Sep 17 00:00:00 2001
- From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
- Date: Fri, 18 Jun 2010 14:57:52 -0700
- Subject: [PATCH 6/9] Properly close files on errors
- ---
- encoder/encoder.c | 1 +
- input/avs.c | 1 +
- output/mp4.c | 1 +
- 3 files changed, 3 insertions(+), 0 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 2b0e017..cd65da2 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1137,6 +1137,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
- else if( !x264_is_regular_file( f ) )
- {
- x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv );
- + fclose( f );
- goto fail;
- }
- fclose( f );
- diff --git a/input/avs.c b/input/avs.c
- index 07add40..849c465 100644
- --- a/input/avs.c
- +++ b/input/avs.c
- @@ -134,6 +134,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- else if( !x264_is_regular_file( fh ) )
- {
- fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
- + fclose( fh );
- return -1;
- }
- fclose( fh );
- diff --git a/output/mp4.c b/output/mp4.c
- index 0e3c2fc..9b35a2f 100644
- --- a/output/mp4.c
- +++ b/output/mp4.c
- @@ -166,6 +166,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
- else if( !x264_is_regular_file( fh ) )
- {
- fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
- + fclose( fh );
- return -1;
- }
- fclose( fh );
- --
- 1.7.0.4
- From 9dda85ea508aa5f3e5d3d44c80fee7b33caaea49 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Fri, 18 Jun 2010 13:58:11 -0700
- Subject: [PATCH 7/9] sse4 and ssse3 versions of some intra_sad functions
- ---
- common/pixel.c | 5 ++
- common/x86/pixel.h | 2 +
- common/x86/sad-a.asm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
- 3 files changed, 114 insertions(+), 0 deletions(-)
- diff --git a/common/pixel.c b/common/pixel.c
- index a8cb1df..7fcc91a 100644
- --- a/common/pixel.c
- +++ b/common/pixel.c
- @@ -856,6 +856,11 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
- }
- pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
- pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
- + pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
- +
- + /* Only faster on Nehalem */
- + if( cpu&X264_CPU_SSE42 )
- + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
- }
- #endif //HAVE_MMX
- diff --git a/common/x86/pixel.h b/common/x86/pixel.h
- index 9bba683..b1b916d 100644
- --- a/common/x86/pixel.h
- +++ b/common/x86/pixel.h
- @@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4, ( uint8_t *pix, int i_stride ))
- void x264_intra_satd_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_4x4_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
- +void x264_intra_sad_x3_4x4_sse4 ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
- @@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
- +void x264_intra_sad_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
- void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
- void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
- void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
- diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
- index 72c1789..10a365c 100644
- --- a/common/x86/sad-a.asm
- +++ b/common/x86/sad-a.asm
- @@ -26,6 +26,19 @@
- %include "x86inc.asm"
- %include "x86util.asm"
- +SECTION_RODATA
- +
- +h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
- +h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- +h8x8_pred_shuf: times 8 db 1
- + times 8 db 0
- + times 8 db 3
- + times 8 db 2
- + times 8 db 5
- + times 8 db 4
- + times 8 db 7
- + times 8 db 6
- +
- SECTION .text
- cextern pb_3
- @@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
- movd [r2+4], mm1 ;H prediction cost
- RET
- +cglobal intra_sad_x3_4x4_sse4, 3,3
- + movd xmm4, [r1+FDEC_STRIDE*0-4]
- + pinsrd xmm4, [r1+FDEC_STRIDE*1-4], 1
- + pinsrd xmm4, [r1+FDEC_STRIDE*2-4], 2
- + pinsrd xmm4, [r1+FDEC_STRIDE*3-4], 3
- + movd xmm2, [r1-FDEC_STRIDE]
- + pxor xmm3, xmm3
- + movdqa xmm5, xmm4
- + pshufb xmm4, [h4x4_pred_shuf2] ; EFGH
- + pshufb xmm5, [h4x4_pred_shuf] ; EEEEFFFFGGGGHHHH
- + pshufd xmm0, xmm2, 0 ; ABCDABCDABCDABCD
- + punpckldq xmm2, xmm4 ; ABCDEFGH
- + psadbw xmm2, xmm3
- + movd xmm1, [r0+FENC_STRIDE*0]
- + pinsrd xmm1, [r0+FENC_STRIDE*1], 1
- + pinsrd xmm1, [r0+FENC_STRIDE*2], 2
- + pinsrd xmm1, [r0+FENC_STRIDE*3], 3
- + psadbw xmm0, xmm1
- + psadbw xmm5, xmm1
- + psraw xmm2, 2
- + pavgw xmm2, xmm3
- + pshufb xmm2, xmm3 ; DC prediction
- + movdqa xmm3, xmm0
- + punpcklqdq xmm0, xmm5
- + punpckhqdq xmm3, xmm5
- + psadbw xmm2, xmm1
- + paddw xmm0, xmm3
- + movhlps xmm4, xmm2
- + packusdw xmm0, xmm0
- + paddw xmm2, xmm4
- + movq [r2], xmm0 ; V/H prediction costs
- + movd [r2+8], xmm2 ; DC prediction cost
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
- ;-----------------------------------------------------------------------------
- @@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
- movd [r2+8], m1
- RET
- +INIT_XMM
- +cglobal intra_sad_x3_8x8_ssse3, 3,4,9
- +%ifdef PIC
- + lea r11, [h8x8_pred_shuf]
- +%define shuf r11
- +%else
- +%define shuf h8x8_pred_shuf
- +%endif
- + movq m0, [r1+7] ; left pixels
- + movq m1, [r1+16] ; top pixels
- + pxor m2, m2
- + pxor m3, m3
- + psadbw m2, m0
- + psadbw m3, m1
- + paddw m2, m3
- + pxor m3, m3 ; V score accumulator
- + psraw m2, 3
- + pavgw m2, m3
- + punpcklqdq m1, m1 ; V prediction
- + pshufb m2, m3 ; DC prediction
- + pxor m4, m4 ; H score accumulator
- + pxor m5, m5 ; DC score accumulator
- + mov r3d, 6
- +.loop:
- + movq m6, [r0+FENC_STRIDE*0]
- + movhps m6, [r0+FENC_STRIDE*1]
- + movdqa m7, m0
- + pshufb m7, [shuf+r3*8] ; H prediction
- +%ifdef ARCH_X86_64
- + movdqa m8, m1
- + psadbw m7, m6
- + psadbw m8, m6
- + psadbw m6, m2
- + paddw m4, m7
- + paddw m3, m8
- + paddw m5, m6
- +%else
- + psadbw m7, m6
- + paddw m4, m7
- + movdqa m7, m1
- + psadbw m7, m6
- + psadbw m6, m2
- + paddw m3, m7
- + paddw m5, m6
- +%endif
- + add r0, FENC_STRIDE*2
- + sub r3d, 2
- + jge .loop
- +
- + movhlps m0, m3
- + movhlps m1, m4
- + movhlps m2, m5
- + paddw m3, m0
- + paddw m4, m1
- + paddw m5, m2
- + movd [r2+0], m3
- + movd [r2+4], m4
- + movd [r2+8], m5
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
- ;-----------------------------------------------------------------------------
- --
- 1.7.0.4
- From 4e190ca01a0717f9d4056213226b57eb4a6d1b69 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 19 Jun 2010 01:41:07 -0700
- Subject: [PATCH 8/9] Improve 2-pass bitrate prediction
- Adapt based on distance to the end in bits, not in frames.
- Helps in videos with absurdly simple end sections, e.g. black frames.
- ---
- encoder/ratecontrol.c | 12 +++++++++---
- 1 files changed, 9 insertions(+), 3 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index f30df22..6668c18 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -2014,9 +2014,6 @@ static float rate_estimate_qscale( x264_t *h )
- double lmax = rcc->lmax[pict_type];
- int64_t diff;
- int64_t predicted_bits = total_bits;
- - /* Adjust ABR buffer based on distance to the end of the video. */
- - if( rcc->num_entries > h->i_frame )
- - abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
- if( rcc->b_vbv )
- {
- @@ -2042,6 +2039,15 @@ static float rate_estimate_qscale( x264_t *h )
- predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
- }
- + /* Adjust ABR buffer based on distance to the end of the video. */
- + if( rcc->num_entries > h->i_frame )
- + {
- + double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
- + double video_pos = rce.expected_bits / final_bits;
- + double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
- + abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
- + }
- +
- diff = predicted_bits - (int64_t)rce.expected_bits;
- q = rce.new_qscale;
- q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
- --
- 1.7.0.4
- From 62e9a31ca2eca59f0d8f54c104d8f7229af4f78c Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sat, 19 Jun 2010 03:27:33 -0700
- Subject: [PATCH 9/9] Attempt to fix rounding errors in HRD
- In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
- Accordingly, convert buffer management to work in units of timescale.
- This should fix problems with accumulating rounding errors in HRD information.
- ---
- common/common.c | 36 ++++++++++++++-----------
- common/common.h | 1 +
- encoder/encoder.c | 3 +-
- encoder/ratecontrol.c | 67 ++++++++++++++++++++++++++++++++-----------------
- encoder/ratecontrol.h | 2 +-
- 5 files changed, 67 insertions(+), 42 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 9e86f93..44fd68a 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -1084,24 +1084,28 @@ void x264_free( void *p )
- /****************************************************************************
- * x264_reduce_fraction:
- ****************************************************************************/
- -void x264_reduce_fraction( uint32_t *n, uint32_t *d )
- -{
- - uint32_t a = *n;
- - uint32_t b = *d;
- - uint32_t c;
- - if( !a || !b )
- - return;
- - c = a % b;
- - while(c)
- - {
- - a = b;
- - b = c;
- - c = a % b;
- - }
- - *n /= b;
- - *d /= b;
- +#define REDUCE_FRACTION( name, type )\
- +void name( type *n, type *d )\
- +{ \
- + type a = *n; \
- + type b = *d; \
- + type c; \
- + if( !a || !b ) \
- + return; \
- + c = a % b; \
- + while( c ) \
- + { \
- + a = b; \
- + b = c; \
- + c = a % b; \
- + } \
- + *n /= b; \
- + *d /= b; \
- }
- +REDUCE_FRACTION( x264_reduce_fraction, uint32_t )
- +REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
- +
- /****************************************************************************
- * x264_slurp_file:
- ****************************************************************************/
- diff --git a/common/common.h b/common/common.h
- index 19e5d32..fee9398 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -184,6 +184,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
- void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
- void x264_reduce_fraction( uint32_t *n, uint32_t *d );
- +void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
- void x264_init_vlc_tables();
- static ALWAYS_INLINE pixel x264_clip_pixel( int x )
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index cd65da2..c49ea1a 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -2593,8 +2593,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- /* generate sei buffering period and insert it into place */
- if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
- {
- - h->initial_cpb_removal_delay = x264_hrd_fullness( h );
- -
- + x264_hrd_fullness( h );
- x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
- x264_sei_buffering_period_write( h, &h->out.bs );
- if( x264_nal_end( h ) )
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 6668c18..b37d9be 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -91,7 +91,7 @@ struct x264_ratecontrol_t
- /* VBV stuff */
- double buffer_size;
- - double buffer_fill_final; /* real buffer as of the last finished frame */
- + int64_t buffer_fill_final;
- double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
- double buffer_rate; /* # of bits added to buffer_fill after each frame */
- double vbv_max_rate; /* # of bits added to buffer_fill per second */
- @@ -157,6 +157,7 @@ struct x264_ratecontrol_t
- int initial_cpb_removal_delay_offset;
- double nrt_first_access_unit; /* nominal removal time */
- double previous_cpb_final_arrival_time;
- + uint64_t hrd_multiply_denom;
- };
- @@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
- /* Init HRD */
- + h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
- + h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
- if( h->param.i_nal_hrd && b_init )
- {
- h->sps->vui.hrd.i_cpb_cnt = 1;
- @@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- #undef MAX_DURATION
- - vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
- - vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
- + vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
- + vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
- }
- else if( h->param.i_nal_hrd && !b_init )
- {
- @@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- if( h->param.rc.f_vbv_buffer_init > 1. )
- h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
- h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
- - rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
- + rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
- rc->b_vbv = 1;
- rc->b_vbv_min_rate = !rc->b_2pass
- && h->param.rc.i_rc_method == X264_RC_ABR
- @@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
- x264_ratecontrol_init_reconfigurable( h, 1 );
- + if( h->param.i_nal_hrd )
- + {
- + uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
- + uint64_t num = 180000;
- + x264_reduce_fraction64( &num, &denom );
- + rc->hrd_multiply_denom = 180000 / num;
- +
- + double bits_required = log2( 180000 / rc->hrd_multiply_denom )
- + + log2( h->sps->vui.i_time_scale )
- + + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
- + if( bits_required >= 63 )
- + {
- + x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
- + return -1;
- + }
- + }
- +
- if( rc->rate_tolerance < 0.01 )
- {
- x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
- @@ -1723,9 +1743,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
- static int update_vbv( x264_t *h, int bits )
- {
- int filler = 0;
- -
- + int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
- x264_ratecontrol_t *rcc = h->rc;
- x264_ratecontrol_t *rct = h->thread[0]->rc;
- + uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
- if( rcc->last_satd >= h->mb.i_mb_count )
- update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
- @@ -1733,48 +1754,48 @@ static int update_vbv( x264_t *h, int bits )
- if( !rcc->b_vbv )
- return filler;
- - rct->buffer_fill_final -= bits;
- + rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
- if( rct->buffer_fill_final < 0 )
- - x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
- + x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
- rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
- - rct->buffer_fill_final += rcc->buffer_rate;
- + rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
- - if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
- + if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
- {
- - filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
- - rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
- + filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
- + bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
- + rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
- }
- else
- - rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
- + rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
- return filler;
- }
- -int x264_hrd_fullness( x264_t *h )
- +void x264_hrd_fullness( x264_t *h ) /* stores initial_cpb_removal_delay and its offset in h from the final VBV fill level */
- {
- x264_ratecontrol_t *rct = h->thread[0]->rc;
- - double cpb_bits = rct->buffer_fill_final;
- - double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
- - double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
- - double cpb_fullness = 90000.0*cpb_bits/bps;
- + uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
- + uint64_t cpb_state = rct->buffer_fill_final; /* buffer fill, in bits * time_scale */
- + uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; /* same units as cpb_state */
- + uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
- - if( cpb_bits < 0 || cpb_bits > cpb_size )
- + if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) /* test the signed field: the unsigned cpb_state copy can never compare < 0 */
- {
- x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
- - cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
- + rct->buffer_fill_final < 0 ? "underflow" : "overflow", (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); /* values are stored as bits * time_scale, so divide by time_scale to print bits */
- }
- - h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
- -
- - return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
- + h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom); /* adding denom (half the divisor) rounds to nearest */
- + h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay;
- }
- // provisionally update VBV according to the planned size of all frames currently in progress
- static void update_vbv_plan( x264_t *h, int overhead )
- {
- x264_ratecontrol_t *rcc = h->rc;
- - rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
- + rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
- if( h->i_thread_frames > 1 )
- {
- int j = h->rc - h->thread[0]->rc;
- diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
- index dd139eb..f39c070 100644
- --- a/encoder/ratecontrol.h
- +++ b/encoder/ratecontrol.h
- @@ -47,6 +47,6 @@ int x264_rc_analyse_slice( x264_t *h );
- int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
- void x264_threads_distribute_ratecontrol( x264_t *h );
- void x264_threads_merge_ratecontrol( x264_t *h );
- -int x264_hrd_fullness( x264_t *h );
- +void x264_hrd_fullness( x264_t *h );
- #endif
- --
- 1.7.0.4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement