Advertisement
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
- From 4784723450ae1dd28ede1ff04a93f1849d6444e5 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Tue, 16 Feb 2010 09:41:55 -0800
- Subject: [PATCH 01/16] Fix I and B-frame QPs with threads
- Rounding errors resulted in slightly wrong QPs with threads enabled.
- ---
- encoder/ratecontrol.c | 6 +++---
- 1 files changed, 3 insertions(+), 3 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 8c61582..3d86aaa 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -1077,15 +1077,15 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
- rc->qpa_rc =
- rc->qpa_aq = 0;
- - h->fdec->f_qp_avg_rc =
- - h->fdec->f_qp_avg_aq =
- rc->qpm =
- rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
- + h->fdec->f_qp_avg_rc =
- + h->fdec->f_qp_avg_aq =
- rc->f_qpm = q;
- if( rce )
- rce->new_qp = rc->qp;
- - accum_p_qp_update( h, rc->qp );
- + accum_p_qp_update( h, rc->f_qpm );
- if( h->sh.i_type != SLICE_TYPE_B )
- rc->last_non_b_pict_type = h->sh.i_type;
- --
- 1.6.1.2
- From 28e6eb67ffaa002469f60c40e2b5d58b2a758f9c Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 22 Feb 2010 11:21:51 -0800
- Subject: [PATCH 02/16] Fix integer overflow in chroma SSD check
- Could cause bad skips at very high quantizers on extreme inputs.
- ---
- encoder/rdo.c | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
- diff --git a/encoder/rdo.c b/encoder/rdo.c
- index 3ed4a47..e15f47d 100644
- --- a/encoder/rdo.c
- +++ b/encoder/rdo.c
- @@ -131,7 +131,7 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
- static inline int ssd_mb( x264_t *h )
- {
- int chromassd = ssd_plane(h, PIXEL_8x8, 1, 0, 0) + ssd_plane(h, PIXEL_8x8, 2, 0, 0);
- - chromassd = (chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- + chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- return ssd_plane(h, PIXEL_16x16, 0, 0, 0) + chromassd;
- }
- @@ -223,7 +223,7 @@ uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i4, int i_pixel )
- chromassd = ssd_plane( h, i_pixel+3, 1, (i8&1)*4, (i8>>1)*4 )
- + ssd_plane( h, i_pixel+3, 2, (i8&1)*4, (i8>>1)*4 );
- - chromassd = (chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- + chromassd = ((uint64_t)chromassd * h->mb.i_chroma_lambda2_offset + 128) >> 8;
- i_ssd = ssd_plane( h, i_pixel, 0, (i8&1)*8, (i8>>1)*8 ) + chromassd;
- if( h->param.b_cabac )
- --
- 1.6.1.2
- From f0da96145cb068ade0f0232d0682137c9065929f Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 22 Feb 2010 13:04:47 -0800
- Subject: [PATCH 03/16] Fix overread of scratch buffer
- Could cause crashes on non-mod16 frames.
- ---
- encoder/encoder.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index df62389..89bf457 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1055,7 +1055,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
- /* Allocate scratch buffer */
- for( i = 0; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
- {
- - int buf_hpel = (h->param.i_width+48) * sizeof(int16_t);
- + int buf_hpel = (h->fdec->i_width[0]+48) * sizeof(int16_t);
- int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
- int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
- int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
- --
- 1.6.1.2
- From 25292b825a42b577bd121c48d2508f3b4aa7a9eb Mon Sep 17 00:00:00 2001
- From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
- Date: Tue, 16 Feb 2010 11:05:21 -0800
- Subject: [PATCH 04/16] Add GPAC version check
- ---
- configure | 8 +++++++-
- 1 files changed, 7 insertions(+), 1 deletions(-)
- diff --git a/configure b/configure
- index 25f5458..d0ff43a 100755
- --- a/configure
- +++ b/configure
- @@ -584,7 +584,13 @@ if [ $SYS = MINGW ]; then
- fi
- if [ "$mp4_output" = "auto" ] ; then
- mp4_output="no"
- - cc_check gpac/isomedia.h "$MP4_LDFLAGS" && mp4_output="yes"
- + if cc_check gpac/isomedia.h "$MP4_LDFLAGS" ; then
- + if cc_check gpac/isomedia.h "$MP4_LDFLAGS" "gf_isom_set_pixel_aspect_ratio(0,0,0,0,0);" ; then
- + mp4_output="yes"
- + else
- + echo "Warning: gpac is too old, update to 2007-06-21 UTC or later"
- + fi
- + fi
- fi
- if [ "$mp4_output" = "yes" ] ; then
- define MP4_OUTPUT
- --
- 1.6.1.2
- From 5234f855a23607ae0dbfce9eeb0c69007e9d69e4 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 21 Feb 2010 14:21:26 -0800
- Subject: [PATCH 05/16] SimpleBlock requires Matroska Doctype v2
- ---
- output/matroska_ebml.c | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
- diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
- index 7265909..89790b7 100644
- --- a/output/matroska_ebml.c
- +++ b/output/matroska_ebml.c
- @@ -338,8 +338,8 @@ int mk_writeHeader( mk_writer *w, const char *writing_app,
- CHECK( mk_write_uint( c, 0x42f2, 4 ) ); // EBMLMaxIDLength
- CHECK( mk_write_uint( c, 0x42f3, 8 ) ); // EBMLMaxSizeLength
- CHECK( mk_write_string( c, 0x4282, "matroska") ); // DocType
- - CHECK( mk_write_uint( c, 0x4287, 1 ) ); // DocTypeVersion
- - CHECK( mk_write_uint( c, 0x4285, 1 ) ); // DocTypeReadversion
- + CHECK( mk_write_uint( c, 0x4287, 2 ) ); // DocTypeVersion
- + CHECK( mk_write_uint( c, 0x4285, 2 ) ); // DocTypeReadversion
- CHECK( mk_close_context( c, 0 ) );
- if( !(c = mk_create_context( w, w->root, 0x18538067 )) ) // Segment
- --
- 1.6.1.2
- From fff9312827eb936da8da24a426e167494208d195 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Tue, 16 Feb 2010 10:13:33 -0800
- Subject: [PATCH 06/16] Much faster and simpler direct spatial calculation
- ---
- common/macroblock.c | 130 ++++++++++++++++++++++++--------------------------
- 1 files changed, 62 insertions(+), 68 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 278659c..19cd371 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -36,8 +36,6 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
- int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width];
- int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
- - int i_count = 0;
- -
- if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
- {
- i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
- @@ -83,9 +81,7 @@ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mv
- }
- }
- - if( i_refa == i_ref ) i_count++;
- - if( i_refb == i_ref ) i_count++;
- - if( i_refc == i_ref ) i_count++;
- + int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
- if( i_count > 1 )
- {
- @@ -115,18 +111,13 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2]
- int16_t *mv_b = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
- int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
- int16_t *mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
- -
- - int i_count = 0;
- -
- if( i_refc == -2 )
- {
- i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
- mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
- }
- - if( i_refa == i_ref ) i_count++;
- - if( i_refb == i_ref ) i_count++;
- - if( i_refc == i_ref ) i_count++;
- + int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
- if( i_count > 1 )
- {
- @@ -196,7 +187,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- if( i_ref >= 0 )
- {
- const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
- - const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
- + const int16_t *mv_col = h->fref1[0]->mv[0][i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
- const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
- const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
- @@ -221,58 +212,67 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- {
- - int ref[2];
- + int8_t ref[2];
- ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
- - int i_list;
- - int i8;
- - const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
- - const int8_t *l1ref1 = &h->fref1[0]->ref[1][ h->mb.i_b8_xy ];
- - const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ];
- - const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[1][ h->mb.i_b4_xy ];
- - const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
- + int i_list, i8, i_ref;
- + const int8_t *l1ref0 = &h->fref1[0]->ref[0][h->mb.i_b8_xy];
- + const int8_t *l1ref1 = &h->fref1[0]->ref[1][h->mb.i_b8_xy];
- + const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref1[0]->mv[0][h->mb.i_b4_xy],
- + (const int16_t (*)[2]) &h->fref1[0]->mv[1][h->mb.i_b4_xy] };
- + const int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
- - for( i_list=0; i_list<2; i_list++ )
- + for( i_list = 0; i_list < 2; i_list++ )
- {
- - int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
- - int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
- - int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
- + int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
- + int16_t *mv_a = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
- + int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
- + int16_t *mv_b = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
- + int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
- + int16_t *mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
- if( i_refc == -2 )
- + {
- i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
- + mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
- + }
- +
- + i_ref = X264_MIN3( (unsigned)i_refa, (unsigned)i_refb, (unsigned)i_refc );
- + if( i_ref < 0 )
- + {
- + i_ref = -1;
- + M32( mv[i_list] ) = 0;
- + }
- + else
- + {
- + /* Same as x264_mb_predict_mv_16x16, but simplified to eliminate cases
- + * not relevant to spatial direct. */
- + int i_count = (i_refa == i_ref) + (i_refb == i_ref) + (i_refc == i_ref);
- +
- + if( i_count > 1 )
- + x264_median_mv( mv[i_list], mv_a, mv_b, mv_c );
- + else
- + {
- + if( i_refa == i_ref )
- + CP32( mv[i_list], mv_a );
- + else if( i_refb == i_ref )
- + CP32( mv[i_list], mv_b );
- + else
- + CP32( mv[i_list], mv_c );
- + }
- + }
- - ref[i_list] = i_refa;
- - if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
- - ref[i_list] = i_refb;
- - if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
- - ref[i_list] = i_refc;
- - if( ref[i_list] < 0 )
- - ref[i_list] = -1;
- + x264_macroblock_cache_ref( h, 0, 0, 4, 4, i_list, i_ref );
- + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, i_list, mv[i_list] );
- + ref[i_list] = i_ref;
- }
- - if( ref[0] < 0 && ref[1] < 0 )
- + if( (M16( ref ) & 0x8080) == 0x8080 ) /* if( ref[0] < 0 && ref[1] < 0 ) */
- {
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
- - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
- - x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
- return 1;
- }
- - if( ref[0] >= 0 )
- - x264_mb_predict_mv_16x16( h, 0, ref[0], mv[0] );
- - else
- - M32( mv[0] ) = 0;
- -
- - if( ref[1] >= 0 )
- - x264_mb_predict_mv_16x16( h, 1, ref[1], mv[1] );
- - else
- - M32( mv[1] ) = 0;
- -
- - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
- - x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
- - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );
- -
- - if( !M64( mv ) )
- + if( !M64( mv ) || IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
- return 1;
- if( h->param.i_threads > 1
- @@ -287,31 +287,25 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- return 0;
- }
- - if( IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
- - return 1;
- -
- /* col_zero_flag */
- - for( i8=0; i8<4; i8++ )
- + for( i8 = 0; i8 < 4; i8++ )
- {
- - const int x8 = i8%2;
- - const int y8 = i8/2;
- + const int x8 = i8&1;
- + const int y8 = i8>>1;
- const int o8 = x8 + y8 * h->mb.i_b8_stride;
- const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
- + int idx;
- if( l1ref0[o8] == 0 )
- - {
- - if( abs( l1mv0[o4][0] ) <= 1 && abs( l1mv0[o4][1] ) <= 1 )
- - {
- - if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
- - if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
- - }
- - }
- + idx = 0;
- else if( l1ref0[o8] < 0 && l1ref1[o8] == 0 )
- + idx = 1;
- + else
- + continue;
- +
- + if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
- {
- - if( abs( l1mv1[o4][0] ) <= 1 && abs( l1mv1[o4][1] ) <= 1 )
- - {
- - if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
- - if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
- - }
- + if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
- + if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
- }
- }
- --
- 1.6.1.2
- From 4a1303d128a4f7a9df81321940f789022695a9ad Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 17 Feb 2010 22:41:16 -0800
- Subject: [PATCH 07/16] Keep track of macroblock partitions
- Allows vastly simpler motion compensation and direct MV calculation.
- ---
- common/common.h | 2 +
- common/frame.c | 1 +
- common/frame.h | 1 +
- common/macroblock.c | 233 +++++++++++++++++++++++++--------------------------
- encoder/analyse.c | 1 +
- 5 files changed, 121 insertions(+), 117 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index e2e8fac..68f79ba 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -519,6 +519,7 @@ struct x264_t
- /* mb table */
- int8_t *type; /* mb type */
- + uint8_t *partition; /* mb partition */
- int8_t *qp; /* mb qp */
- int16_t *cbp; /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc (all set for PCM)*/
- int8_t (*intra4x4_pred_mode)[8]; /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
- @@ -627,6 +628,7 @@ struct x264_t
- ALIGNED_4( int16_t direct_mv[2][4][2] );
- ALIGNED_4( int8_t direct_ref[2][4] );
- + int direct_partition;
- ALIGNED_4( int16_t pskip_mv[2] );
- /* number of neighbors (top and left) that used 8x8 dct */
- diff --git a/common/frame.c b/common/frame.c
- index d89f5ab..2798f25 100644
- --- a/common/frame.c
- +++ b/common/frame.c
- @@ -95,6 +95,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
- if( b_fdec ) /* fdec frame */
- {
- CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
- + CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
- CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
- if( h->param.i_bframe )
- diff --git a/common/frame.h b/common/frame.h
- index 7c8e2ff..6e7de50 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -75,6 +75,7 @@ typedef struct x264_frame
- /* motion data */
- int8_t *mb_type;
- + uint8_t *mb_partition;
- int16_t (*mv[2])[2];
- int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
- uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 19cd371..2573415 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -165,9 +165,12 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
- int i8;
- const int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
- + const int partition_col = h->fref1[0]->mb_partition[h->mb.i_mb_xy];
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
- + h->mb.i_partition = partition_col;
- +
- if( IS_INTRA( type_col ) )
- {
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- @@ -176,7 +179,15 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- return 1;
- }
- - for( i8 = 0; i8 < 4; i8++ )
- + /* Don't do any checks other than the ones we have to, based
- + * on the size of the colocated partitions.
- + * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- + int max_i8 = (D_16x16 - partition_col) + 1;
- + int step = (partition_col == D_16x8) + 1;
- + int width = 4 >> ((D_16x16 - partition_col)&1);
- + int height = 4 >> ((D_16x16 - partition_col)>>1);
- +
- + for( i8 = 0; i8 < max_i8; i8 += step )
- {
- const int x8 = i8%2;
- const int y8 = i8/2;
- @@ -192,9 +203,9 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
- const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
- return 0;
- - x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
- - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
- - x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
- + x264_macroblock_cache_ref( h, 2*x8, 2*y8, width, height, 0, i_ref );
- + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, pack16to32_mask(l0x, l0y) );
- + x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
- }
- else
- {
- @@ -220,6 +231,9 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- const int16_t (*l1mv[2])[2] = { (const int16_t (*)[2]) &h->fref1[0]->mv[0][h->mb.i_b4_xy],
- (const int16_t (*)[2]) &h->fref1[0]->mv[1][h->mb.i_b4_xy] };
- const int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
- + const int partition_col = h->fref1[0]->mb_partition[h->mb.i_mb_xy];
- +
- + h->mb.i_partition = partition_col;
- for( i_list = 0; i_list < 2; i_list++ )
- {
- @@ -287,8 +301,16 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- return 0;
- }
- + /* Don't do any checks other than the ones we have to, based
- + * on the size of the colocated partitions.
- + * Depends on the enum order: D_8x8, D_16x8, D_8x16, D_16x16 */
- + int max_i8 = (D_16x16 - partition_col) + 1;
- + int step = (partition_col == D_16x8) + 1;
- + int width = 4 >> ((D_16x16 - partition_col)&1);
- + int height = 4 >> ((D_16x16 - partition_col)>>1);
- +
- /* col_zero_flag */
- - for( i8 = 0; i8 < 4; i8++ )
- + for( i8 = 0; i8 < max_i8; i8 += step )
- {
- const int x8 = i8&1;
- const int y8 = i8>>1;
- @@ -304,8 +326,8 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
- if( abs( l1mv[idx][o4][0] ) <= 1 && abs( l1mv[idx][o4][1] ) <= 1 )
- {
- - if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
- - if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
- + if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 0, 0 );
- + if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, width, height, 1, 0 );
- }
- }
- @@ -324,32 +346,29 @@ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
- if( b_changed != NULL && b_available )
- {
- - int type_col = h->fref1[0]->mb_type[h->mb.i_mb_xy];
- - int changed = 0;
- + int changed;
- - if( IS_INTRA( type_col ) || type_col == P_SKIP )
- + changed = M32( h->mb.cache.direct_mv[0][0] ) ^ M32( h->mb.cache.mv[0][x264_scan8[0]] );
- + changed |= M32( h->mb.cache.direct_mv[1][0] ) ^ M32( h->mb.cache.mv[1][x264_scan8[0]] );
- + changed |= h->mb.cache.direct_ref[0][0] ^ h->mb.cache.ref[0][x264_scan8[0]];
- + changed |= h->mb.cache.direct_ref[1][0] ^ h->mb.cache.ref[1][x264_scan8[0]];
- + if( !changed && h->mb.i_partition != D_16x16 )
- {
- - changed |= M32( h->mb.cache.direct_mv[0][0] ) ^ M32( h->mb.cache.mv[0][X264_SCAN8_0] );
- - changed |= M32( h->mb.cache.direct_mv[1][0] ) ^ M32( h->mb.cache.mv[1][X264_SCAN8_0] );
- - changed |= h->mb.cache.direct_ref[0][0] ^ h->mb.cache.ref[0][X264_SCAN8_0];
- - changed |= h->mb.cache.direct_ref[1][0] ^ h->mb.cache.ref[1][X264_SCAN8_0];
- + changed |= M32( h->mb.cache.direct_mv[0][3] ) ^ M32( h->mb.cache.mv[0][x264_scan8[12]] );
- + changed |= M32( h->mb.cache.direct_mv[1][3] ) ^ M32( h->mb.cache.mv[1][x264_scan8[12]] );
- + changed |= h->mb.cache.direct_ref[0][3] ^ h->mb.cache.ref[0][x264_scan8[12]];
- + changed |= h->mb.cache.direct_ref[1][3] ^ h->mb.cache.ref[1][x264_scan8[12]];
- }
- - else
- + if( !changed && h->mb.i_partition == D_8x8 )
- {
- - int l;
- - for( l = 0; l < 2; l++ )
- - {
- - changed |= M32( h->mb.cache.direct_mv[l][0] ) ^ M32( h->mb.cache.mv[l][x264_scan8[ 0]] );
- - if( changed ) break;
- - changed |= M32( h->mb.cache.direct_mv[l][1] ) ^ M32( h->mb.cache.mv[l][x264_scan8[ 4]] );
- - changed |= M32( h->mb.cache.direct_mv[l][2] ) ^ M32( h->mb.cache.mv[l][x264_scan8[ 8]] );
- - changed |= M32( h->mb.cache.direct_mv[l][3] ) ^ M32( h->mb.cache.mv[l][x264_scan8[12]] );
- - if( changed ) break;
- - changed |= h->mb.cache.direct_ref[l][0] ^ h->mb.cache.ref[l][x264_scan8[ 0]];
- - changed |= h->mb.cache.direct_ref[l][1] ^ h->mb.cache.ref[l][x264_scan8[ 4]];
- - changed |= h->mb.cache.direct_ref[l][2] ^ h->mb.cache.ref[l][x264_scan8[ 8]];
- - changed |= h->mb.cache.direct_ref[l][3] ^ h->mb.cache.ref[l][x264_scan8[12]];
- - }
- + changed |= M32( h->mb.cache.direct_mv[0][1] ) ^ M32( h->mb.cache.mv[0][x264_scan8[4]] );
- + changed |= M32( h->mb.cache.direct_mv[1][1] ) ^ M32( h->mb.cache.mv[1][x264_scan8[4]] );
- + changed |= M32( h->mb.cache.direct_mv[0][2] ) ^ M32( h->mb.cache.mv[0][x264_scan8[8]] );
- + changed |= M32( h->mb.cache.direct_mv[1][2] ) ^ M32( h->mb.cache.mv[1][x264_scan8[8]] );
- + changed |= h->mb.cache.direct_ref[0][1] ^ h->mb.cache.ref[0][x264_scan8[4]];
- + changed |= h->mb.cache.direct_ref[1][1] ^ h->mb.cache.ref[1][x264_scan8[4]];
- + changed |= h->mb.cache.direct_ref[0][2] ^ h->mb.cache.ref[0][x264_scan8[8]];
- + changed |= h->mb.cache.direct_ref[1][2] ^ h->mb.cache.ref[1][x264_scan8[8]];
- }
- *b_changed = changed;
- if( !changed )
- @@ -370,6 +389,7 @@ int x264_mb_predict_mv_direct16x16( x264_t *h, int *b_changed )
- h->mb.cache.direct_ref[l][1] = h->mb.cache.ref[l][x264_scan8[ 4]];
- h->mb.cache.direct_ref[l][2] = h->mb.cache.ref[l][x264_scan8[ 8]];
- h->mb.cache.direct_ref[l][3] = h->mb.cache.ref[l][x264_scan8[12]];
- + h->mb.cache.direct_partition = h->mb.i_partition;
- }
- }
- @@ -564,116 +584,93 @@ static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int
- h->mc.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*FDEC_STRIDE+2*x], FDEC_STRIDE, tmp0, 16, tmp1, 16, weight );
- }
- -static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
- -{
- - const int i8 = x264_scan8[0] + x + 8*y;
- -
- - if( h->mb.cache.ref[0][i8] >= 0 )
- - if( h->mb.cache.ref[1][i8] >= 0 )
- - x264_mb_mc_01xywh( h, x, y, 2, 2 );
- - else
- - x264_mb_mc_0xywh( h, x, y, 2, 2 );
- - else
- - x264_mb_mc_1xywh( h, x, y, 2, 2 );
- -}
- -
- void x264_mb_mc_8x8( x264_t *h, int i8 )
- {
- const int x = 2*(i8&1);
- const int y = 2*(i8>>1);
- - switch( h->mb.i_sub_partition[i8] )
- +
- + if( h->sh.i_type == SLICE_TYPE_P )
- + {
- + switch( h->mb.i_sub_partition[i8] )
- + {
- + case D_L0_8x8:
- + x264_mb_mc_0xywh( h, x, y, 2, 2 );
- + break;
- + case D_L0_8x4:
- + x264_mb_mc_0xywh( h, x, y+0, 2, 1 );
- + x264_mb_mc_0xywh( h, x, y+1, 2, 1 );
- + break;
- + case D_L0_4x8:
- + x264_mb_mc_0xywh( h, x+0, y, 1, 2 );
- + x264_mb_mc_0xywh( h, x+1, y, 1, 2 );
- + break;
- + case D_L0_4x4:
- + x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
- + x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
- + x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
- + x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
- + break;
- + }
- + }
- + else
- {
- - case D_L0_8x8:
- - x264_mb_mc_0xywh( h, x, y, 2, 2 );
- - break;
- - case D_L0_8x4:
- - x264_mb_mc_0xywh( h, x, y+0, 2, 1 );
- - x264_mb_mc_0xywh( h, x, y+1, 2, 1 );
- - break;
- - case D_L0_4x8:
- - x264_mb_mc_0xywh( h, x+0, y, 1, 2 );
- - x264_mb_mc_0xywh( h, x+1, y, 1, 2 );
- - break;
- - case D_L0_4x4:
- - x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
- - x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
- - x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
- - x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
- - break;
- - case D_L1_8x8:
- + const int i8 = x264_scan8[0] + x + 8*y;
- +
- + if( h->mb.cache.ref[0][i8] >= 0 )
- + if( h->mb.cache.ref[1][i8] >= 0 )
- + x264_mb_mc_01xywh( h, x, y, 2, 2 );
- + else
- + x264_mb_mc_0xywh( h, x, y, 2, 2 );
- + else
- x264_mb_mc_1xywh( h, x, y, 2, 2 );
- - break;
- - case D_BI_8x8:
- - x264_mb_mc_01xywh( h, x, y, 2, 2 );
- - break;
- - case D_DIRECT_8x8:
- - x264_mb_mc_direct8x8( h, x, y );
- - break;
- }
- }
- void x264_mb_mc( x264_t *h )
- {
- - if( h->mb.i_type == P_L0 )
- - {
- - if( h->mb.i_partition == D_16x16 )
- - {
- - x264_mb_mc_0xywh( h, 0, 0, 4, 4 );
- - }
- - else if( h->mb.i_partition == D_16x8 )
- - {
- - x264_mb_mc_0xywh( h, 0, 0, 4, 2 );
- - x264_mb_mc_0xywh( h, 0, 2, 4, 2 );
- - }
- - else if( h->mb.i_partition == D_8x16 )
- - {
- - x264_mb_mc_0xywh( h, 0, 0, 2, 4 );
- - x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
- - }
- - }
- - else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
- + if( h->mb.i_partition == D_8x8 )
- {
- int i;
- for( i = 0; i < 4; i++ )
- x264_mb_mc_8x8( h, i );
- }
- - else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
- - {
- - x264_mb_mc_direct8x8( h, 0, 0 );
- - x264_mb_mc_direct8x8( h, 2, 0 );
- - x264_mb_mc_direct8x8( h, 0, 2 );
- - x264_mb_mc_direct8x8( h, 2, 2 );
- - }
- - else /* B_*x* */
- + else
- {
- - const uint8_t *b_list0 = x264_mb_type_list_table[h->mb.i_type][0];
- - const uint8_t *b_list1 = x264_mb_type_list_table[h->mb.i_type][1];
- + const int ref0a = h->mb.cache.ref[0][x264_scan8[ 0]];
- + const int ref0b = h->mb.cache.ref[0][x264_scan8[12]];
- + const int ref1a = h->mb.cache.ref[1][x264_scan8[ 0]];
- + const int ref1b = h->mb.cache.ref[1][x264_scan8[12]];
- if( h->mb.i_partition == D_16x16 )
- {
- - if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 );
- - else if( b_list0[0] ) x264_mb_mc_0xywh ( h, 0, 0, 4, 4 );
- - else if( b_list1[0] ) x264_mb_mc_1xywh ( h, 0, 0, 4, 4 );
- + if( ref0a >= 0 )
- + if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 4 );
- + else x264_mb_mc_0xywh ( h, 0, 0, 4, 4 );
- + else x264_mb_mc_1xywh ( h, 0, 0, 4, 4 );
- }
- else if( h->mb.i_partition == D_16x8 )
- {
- - if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 );
- - else if( b_list0[0] ) x264_mb_mc_0xywh ( h, 0, 0, 4, 2 );
- - else if( b_list1[0] ) x264_mb_mc_1xywh ( h, 0, 0, 4, 2 );
- -
- - if( b_list0[1] && b_list1[1] ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 );
- - else if( b_list0[1] ) x264_mb_mc_0xywh ( h, 0, 2, 4, 2 );
- - else if( b_list1[1] ) x264_mb_mc_1xywh ( h, 0, 2, 4, 2 );
- + if( ref0a >= 0 )
- + if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 4, 2 );
- + else x264_mb_mc_0xywh ( h, 0, 0, 4, 2 );
- + else x264_mb_mc_1xywh ( h, 0, 0, 4, 2 );
- +
- + if( ref0b >= 0 )
- + if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 0, 2, 4, 2 );
- + else x264_mb_mc_0xywh ( h, 0, 2, 4, 2 );
- + else x264_mb_mc_1xywh ( h, 0, 2, 4, 2 );
- }
- else if( h->mb.i_partition == D_8x16 )
- {
- - if( b_list0[0] && b_list1[0] ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 );
- - else if( b_list0[0] ) x264_mb_mc_0xywh ( h, 0, 0, 2, 4 );
- - else if( b_list1[0] ) x264_mb_mc_1xywh ( h, 0, 0, 2, 4 );
- -
- - if( b_list0[1] && b_list1[1] ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 );
- - else if( b_list0[1] ) x264_mb_mc_0xywh ( h, 2, 0, 2, 4 );
- - else if( b_list1[1] ) x264_mb_mc_1xywh ( h, 2, 0, 2, 4 );
- + if( ref0a >= 0 )
- + if( ref1a >= 0 ) x264_mb_mc_01xywh( h, 0, 0, 2, 4 );
- + else x264_mb_mc_0xywh ( h, 0, 0, 2, 4 );
- + else x264_mb_mc_1xywh ( h, 0, 0, 2, 4 );
- +
- + if( ref0b >= 0 )
- + if( ref1b >= 0 ) x264_mb_mc_01xywh( h, 2, 0, 2, 4 );
- + else x264_mb_mc_0xywh ( h, 2, 0, 2, 4 );
- + else x264_mb_mc_1xywh ( h, 2, 0, 2, 4 );
- }
- }
- }
- @@ -767,10 +764,6 @@ int x264_macroblock_cache_init( x264_t *h )
- h->mb.intra_border_backup[i][j] += 8;
- }
- - /* init with not available (for top right idx=7,15) */
- - memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
- - memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
- -
- return 0;
- fail: return -1;
- }
- @@ -808,6 +801,7 @@ void x264_macroblock_slice_init( x264_t *h )
- h->mb.ref[0] = h->fdec->ref[0];
- h->mb.ref[1] = h->fdec->ref[1];
- h->mb.type = h->fdec->mb_type;
- + h->mb.partition = h->fdec->mb_partition;
- h->fdec->i_ref[0] = h->i_ref0;
- h->fdec->i_ref[1] = h->i_ref1;
- @@ -835,6 +829,10 @@ void x264_macroblock_slice_init( x264_t *h )
- if( h->sh.i_type == SLICE_TYPE_P )
- memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
- + /* init with not available (for top right idx=7,15) */
- + memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
- + memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
- +
- setup_inverse_delta_pocs( h );
- h->mb.i_neighbour4[6] =
- @@ -1304,6 +1302,7 @@ void x264_macroblock_cache_save( x264_t *h )
- x264_prefetch_fenc( h, h->fdec, h->mb.i_mb_x, h->mb.i_mb_y );
- h->mb.type[i_mb_xy] = i_mb_type;
- + h->mb.partition[i_mb_xy] = IS_INTRA( i_mb_type ) ? D_16x16 : h->mb.i_partition;
- h->mb.i_mb_prev_xy = i_mb_xy;
- /* save intra4x4 */
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 1d48b7d..6ee5f8e 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -3149,6 +3149,7 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
- case B_SKIP:
- case B_DIRECT:
- + h->mb.i_partition = h->mb.cache.direct_partition;
- x264_mb_load_mv_direct8x8( h, 0 );
- x264_mb_load_mv_direct8x8( h, 1 );
- x264_mb_load_mv_direct8x8( h, 2 );
- --
- 1.6.1.2
- From d0be7257766d40b39dd453ebe8a266b64d653f71 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Thu, 18 Feb 2010 10:37:57 -0800
- Subject: [PATCH 08/16] Add temporal predictor support to interlaced encoding
- 0.5-1% better compression in interlaced mode
- ---
- common/frame.h | 2 +-
- common/macroblock.c | 26 +++++++++++++++++++-------
- 2 files changed, 20 insertions(+), 8 deletions(-)
- diff --git a/common/frame.h b/common/frame.h
- index 6e7de50..0566b1e 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -85,7 +85,7 @@ typedef struct x264_frame
- int8_t *ref[2];
- int i_ref[2];
- int ref_poc[2][16];
- - int inv_ref_poc[16]; // inverse values (list0 only) to avoid divisions in MB encoding
- + int16_t inv_ref_poc[2][32]; // inverse values (list0 only) to avoid divisions in MB encoding
- /* for adaptive B-frame decision.
- * contains the SATD cost of the lowres frame encoded in various modes
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 2573415..68c7e06 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -447,10 +447,14 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- #undef SET_MVP
- /* temporal predictors */
- - /* FIXME temporal scaling w/ interlace */
- - if( h->fref0[0]->i_ref[0] > 0 && !h->sh.b_mbaff )
- + if( h->fref0[0]->i_ref[0] > 0 )
- {
- x264_frame_t *l0 = h->fref0[0];
- + int field = h->mb.i_mb_y&1;
- + int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
- + int refpoc = h->fref0[i_ref>>h->sh.b_mbaff]->i_poc;
- + if( h->sh.b_mbaff && field^(i_ref&1) )
- + refpoc += h->sh.i_delta_poc_bottom;
- #define SET_TMVP(dx, dy) { \
- int i_b4 = h->mb.i_b4_xy + dx*4 + dy*4*h->mb.i_b4_stride; \
- @@ -458,7 +462,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- int ref_col = l0->ref[0][i_b8]; \
- if( ref_col >= 0 ) \
- { \
- - int scale = (h->fdec->i_poc - h->fdec->ref_poc[0][i_ref]) * l0->inv_ref_poc[ref_col];\
- + int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field][ref_col];\
- mvc[i][0] = (l0->mv[0][i_b4][0]*scale + 128) >> 8;\
- mvc[i][1] = (l0->mv[0][i_b4][1]*scale + 128) >> 8;\
- i++; \
- @@ -479,11 +483,19 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
- /* Set up a lookup table for delta pocs to reduce an IDIV to an IMUL */
- static void setup_inverse_delta_pocs( x264_t *h )
- {
- - int i;
- - for( i = 0; i < h->i_ref0; i++ )
- + int i, field;
- + for( field = 0; field <= h->sh.b_mbaff; field++ )
- {
- - int delta = h->fdec->i_poc - h->fref0[i]->i_poc;
- - h->fdec->inv_ref_poc[i] = (256 + delta/2) / delta;
- + int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
- + for( i = 0; i < (h->i_ref0<<h->sh.b_mbaff); i++ )
- + {
- + int refpoc = h->fref0[i>>h->sh.b_mbaff]->i_poc;
- + if( h->sh.b_mbaff && field^(i&1) )
- + refpoc += h->sh.i_delta_poc_bottom;
- + int delta = curpoc - refpoc;
- +
- + h->fdec->inv_ref_poc[field][i] = (256 + delta/2) / delta;
- + }
- }
- }
- --
- 1.6.1.2
- From da810dcc80ef85239a7c641b8af5c00f88aba1eb Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Thu, 18 Feb 2010 17:01:38 -0800
- Subject: [PATCH 09/16] Much faster and more efficient MVD handling
- Store MV deltas as clipped absolute values.
- This means CABAC no longer has to calculate absolute values in MV context selection.
- This also lets us cut the memory spent on MVDs by a factor of 2, speeding up cache_mvd and reducing memory usage by 32*threads*(num macroblocks) bytes.
- On a Core i7 encoding 1080p, this is about 3 megabytes saved.
- ---
- common/common.h | 8 ++++----
- common/macroblock.c | 47 +++++++++++++----------------------------------
- common/macroblock.h | 31 +++++++++++++++++++++++++++++--
- common/x86/util.h | 40 +++++++++++++++++-----------------------
- encoder/cabac.c | 20 +++++++++++---------
- encoder/me.c | 3 ++-
- 6 files changed, 76 insertions(+), 73 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 68f79ba..ab54508 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -171,13 +171,13 @@ static inline int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
- return sum;
- }
- -static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
- +static inline uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop )
- {
- int amvd0 = abs(mvdleft[0]) + abs(mvdtop[0]);
- int amvd1 = abs(mvdleft[1]) + abs(mvdtop[1]);
- amvd0 = (amvd0 > 2) + (amvd0 > 32);
- amvd1 = (amvd1 > 2) + (amvd1 > 32);
- - return amvd0 + (amvd1<<16);
- + return amvd0 + (amvd1<<8);
- }
- extern const uint8_t x264_exp2_lut[64];
- @@ -527,7 +527,7 @@ struct x264_t
- uint8_t (*non_zero_count)[16+4+4]; /* nzc. for I_PCM set to 16 */
- int8_t *chroma_pred_mode; /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
- int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
- - int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */
- + uint8_t (*mvd[2])[2]; /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
- int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
- int16_t (*mvr[2][32])[2]; /* 16x16 mv for each possible ref */
- int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
- @@ -621,7 +621,7 @@ struct x264_t
- /* 0 if not available */
- ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
- - ALIGNED_8( int16_t mvd[2][X264_SCAN8_SIZE][2] );
- + ALIGNED_8( uint8_t mvd[2][X264_SCAN8_SIZE][2] );
- /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
- ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 68c7e06..8a4f095 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -712,8 +712,8 @@ int x264_macroblock_cache_init( x264_t *h )
- if( h->param.b_cabac )
- {
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
- - CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(int16_t) );
- - CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(int16_t) );
- + CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(uint8_t) );
- + CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(uint8_t) );
- }
- for( i=0; i<2; i++ )
- @@ -1211,33 +1211,24 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
- if( h->param.b_cabac )
- {
- if( i_top_type >= 0 )
- - {
- - const int i8 = x264_scan8[0] - 8;
- - const int iv = i_top_4x4;
- - CP64( h->mb.cache.mvd[i_list][i8+0], h->mb.mvd[i_list][iv+0] );
- - CP64( h->mb.cache.mvd[i_list][i8+2], h->mb.mvd[i_list][iv+2] );
- - }
- + CP64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8], h->mb.mvd[i_list][i_top_4x4] );
- else
- - {
- - const int i8 = x264_scan8[0] - 8;
- - M64( h->mb.cache.mvd[i_list][i8+0] ) = 0;
- - M64( h->mb.cache.mvd[i_list][i8+2] ) = 0;
- - }
- + M64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8] ) = 0;
- if( i_left_type >= 0 )
- {
- const int i8 = x264_scan8[0] - 1;
- const int iv = i_mb_4x4 - 1;
- - CP32( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
- - CP32( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
- - CP32( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
- - CP32( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
- + CP16( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
- + CP16( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
- + CP16( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
- + CP16( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
- }
- else
- {
- const int i8 = x264_scan8[0] - 1;
- for( i = 0; i < 4; i++ )
- - M32( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
- + M16( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
- }
- }
- }
- @@ -1416,30 +1407,18 @@ void x264_macroblock_cache_save( x264_t *h )
- if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
- {
- for( y = 0; y < 4; y++ )
- - {
- - CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4+0], h->mb.cache.mvd[0][x264_scan8[0]+8*y+0] );
- - CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4+2], h->mb.cache.mvd[0][x264_scan8[0]+8*y+2] );
- - }
- + CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4], h->mb.cache.mvd[0][x264_scan8[0]+8*y] );
- if( h->sh.i_type == SLICE_TYPE_B )
- for( y = 0; y < 4; y++ )
- - {
- - CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4+0], h->mb.cache.mvd[1][x264_scan8[0]+8*y+0] );
- - CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4+2], h->mb.cache.mvd[1][x264_scan8[0]+8*y+2] );
- - }
- + CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4], h->mb.cache.mvd[1][x264_scan8[0]+8*y] );
- }
- else
- {
- for( y = 0; y < 4; y++ )
- - {
- - M64( h->mb.mvd[0][i_mb_4x4+y*s4x4+0] ) = 0;
- - M64( h->mb.mvd[0][i_mb_4x4+y*s4x4+2] ) = 0;
- - }
- + M64( h->mb.mvd[0][i_mb_4x4+y*s4x4] ) = 0;
- if( h->sh.i_type == SLICE_TYPE_B )
- for( y = 0; y < 4; y++ )
- - {
- - M64( h->mb.mvd[1][i_mb_4x4+y*s4x4+0] ) = 0;
- - M64( h->mb.mvd[1][i_mb_4x4+y*s4x4+2] ) = 0;
- - }
- + M64( h->mb.mvd[1][i_mb_4x4+y*s4x4] ) = 0;
- }
- if( h->sh.i_type == SLICE_TYPE_B )
- diff --git a/common/macroblock.h b/common/macroblock.h
- index 48f3105..eb903d2 100644
- --- a/common/macroblock.h
- +++ b/common/macroblock.h
- @@ -353,6 +353,33 @@ static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int
- if( height == 4 ) M16( d+6 ) = val2;
- }
- }
- +static ALWAYS_INLINE void x264_macroblock_cache_rect2( void *dst, int width, int height, uint16_t val )
- +{
- + uint16_t *d = dst;
- + uint32_t val32 = val + (val<<16);
- + uint64_t val64 = val32 + ((uint64_t)val32<<32);
- + if( width == 4 )
- + {
- + M64( d+ 0 ) = val64;
- + if( height >= 2 ) M64( d+ 8 ) = val64;
- + if( height == 4 ) M64( d+16 ) = val64;
- + if( height == 4 ) M64( d+24 ) = val64;
- + }
- + else if( width == 2 )
- + {
- + M32( d+ 0 ) = val32;
- + if( height >= 2 ) M32( d+ 8 ) = val32;
- + if( height == 4 ) M32( d+16 ) = val32;
- + if( height == 4 ) M32( d+24 ) = val32;
- + }
- + else //if( width == 1 )
- + {
- + M16( d+ 0 ) = val;
- + if( height >= 2 ) M16( d+ 8 ) = val;
- + if( height == 4 ) M16( d+16 ) = val;
- + if( height == 4 ) M16( d+24 ) = val;
- + }
- +}
- static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
- {
- int dy;
- @@ -383,9 +410,9 @@ static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int
- {
- x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
- }
- -static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
- +static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mv )
- {
- - x264_macroblock_cache_rect4( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
- + x264_macroblock_cache_rect2( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
- }
- static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
- {
- diff --git a/common/x86/util.h b/common/x86/util.h
- index c8bcf4b..0674323 100644
- --- a/common/x86/util.h
- +++ b/common/x86/util.h
- @@ -77,32 +77,26 @@ static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t
- );
- return sum;
- }
- -#define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext
- -static ALWAYS_INLINE uint32_t x264_cabac_amvd_sum_mmxext(int16_t *mvdleft, int16_t *mvdtop)
- +#define x264_cabac_mvd_sum x264_cabac_mvd_sum_mmxext
- +static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_t *mvdtop)
- {
- - static const uint64_t pw_2 = 0x0002000200020002ULL;
- - static const uint64_t pw_28 = 0x001C001C001C001CULL;
- - static const uint64_t pw_2184 = 0x0888088808880888ULL;
- - /* MIN(((x+28)*2184)>>16,2) = (x>2) + (x>32) */
- - /* 2184 = fix16(1/30) */
- - uint32_t amvd;
- + static const uint64_t pb_2 = 0x0202020202020202ULL;
- + static const uint64_t pb_32 = 0x2020202020202020ULL;
- + int amvd;
- asm(
- - "movd %1, %%mm0 \n"
- - "movd %2, %%mm1 \n"
- - "pxor %%mm2, %%mm2 \n"
- - "pxor %%mm3, %%mm3 \n"
- - "psubw %%mm0, %%mm2 \n"
- - "psubw %%mm1, %%mm3 \n"
- - "pmaxsw %%mm2, %%mm0 \n"
- - "pmaxsw %%mm3, %%mm1 \n"
- - "paddw %3, %%mm0 \n"
- - "paddw %%mm1, %%mm0 \n"
- - "pmulhuw %4, %%mm0 \n"
- - "pminsw %5, %%mm0 \n"
- - "movd %%mm0, %0 \n"
- + "movd %1, %%mm0 \n"
- + "movd %2, %%mm1 \n"
- + "paddb %%mm1, %%mm0 \n"
- + "pxor %%mm2, %%mm2 \n"
- + "movq %%mm0, %%mm1 \n"
- + "pcmpgtb %3, %%mm0 \n"
- + "pcmpgtb %4, %%mm1 \n"
- + "psubb %%mm0, %%mm2 \n"
- + "psubb %%mm1, %%mm2 \n"
- + "movd %%mm2, %0 \n"
- :"=r"(amvd)
- - :"m"(M32( mvdleft )),"m"(M32( mvdtop )),
- - "m"(pw_28),"m"(pw_2184),"m"(pw_2)
- + :"m"(M16( mvdleft )),"m"(M16( mvdtop )),
- + "m"(pb_2),"m"(pb_32)
- );
- return amvd;
- }
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 271f527..083b783 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -349,7 +349,7 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx
- x264_cabac_encode_decision( cb, 54 + ctx, 0 );
- }
- -static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
- +static inline int x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
- {
- const int i_abs = abs( mvd );
- const int ctxbase = l ? 47 : 40;
- @@ -408,32 +408,34 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
- x264_cabac_encode_bypass( cb, mvd < 0 );
- }
- #endif
- + /* Since we don't need to keep track of MVDs larger than 33, just cap the value.
- + * This lets us store MVDs as 8-bit values instead of 16-bit. */
- + return X264_MIN( i_abs, 33 );
- }
- -static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
- +static NOINLINE uint16_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
- {
- ALIGNED_4( int16_t mvp[2] );
- - uint32_t amvd;
- int mdx, mdy;
- /* Calculate mvd */
- x264_mb_predict_mv( h, i_list, idx, width, mvp );
- mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
- mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
- - amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
- - h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
- + uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
- + h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
- /* encode */
- - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF );
- - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 );
- + mdx = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
- + mdy = x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
- - return pack16to32_mask(mdx,mdy);
- + return pack8to16(mdx,mdy);
- }
- #define x264_cabac_mb_mvd(h,cb,i_list,idx,width,height)\
- do\
- {\
- - uint32_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\
- + uint16_t mvd = x264_cabac_mb_mvd(h,cb,i_list,idx,width);\
- x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, mvd );\
- } while(0)
- diff --git a/encoder/me.c b/encoder/me.c
- index f58a6a8..44f6c7d 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -1174,6 +1174,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
- m->mv[0] = bmx;
- m->mv[1] = bmy;
- x264_macroblock_cache_mv ( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx, bmy) );
- - x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx - m->mvp[0], bmy - m->mvp[1]) );
- + uint16_t amvd = pack8to16(X264_MIN(abs(bmx - m->mvp[0]),33), X264_MIN(abs(bmy - m->mvp[1]),33));
- + x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, amvd );
- h->mb.b_skip_mc = 0;
- }
- --
- 1.6.1.2
- From 54d1bed32086228ce2de06a5207501bdf258d9a9 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Fri, 19 Feb 2010 10:45:22 -0800
- Subject: [PATCH 10/16] Faster, more accurate psy-RD caching
- Keep more variants of cached Hadamard scores and only calculate them when necessary.
- Results in more calculation, but simpler lookups.
- Slightly more accurate due to internal rounding in SATD and SA8D functions.
- ---
- common/common.h | 8 ++---
- common/x86/mc-a2.asm | 6 +++-
- encoder/analyse.c | 39 ++++++---------------------
- encoder/rdo.c | 69 ++++++++++++++++++++++++++++---------------------
- 4 files changed, 55 insertions(+), 67 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index ab54508..413b82f 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -583,11 +583,9 @@ struct x264_t
- ALIGNED_16( int16_t fenc_dct8[4][64] );
- ALIGNED_16( int16_t fenc_dct4[16][16] );
- - /* Psy RD SATD scores */
- - int fenc_satd[4][4];
- - int fenc_satd_sum;
- - int fenc_sa8d[2][2];
- - int fenc_sa8d_sum;
- + /* Psy RD SATD/SA8D scores cache */
- + ALIGNED_16( uint64_t fenc_hadamard_cache[9] );
- + ALIGNED_16( uint32_t fenc_satd_cache[32] );
- /* pointer over mb of the frame to be compressed */
- uint8_t *p_fenc[3];
- diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
- index f2e69c0..d86d6ef 100644
- --- a/common/x86/mc-a2.asm
- +++ b/common/x86/mc-a2.asm
- @@ -731,15 +731,17 @@ cglobal x264_memcpy_aligned_sse2, 3,3
- ;-----------------------------------------------------------------------------
- %macro MEMZERO 1
- cglobal x264_memzero_aligned_%1, 2,2
- + add r0, r1
- + neg r1
- pxor m0, m0
- .loop:
- - sub r1d, mmsize*8
- %assign i 0
- %rep 8
- mova [r0 + r1 + i], m0
- %assign i i+mmsize
- %endrep
- - jg .loop
- + add r1d, mmsize*8
- + jl .loop
- REP_RET
- %endmacro
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 6ee5f8e..02fbf7c 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -578,34 +578,13 @@ static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
- h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
- }
- -/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */
- -static inline void x264_mb_cache_fenc_satd( x264_t *h )
- +/* Reset fenc satd scores cache for psy RD */
- +static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
- {
- - ALIGNED_16( static uint8_t zero[16] ) = {0};
- - uint8_t *fenc;
- - int x, y, satd_sum = 0, sa8d_sum = 0;
- - if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
- - x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );
- - if( !h->mb.i_psy_rd )
- - return;
- - for( y = 0; y < 4; y++ )
- - for( x = 0; x < 4; x++ )
- - {
- - fenc = h->mb.pic.p_fenc[0]+x*4+y*4*FENC_STRIDE;
- - h->mb.pic.fenc_satd[y][x] = h->pixf.satd[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )
- - - (h->pixf.sad[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )>>1);
- - satd_sum += h->mb.pic.fenc_satd[y][x];
- - }
- - for( y = 0; y < 2; y++ )
- - for( x = 0; x < 2; x++ )
- - {
- - fenc = h->mb.pic.p_fenc[0]+x*8+y*8*FENC_STRIDE;
- - h->mb.pic.fenc_sa8d[y][x] = h->pixf.sa8d[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )
- - - (h->pixf.sad[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )>>2);
- - sa8d_sum += h->mb.pic.fenc_sa8d[y][x];
- - }
- - h->mb.pic.fenc_satd_sum = satd_sum;
- - h->mb.pic.fenc_sa8d_sum = sa8d_sum;
- + /* Writes beyond the end of the array, but not a problem since fenc_satd_cache is right after. */
- + h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );
- + if( b_satd )
- + h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
- }
- static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
- @@ -1193,7 +1172,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
- h->mb.i_type = P_L0;
- if( a->i_mbrd )
- {
- - x264_mb_cache_fenc_satd( h );
- + x264_mb_init_fenc_cache( h, a->i_mbrd >= 2 || h->param.analyse.inter & X264_ANALYSE_PSUB8x8 );
- if( a->l0.me16x16.i_ref == 0 && M32( a->l0.me16x16.mv ) == M32( h->mb.cache.pskip_mv ) && !a->b_force_intra )
- {
- h->mb.i_partition = D_16x16;
- @@ -2432,7 +2411,7 @@ void x264_macroblock_analyse( x264_t *h )
- {
- intra_analysis:
- if( analysis.i_mbrd )
- - x264_mb_cache_fenc_satd( h );
- + x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
- x264_mb_analyse_intra( h, &analysis, COST_MAX );
- if( analysis.i_mbrd )
- x264_intra_rd( h, &analysis, COST_MAX );
- @@ -2749,7 +2728,7 @@ intra_analysis:
- int b_skip = 0;
- if( analysis.i_mbrd )
- - x264_mb_cache_fenc_satd( h );
- + x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
- h->mb.i_type = B_SKIP;
- if( h->mb.b_direct_auto_write )
- diff --git a/encoder/rdo.c b/encoder/rdo.c
- index e15f47d..fed2a28 100644
- --- a/encoder/rdo.c
- +++ b/encoder/rdo.c
- @@ -61,36 +61,44 @@ static uint16_t cabac_size_5ones[128];
- #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
- sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )
- -
- -/* Sum the cached SATDs to avoid repeating them. */
- -static inline int sum_satd( x264_t *h, int pixel, int x, int y )
- +static inline uint64_t cached_hadamard( x264_t *h, int pixel, int x, int y )
- {
- - int satd = 0;
- - int min_x = x>>2;
- - int min_y = y>>2;
- - int max_x = (x>>2) + (x264_pixel_size[pixel].w>>2);
- - int max_y = (y>>2) + (x264_pixel_size[pixel].h>>2);
- - if( pixel == PIXEL_16x16 )
- - return h->mb.pic.fenc_satd_sum;
- - for( y = min_y; y < max_y; y++ )
- - for( x = min_x; x < max_x; x++ )
- - satd += h->mb.pic.fenc_satd[y][x];
- - return satd;
- + static const uint8_t hadamard_shift_x[4] = {4, 4, 3, 3};
- + static const uint8_t hadamard_shift_y[4] = {4-0, 3-0, 4-1, 3-1};
- + static const uint8_t hadamard_offset[4] = {0, 1, 3, 5};
- + int cache_index = (x >> hadamard_shift_x[pixel]) + (y >> hadamard_shift_y[pixel])
- + + hadamard_offset[pixel];
- + uint64_t res = h->mb.pic.fenc_hadamard_cache[cache_index];
- + if( res )
- + return res - 1;
- + else
- + {
- + uint8_t *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
- + res = h->pixf.hadamard_ac[pixel]( fenc, FENC_STRIDE );
- + h->mb.pic.fenc_hadamard_cache[cache_index] = res + 1;
- + return res;
- + }
- }
- -static inline int sum_sa8d( x264_t *h, int pixel, int x, int y )
- +static inline int cached_satd( x264_t *h, int pixel, int x, int y )
- {
- - int sa8d = 0;
- - int min_x = x>>3;
- - int min_y = y>>3;
- - int max_x = (x>>3) + (x264_pixel_size[pixel].w>>3);
- - int max_y = (y>>3) + (x264_pixel_size[pixel].h>>3);
- - if( pixel == PIXEL_16x16 )
- - return h->mb.pic.fenc_sa8d_sum;
- - for( y = min_y; y < max_y; y++ )
- - for( x = min_x; x < max_x; x++ )
- - sa8d += h->mb.pic.fenc_sa8d[y][x];
- - return sa8d;
- + static const uint8_t satd_shift_x[3] = {3, 2, 2};
- + static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2};
- + static const uint8_t satd_offset[3] = {0, 8, 16};
- + ALIGNED_16( static uint8_t zero[16] );
- + int cache_index = (x >> satd_shift_x[pixel - PIXEL_8x4]) + (y >> satd_shift_y[pixel - PIXEL_8x4])
- + + satd_offset[pixel - PIXEL_8x4];
- + int res = h->mb.pic.fenc_satd_cache[cache_index];
- + if( res )
- + return res - 1;
- + else
- + {
- + uint8_t *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
- + int dc = h->pixf.sad[pixel]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
- + res = h->pixf.satd[pixel]( fenc, FENC_STRIDE, zero, 0 ) - dc;
- + h->mb.pic.fenc_satd_cache[cache_index] = res + 1;
- + return res;
- + }
- }
- /* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */
- @@ -113,15 +121,16 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
- /* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */
- if( size <= PIXEL_8x8 )
- {
- - uint64_t acs = h->pixf.hadamard_ac[size]( fdec, FDEC_STRIDE );
- - satd = abs((int32_t)acs - sum_satd( h, size, x, y ))
- - + abs((int32_t)(acs>>32) - sum_sa8d( h, size, x, y ));
- + uint64_t fdec_acs = h->pixf.hadamard_ac[size]( fdec, FDEC_STRIDE );
- + uint64_t fenc_acs = cached_hadamard( h, size, x, y );
- + satd = abs((int32_t)fdec_acs - (int32_t)fenc_acs)
- + + abs((int32_t)(fdec_acs>>32) - (int32_t)(fenc_acs>>32));
- satd >>= 1;
- }
- else
- {
- int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1;
- - satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - sum_satd( h, size, x, y ));
- + satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - cached_satd( h, size, x, y ));
- }
- satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8;
- }
- --
- 1.6.1.2
- From c45278a7107934fdad77c0cac14a924b97a6272e Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 21 Feb 2010 01:56:12 -0800
- Subject: [PATCH 11/16] Move presets, tunings, and profiles into libx264
- Now any application calling libx264 can use them.
- Full documentation and guidelines for usage are included in x264.h.
- ---
- common/common.c | 266 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
- x264.c | 267 +++----------------------------------------------------
- x264.h | 96 +++++++++++++++++---
- 3 files changed, 357 insertions(+), 272 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 0dd7af5..a99b65b 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -36,7 +36,7 @@ static void x264_log_default( void *, int, const char *, va_list );
- /****************************************************************************
- * x264_param_default:
- ****************************************************************************/
- -void x264_param_default( x264_param_t *param )
- +void x264_param_default( x264_param_t *param )
- {
- /* */
- memset( param, 0, sizeof( x264_param_t ) );
- @@ -160,6 +160,270 @@ void x264_param_default( x264_param_t *param )
- param->b_dts_compress = 0;
- }
- +static int x264_param_apply_preset( x264_param_t *param, const char *preset )
- +{
- + if( !strcasecmp( preset, "ultrafast" ) )
- + {
- + param->i_frame_reference = 1;
- + param->i_scenecut_threshold = 0;
- + param->b_deblocking_filter = 0;
- + param->b_cabac = 0;
- + param->i_bframe = 0;
- + param->analyse.intra = 0;
- + param->analyse.inter = 0;
- + param->analyse.b_transform_8x8 = 0;
- + param->analyse.i_me_method = X264_ME_DIA;
- + param->analyse.i_subpel_refine = 0;
- + param->rc.i_aq_mode = 0;
- + param->analyse.b_mixed_references = 0;
- + param->analyse.i_trellis = 0;
- + param->i_bframe_adaptive = X264_B_ADAPT_NONE;
- + param->rc.b_mb_tree = 0;
- + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + }
- + else if( !strcasecmp( preset, "veryfast" ) )
- + {
- + param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4;
- + param->analyse.i_me_method = X264_ME_DIA;
- + param->analyse.i_subpel_refine = 1;
- + param->i_frame_reference = 1;
- + param->analyse.b_mixed_references = 0;
- + param->analyse.i_trellis = 0;
- + param->rc.b_mb_tree = 0;
- + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + }
- + else if( !strcasecmp( preset, "faster" ) )
- + {
- + param->analyse.b_mixed_references = 0;
- + param->i_frame_reference = 2;
- + param->analyse.i_subpel_refine = 4;
- + param->rc.b_mb_tree = 0;
- + param->analyse.i_weighted_pred = X264_WEIGHTP_BLIND;
- + }
- + else if( !strcasecmp( preset, "fast" ) )
- + {
- + param->i_frame_reference = 2;
- + param->analyse.i_subpel_refine = 6;
- + param->rc.i_lookahead = 30;
- + }
- + else if( !strcasecmp( preset, "medium" ) )
- + {
- + /* Default is medium */
- + }
- + else if( !strcasecmp( preset, "slow" ) )
- + {
- + param->analyse.i_me_method = X264_ME_UMH;
- + param->analyse.i_subpel_refine = 8;
- + param->i_frame_reference = 5;
- + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- + param->rc.i_lookahead = 50;
- + }
- + else if( !strcasecmp( preset, "slower" ) )
- + {
- + param->analyse.i_me_method = X264_ME_UMH;
- + param->analyse.i_subpel_refine = 9;
- + param->i_frame_reference = 8;
- + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- + param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- + param->analyse.i_trellis = 2;
- + param->rc.i_lookahead = 60;
- + }
- + else if( !strcasecmp( preset, "veryslow" ) )
- + {
- + param->analyse.i_me_method = X264_ME_UMH;
- + param->analyse.i_subpel_refine = 10;
- + param->analyse.i_me_range = 24;
- + param->i_frame_reference = 16;
- + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- + param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- + param->analyse.i_trellis = 2;
- + param->i_bframe = 8;
- + param->rc.i_lookahead = 60;
- + }
- + else if( !strcasecmp( preset, "placebo" ) )
- + {
- + param->analyse.i_me_method = X264_ME_TESA;
- + param->analyse.i_subpel_refine = 10;
- + param->analyse.i_me_range = 24;
- + param->i_frame_reference = 16;
- + param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- + param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- + param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- + param->analyse.b_fast_pskip = 0;
- + param->analyse.i_trellis = 2;
- + param->i_bframe = 16;
- + param->rc.i_lookahead = 60;
- + }
- + else
- + {
- + fprintf( stderr, "x264 [error]: invalid preset '%s'\n", preset );
- + return -1;
- + }
- + return 0;
- +}
- +
- +static int x264_param_apply_tune( x264_param_t *param, const char *tune )
- +{
- + char *tmp = x264_malloc( strlen( tune ) );
- + if( !tmp )
- + return -1;
- + tmp = strcpy( tmp, tune );
- + char *s = strtok( tmp, ",./-+" );
- + int psy_tuning_used = 0;
- + while( s )
- + {
- + if( !strncasecmp( s, "film", 4 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->i_deblocking_filter_alphac0 = -1;
- + param->i_deblocking_filter_beta = -1;
- + param->analyse.f_psy_trellis = 0.15;
- + }
- + else if( !strncasecmp( s, "animation", 9 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
- + param->i_deblocking_filter_alphac0 = 1;
- + param->i_deblocking_filter_beta = 1;
- + param->analyse.f_psy_rd = 0.4;
- + param->rc.f_aq_strength = 0.6;
- + param->i_bframe += 2;
- + }
- + else if( !strncasecmp( s, "grain", 5 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->i_deblocking_filter_alphac0 = -2;
- + param->i_deblocking_filter_beta = -2;
- + param->analyse.f_psy_trellis = 0.25;
- + param->analyse.b_dct_decimate = 0;
- + param->rc.f_pb_factor = 1.1;
- + param->rc.f_ip_factor = 1.1;
- + param->rc.f_aq_strength = 0.5;
- + param->analyse.i_luma_deadzone[0] = 6;
- + param->analyse.i_luma_deadzone[1] = 6;
- + param->rc.f_qcompress = 0.8;
- + }
- + else if( !strncasecmp( s, "psnr", 4 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->rc.i_aq_mode = X264_AQ_NONE;
- + param->analyse.b_psy = 0;
- + }
- + else if( !strncasecmp( s, "ssim", 4 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
- + param->analyse.b_psy = 0;
- + }
- + else if( !strncasecmp( s, "fastdecode", 10 ) )
- + {
- + param->b_deblocking_filter = 0;
- + param->b_cabac = 0;
- + param->analyse.b_weighted_bipred = 0;
- + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + }
- + else if( !strncasecmp( s, "zerolatency", 11 ) )
- + {
- + param->rc.i_lookahead = 0;
- + param->i_sync_lookahead = 0;
- + param->i_bframe = 0;
- + param->b_sliced_threads = 1;
- + }
- + else if( !strncasecmp( s, "touhou", 6 ) )
- + {
- + if( psy_tuning_used++ ) goto psy_failure;
- + param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
- + param->i_deblocking_filter_alphac0 = -1;
- + param->i_deblocking_filter_beta = -1;
- + param->analyse.f_psy_trellis = 0.2;
- + param->rc.f_aq_strength = 1.3;
- + if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
- + param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- + }
- + else
- + {
- + fprintf( stderr, "x264 [error]: invalid tune '%s'\n", s );
- + return -1;
- + }
- + if( 0 )
- + {
- + psy_failure:
- + fprintf( stderr, "x264 [warning]: only 1 psy tuning can be used: ignoring tune %s\n", s );
- + }
- + s = strtok( NULL, ",./-+" );
- + }
- + return 0;
- +}
- +
- +int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
- +{
- + x264_param_default( param );
- +
- + if( preset && x264_param_apply_preset( param, preset ) < 0 )
- + return -1;
- + if( tune && x264_param_apply_tune( param, tune ) < 0 )
- + return -1;
- + return 0;
- +}
- +
- +void x264_param_apply_fastfirstpass( x264_param_t *param )
- +{
- + /* Set faster options in case of turbo firstpass. */
- + if( param->rc.b_stat_read && !param->rc.b_stat_write )
- + {
- + param->i_frame_reference = 1;
- + param->analyse.b_transform_8x8 = 0;
- + param->analyse.inter = 0;
- + param->analyse.i_me_method = X264_ME_DIA;
- + param->analyse.i_subpel_refine = X264_MIN( 2, param->analyse.i_subpel_refine );
- + param->analyse.i_trellis = 0;
- + }
- +}
- +
- +/* x264_param_apply_profile:
- + * Restrict param to what the named profile allows. The name is matched
- + * case-insensitively against "baseline", "main" and "high"; a NULL profile
- + * is a no-op that returns 0.
- + * Returns 0 on success, -1 (after printing to stderr) for an unknown name,
- + * for baseline combined with interlacing, or for lossless settings. */
- +int x264_param_apply_profile( x264_param_t *param, const char *profile )
- +{
- + /* No profile requested: nothing to restrict. */
- + if( !profile )
- + return 0;
- +
- + int b_baseline = !strcasecmp( profile, "baseline" );
- + int b_main = !b_baseline && !strcasecmp( profile, "main" );
- + int b_high = !b_baseline && !b_main && !strcasecmp( profile, "high" );
- +
- + if( !b_baseline && !b_main && !b_high )
- + {
- + fprintf( stderr, "x264 [error]: invalid profile: %s\n", profile );
- + return -1;
- + }
- +
- + /* Restrictions shared by baseline and main; high changes nothing. */
- + if( b_baseline || b_main )
- + {
- + param->analyse.b_transform_8x8 = 0;
- + param->i_cqm_preset = X264_CQM_FLAT;
- + }
- +
- + /* Restrictions specific to baseline. */
- + if( b_baseline )
- + {
- + param->b_cabac = 0;
- + param->i_bframe = 0;
- + param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- + if( param->b_interlaced )
- + {
- + fprintf( stderr, "x264 [error]: baseline profile doesn't support interlacing\n" );
- + return -1;
- + }
- + }
- +
- + /* A zero QP (CQP) or zero rate factor (CRF) is rejected for every profile handled here. */
- + int b_lossless_cqp = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
- + int b_lossless_crf = param->rc.i_rc_method == X264_RC_CRF && param->rc.f_rf_constant == 0;
- + if( b_lossless_cqp || b_lossless_crf )
- + {
- + fprintf( stderr, "x264 [error]: %s profile doesn't support lossless\n", profile );
- + return -1;
- + }
- +
- + return 0;
- +}
- +
- static int parse_enum( const char *arg, const char * const *names, int *dst )
- {
- int i;
- diff --git a/x264.c b/x264.c
- index 959626a..2875dd1 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -115,8 +115,6 @@ int main( int argc, char **argv )
- _setmode(_fileno(stdout), _O_BINARY);
- #endif
- - x264_param_default( ¶m );
- -
- /* Parse command line */
- if( Parse( argc, argv, ¶m, &opt ) < 0 )
- return -1;
- @@ -799,12 +797,13 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- char *profile = NULL;
- int b_thread_input = 0;
- int b_turbo = 1;
- - int b_pass1 = 0;
- int b_user_ref = 0;
- int b_user_fps = 0;
- int b_user_interlaced = 0;
- int i;
- cli_input_opt_t input_opt;
- + char *preset = NULL;
- + char *tune = NULL;
- memset( opt, 0, sizeof(cli_opt_t) );
- memset( &input_opt, 0, sizeof(cli_input_opt_t) );
- @@ -816,219 +815,20 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- int c = getopt_long( argc, argv, short_options, long_options, NULL );
- if( c == -1 )
- break;
- -
- if( c == OPT_PRESET )
- {
- - if( !strcasecmp( optarg, "ultrafast" ) )
- - {
- - param->i_frame_reference = 1;
- - param->i_scenecut_threshold = 0;
- - param->b_deblocking_filter = 0;
- - param->b_cabac = 0;
- - param->i_bframe = 0;
- - param->analyse.intra = 0;
- - param->analyse.inter = 0;
- - param->analyse.b_transform_8x8 = 0;
- - param->analyse.i_me_method = X264_ME_DIA;
- - param->analyse.i_subpel_refine = 0;
- - param->rc.i_aq_mode = 0;
- - param->analyse.b_mixed_references = 0;
- - param->analyse.i_trellis = 0;
- - param->i_bframe_adaptive = X264_B_ADAPT_NONE;
- - param->rc.b_mb_tree = 0;
- - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- - }
- - else if( !strcasecmp( optarg, "veryfast" ) )
- - {
- - param->analyse.inter = X264_ANALYSE_I8x8|X264_ANALYSE_I4x4;
- - param->analyse.i_me_method = X264_ME_DIA;
- - param->analyse.i_subpel_refine = 1;
- - param->i_frame_reference = 1;
- - param->analyse.b_mixed_references = 0;
- - param->analyse.i_trellis = 0;
- - param->rc.b_mb_tree = 0;
- - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- - }
- - else if( !strcasecmp( optarg, "faster" ) )
- - {
- - param->analyse.b_mixed_references = 0;
- - param->i_frame_reference = 2;
- - param->analyse.i_subpel_refine = 4;
- - param->rc.b_mb_tree = 0;
- - param->analyse.i_weighted_pred = X264_WEIGHTP_BLIND;
- - }
- - else if( !strcasecmp( optarg, "fast" ) )
- - {
- - param->i_frame_reference = 2;
- - param->analyse.i_subpel_refine = 6;
- - param->rc.i_lookahead = 30;
- - }
- - else if( !strcasecmp( optarg, "medium" ) )
- - {
- - /* Default is medium */
- - }
- - else if( !strcasecmp( optarg, "slow" ) )
- - {
- - param->analyse.i_me_method = X264_ME_UMH;
- - param->analyse.i_subpel_refine = 8;
- - param->i_frame_reference = 5;
- - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- - param->rc.i_lookahead = 50;
- - }
- - else if( !strcasecmp( optarg, "slower" ) )
- - {
- - param->analyse.i_me_method = X264_ME_UMH;
- - param->analyse.i_subpel_refine = 9;
- - param->i_frame_reference = 8;
- - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- - param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- - param->analyse.i_trellis = 2;
- - param->rc.i_lookahead = 60;
- - }
- - else if( !strcasecmp( optarg, "veryslow" ) )
- - {
- - param->analyse.i_me_method = X264_ME_UMH;
- - param->analyse.i_subpel_refine = 10;
- - param->analyse.i_me_range = 24;
- - param->i_frame_reference = 16;
- - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- - param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- - param->analyse.i_trellis = 2;
- - param->i_bframe = 8;
- - param->rc.i_lookahead = 60;
- - }
- - else if( !strcasecmp( optarg, "placebo" ) )
- - {
- - param->analyse.i_me_method = X264_ME_TESA;
- - param->analyse.i_subpel_refine = 10;
- - param->analyse.i_me_range = 24;
- - param->i_frame_reference = 16;
- - param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS;
- - param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_AUTO;
- - param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- - param->analyse.b_fast_pskip = 0;
- - param->analyse.i_trellis = 2;
- - param->i_bframe = 16;
- - param->rc.i_lookahead = 60;
- + preset = optarg;
- + /* Compare case-insensitively, as the inline preset table this replaces did,
- + * so that e.g. "Placebo" also disables the turbo first pass. */
- + if( !strcasecmp( preset, "placebo" ) )
- b_turbo = 0;
- - }
- - else
- - {
- - fprintf( stderr, "x264 [error]: invalid preset '%s'\n", optarg );
- - return -1;
- - }
- }
- - else if( c == '?' )
- - return -1;
- - }
- -
- - /* Tunings are applied next. */
- - for( optind = 0;; )
- - {
- - int c = getopt_long( argc, argv, short_options, long_options, NULL );
- - if( c == -1 )
- - break;
- -
- if( c == OPT_TUNE )
- - {
- - char *s = strtok( optarg, ",./-+" );
- - int psy_tuning_used = 0;
- - while( s )
- - {
- - if( !strncasecmp( s, "film", 4 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->i_deblocking_filter_alphac0 = -1;
- - param->i_deblocking_filter_beta = -1;
- - param->analyse.f_psy_trellis = 0.15;
- - psy_tuning_used = 1;
- - }
- - else if( !strncasecmp( s, "animation", 9 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
- - param->i_deblocking_filter_alphac0 = 1;
- - param->i_deblocking_filter_beta = 1;
- - param->analyse.f_psy_rd = 0.4;
- - param->rc.f_aq_strength = 0.6;
- - param->i_bframe += 2;
- - psy_tuning_used = 1;
- - }
- - else if( !strncasecmp( s, "grain", 5 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->i_deblocking_filter_alphac0 = -2;
- - param->i_deblocking_filter_beta = -2;
- - param->analyse.f_psy_trellis = 0.25;
- - param->analyse.b_dct_decimate = 0;
- - param->rc.f_pb_factor = 1.1;
- - param->rc.f_ip_factor = 1.1;
- - param->rc.f_aq_strength = 0.5;
- - param->analyse.i_luma_deadzone[0] = 6;
- - param->analyse.i_luma_deadzone[1] = 6;
- - param->rc.f_qcompress = 0.8;
- - psy_tuning_used = 1;
- - }
- - else if( !strncasecmp( s, "psnr", 4 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->rc.i_aq_mode = X264_AQ_NONE;
- - param->analyse.b_psy = 0;
- - psy_tuning_used = 1;
- - }
- - else if( !strncasecmp( s, "ssim", 4 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->rc.i_aq_mode = X264_AQ_AUTOVARIANCE;
- - param->analyse.b_psy = 0;
- - psy_tuning_used = 1;
- - }
- - else if( !strncasecmp( s, "fastdecode", 10 ) )
- - {
- - param->b_deblocking_filter = 0;
- - param->b_cabac = 0;
- - param->analyse.b_weighted_bipred = 0;
- - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- - }
- - else if( !strncasecmp( s, "zerolatency", 11 ) )
- - {
- - param->rc.i_lookahead = 0;
- - param->i_sync_lookahead = 0;
- - param->i_bframe = 0;
- - param->b_sliced_threads = 1;
- - }
- - else if( !strncasecmp( s, "touhou", 6 ) )
- - {
- - if( psy_tuning_used ) goto psy_failure;
- - param->i_frame_reference = param->i_frame_reference > 1 ? param->i_frame_reference*2 : 1;
- - param->i_deblocking_filter_alphac0 = -1;
- - param->i_deblocking_filter_beta = -1;
- - param->analyse.f_psy_trellis = 0.2;
- - param->rc.f_aq_strength = 1.3;
- - if( param->analyse.inter & X264_ANALYSE_PSUB16x16 )
- - param->analyse.inter |= X264_ANALYSE_PSUB8x8;
- - psy_tuning_used = 1;
- - }
- - else
- - {
- - fprintf( stderr, "x264 [error]: invalid tune '%s'\n", s );
- - return -1;
- - }
- - if( 0 )
- - {
- -psy_failure:
- - fprintf( stderr, "x264 [warning]: only 1 psy tuning can be used: ignoring tune %s\n", s );
- - }
- - s = strtok( NULL, ",./-+" );
- - }
- - }
- + tune = optarg;
- else if( c == '?' )
- return -1;
- }
- + /* An invalid preset or tune must abort option parsing, exactly as the
- + * inline preset/tune tables this call replaces did. */
- + if( x264_param_default_preset( param, preset, tune ) < 0 )
- + return -1;
- +
- /* Parse command line options */
- for( optind = 0;; )
- {
- @@ -1144,9 +944,6 @@ psy_failure:
- case 'r':
- b_user_ref = 1;
- goto generic_option;
- - case 'p':
- - b_pass1 = atoi( optarg ) == 1;
- - goto generic_option;
- case OPT_FPS:
- b_user_fps = 1;
- param->b_vfr_input = 0;
- @@ -1185,54 +982,12 @@ generic_option:
- }
- }
- - /* Set faster options in case of turbo firstpass. */
- - if( b_turbo && b_pass1 )
- - {
- - param->i_frame_reference = 1;
- - param->analyse.b_transform_8x8 = 0;
- - param->analyse.inter = 0;
- - param->analyse.i_me_method = X264_ME_DIA;
- - param->analyse.i_subpel_refine = X264_MIN( 2, param->analyse.i_subpel_refine );
- - param->analyse.i_trellis = 0;
- - }
- + /* If first pass mode is used, apply faster settings. */
- + if( b_turbo )
- + x264_param_apply_fastfirstpass( param );
- /* Apply profile restrictions. */
- - if( profile )
- - {
- - if( !strcasecmp( profile, "baseline" ) )
- - {
- - param->analyse.b_transform_8x8 = 0;
- - param->b_cabac = 0;
- - param->i_cqm_preset = X264_CQM_FLAT;
- - param->i_bframe = 0;
- - param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
- - if( param->b_interlaced )
- - {
- - fprintf( stderr, "x264 [error]: baseline profile doesn't support interlacing\n" );
- - return -1;
- - }
- - }
- - else if( !strcasecmp( profile, "main" ) )
- - {
- - param->analyse.b_transform_8x8 = 0;
- - param->i_cqm_preset = X264_CQM_FLAT;
- - }
- - else if( !strcasecmp( profile, "high" ) )
- - {
- - /* Default */
- - }
- - else
- - {
- - fprintf( stderr, "x264 [error]: invalid profile: %s\n", profile );
- - return -1;
- - }
- - if( (param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0) ||
- - (param->rc.i_rc_method == X264_RC_CRF && param->rc.f_rf_constant == 0) )
- - {
- - fprintf( stderr, "x264 [error]: %s profile doesn't support lossless\n", profile );
- - return -1;
- - }
- - }
- + /* Propagate profile failures (unknown name, interlaced baseline, lossless)
- + * instead of carrying on with a partially restricted parameter set. */
- + if( x264_param_apply_profile( param, profile ) < 0 )
- + return -1;
- /* Get the file name */
- if( optind > argc - 1 || !output_filename )
- diff --git a/x264.h b/x264.h
- index e7d19b7..f317e98 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,14 +35,14 @@
- #include <stdarg.h>
- -#define X264_BUILD 85
- +#define X264_BUILD 86
- /* x264_t:
- * opaque handler for encoder */
- typedef struct x264_t x264_t;
- /****************************************************************************
- - * Initialisation structure and function.
- + * Encoder parameters
- ****************************************************************************/
- /* CPU flags
- */
- @@ -332,6 +332,10 @@ typedef struct x264_param_t
- void (*param_free)( void* );
- } x264_param_t;
- +/****************************************************************************
- + * H.264 level restriction information
- + ****************************************************************************/
- +
- typedef struct {
- int level_idc;
- int mbps; /* max macroblock processing rate (macroblocks/sec) */
- @@ -350,6 +354,10 @@ typedef struct {
- /* all of the levels defined in the standard, terminated by .level_idc=0 */
- extern const x264_level_t x264_levels[];
- +/****************************************************************************
- + * Basic parameter handling functions
- + ****************************************************************************/
- +
- /* x264_param_default:
- * fill x264_param_t with default values and do CPU detection */
- void x264_param_default( x264_param_t * );
- @@ -366,15 +374,73 @@ void x264_param_default( x264_param_t * );
- int x264_param_parse( x264_param_t *, const char *name, const char *value );
- /****************************************************************************
- - * Picture structures and functions.
- + * Advanced parameter handling functions
- + ****************************************************************************/
- +
- +/* These functions expose the full power of x264's preset-tune-profile system for
- + * easy adjustment of large numbers of internal parameters.
- + *
- + * In order to replicate x264CLI's option handling, these functions MUST be called
- + * in the following order:
- + * 1) x264_param_default_preset
- + * 2) Custom user options (via param_parse or directly assigned variables)
- + * 3) x264_param_apply_fastfirstpass
- + * 4) x264_param_apply_profile
- + *
- + * Additionally, x264CLI does not apply step 3 if the preset chosen is "placebo"
- + * or --slow-firstpass is set. */
- +
- +/* x264_param_default_preset:
- + * The same as x264_param_default, but also use the passed preset and tune
- + * to modify the default settings.
- + * (either can be NULL, which implies no preset or no tune, respectively)
- + *
- + * Currently available presets are, ordered from fastest to slowest: */
- +static const char * const x264_preset_names[] = { "ultrafast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow", "placebo", 0 };
- +
- +/* Warning: the speed of these presets scales dramatically. Ultrafast is a full
- + * 100 times faster than placebo!
- + *
- + * Currently available tunings are: */
- +static const char * const x264_tune_names[] = { "film", "animation", "grain", "psnr", "ssim", "fastdecode", "zerolatency", 0 };
- +
- +/* Multiple tunings can be used if separated by a delimiter in ",./-+",
- + * however multiple psy tunings cannot be used.
- + * film, animation, grain, psnr, and ssim are psy tunings.
- + *
- + * returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
- +int x264_param_default_preset( x264_param_t *, const char *preset, const char *tune );
- +
- +/* x264_param_apply_fastfirstpass:
- + * If first-pass mode is set (rc.b_stat_read == 0, rc.b_stat_write == 1),
- + * modify the encoder settings to disable options generally not useful on
- + * the first pass. */
- +void x264_param_apply_fastfirstpass( x264_param_t * );
- +
- +/* x264_param_apply_profile:
- + * Applies the restrictions of the given profile.
- + * Currently available profiles are, from most to least restrictive: */
- +static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
- +
- +/* (can be NULL, in which case the function will do nothing)
- + *
- + * Does NOT guarantee that the given profile will be used: if the restrictions
- + * of "High" are applied to settings that are already Baseline-compatible, the
- + * stream will remain baseline. In short, it does not increase settings, only
- + * decrease them.
- + *
- + * returns 0 on success, negative on failure (e.g. invalid profile name). */
- +int x264_param_apply_profile( x264_param_t *, const char *profile );
- +
- +/****************************************************************************
- + * Picture structures and functions
- ****************************************************************************/
- typedef struct
- {
- - int i_csp;
- -
- - int i_plane;
- - int i_stride[4];
- - uint8_t *plane[4];
- + int i_csp; /* Colorspace */
- + int i_plane; /* Number of image planes */
- + int i_stride[4]; /* Strides for each plane */
- + uint8_t *plane[4]; /* Pointers to each plane */
- } x264_image_t;
- typedef struct
- @@ -421,9 +487,9 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
- void x264_picture_clean( x264_picture_t *pic );
- /****************************************************************************
- - * NAL structure and functions:
- + * NAL structure and functions
- ****************************************************************************/
- -/* nal */
- +
- enum nal_unit_type_e
- {
- NAL_UNKNOWN = 0,
- @@ -465,7 +531,7 @@ typedef struct
- } x264_nal_t;
- /****************************************************************************
- - * Encoder functions:
- + * Encoder functions
- ****************************************************************************/
- /* Force a link error in the case of linking against an incompatible API version.
- @@ -497,16 +563,16 @@ int x264_encoder_reconfig( x264_t *, x264_param_t * );
- void x264_encoder_parameters( x264_t *, x264_param_t * );
- /* x264_encoder_headers:
- * return the SPS and PPS that will be used for the whole stream.
- - * if i_nal > 0, returns the total size of all NAL payloads.
- + * *pi_nal is the number of NAL units outputted in pp_nal.
- * returns negative on error.
- * the payloads of all output NALs are guaranteed to be sequential in memory. */
- -int x264_encoder_headers( x264_t *, x264_nal_t **, int * );
- +int x264_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
- /* x264_encoder_encode:
- * encode one picture.
- - * if i_nal > 0, returns the total size of all NAL payloads.
- + * *pi_nal is the number of NAL units outputted in pp_nal.
- * returns negative on error, zero if no NAL units returned.
- * the payloads of all output NALs are guaranteed to be sequential in memory. */
- -int x264_encoder_encode ( x264_t *, x264_nal_t **, int *, x264_picture_t *, x264_picture_t * );
- +int x264_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
- /* x264_encoder_close:
- * close an encoder handler */
- void x264_encoder_close ( x264_t * );
- --
- 1.6.1.2
- From cb7143299578377dbe1e11a93c074d0890d487e0 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 21 Feb 2010 03:56:06 -0800
- Subject: [PATCH 12/16] Make b-pyramid normal the default
- Now that b-pyramid works with MB-tree and is spec compliant, there's no real reason not to make it default.
- Improves compression 0-5% depending on the video.
- Also allow 0/1/2 to be used as aliases for none/strict/normal (for conciseness).
- ---
- common/common.c | 9 ++++++++-
- x264.h | 2 +-
- 2 files changed, 9 insertions(+), 2 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index a99b65b..2faf139 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -75,7 +75,7 @@ void x264_param_default( x264_param_t *param )
- param->i_scenecut_threshold = 40;
- param->i_bframe_adaptive = X264_B_ADAPT_FAST;
- param->i_bframe_bias = 0;
- - param->i_bframe_pyramid = 0;
- + param->i_bframe_pyramid = X264_B_PYRAMID_NORMAL;
- param->b_interlaced = 0;
- param->b_constrained_intra = 0;
- @@ -637,7 +637,14 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- OPT("b-bias")
- p->i_bframe_bias = atoi(value);
- OPT("b-pyramid")
- + {
- b_error |= parse_enum( value, x264_b_pyramid_names, &p->i_bframe_pyramid );
- + if( b_error )
- + {
- + b_error = 0;
- + p->i_bframe_pyramid = atoi(value);
- + }
- + }
- OPT("nf")
- p->b_deblocking_filter = !atobool(value);
- OPT2("filter", "deblock")
- diff --git a/x264.h b/x264.h
- index f317e98..dec296c 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 86
- +#define X264_BUILD 87
- /* x264_t:
- * opaque handler for encoder */
- --
- 1.6.1.2
- From edebcf0074105c058c60e33b5bf7323743eb19e6 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 21 Feb 2010 13:20:19 -0800
- Subject: [PATCH 13/16] Abide by the MinCR level limit
- Some Blu-ray analyzers were complaining about this.
- ---
- encoder/ratecontrol.c | 29 +++++++++++++++++++++++++++--
- encoder/set.c | 32 ++++++++++++++++----------------
- x264.h | 3 ++-
- 3 files changed, 45 insertions(+), 19 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 3d86aaa..d0fdb50 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -136,6 +136,7 @@ struct x264_ratecontrol_t
- /* MBRC stuff */
- float frame_size_estimated; /* Access to this variable must be atomic: double is
- * not atomic on all arches we care about */
- + double frame_size_maximum; /* Maximum frame size due to MinCR */
- double frame_size_planned;
- double slice_size_planned;
- double max_frame_error;
- @@ -1039,6 +1040,24 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
- memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
- rc->row_pred = &rc->row_preds[h->sh.i_type];
- update_vbv_plan( h, overhead );
- +
- + const x264_level_t *l = x264_levels;
- + while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
- + l++;
- +
- + /* The spec has a bizarre special case for the first frame. */
- + if( h->i_frame == 0 )
- + {
- + //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
- + double fr = 1. / 172;
- + int pic_size_in_mbs = h->sps->i_mb_width * h->sps->i_mb_height;
- + rc->frame_size_maximum = 384 * 8 * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / l->mincr;
- + }
- + else
- + {
- + //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR
- + rc->frame_size_maximum = 384 * 8 * (1 / rc->fps) * l->mbps / l->mincr;
- + }
- }
- if( h->sh.i_type != SLICE_TYPE_B )
- @@ -1220,9 +1239,10 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
- b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- }
- - /* avoid VBV underflow */
- + /* avoid VBV underflow or MinCR violation */
- while( (rc->qpm < h->param.rc.i_qp_max)
- - && (rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) )
- + && ((rc->buffer_fill - b1 < rc->buffer_rate * rc->max_frame_error) ||
- + (rc->frame_size_maximum - b1 < rc->frame_size_maximum * rc->max_frame_error)))
- {
- rc->qpm ++;
- b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
- @@ -1677,6 +1697,11 @@ static double clip_qscale( x264_t *h, int pict_type, double q )
- q = X264_MAX( q0, q );
- }
- + /* Apply MinCR restrictions */
- + double bits = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd );
- + if( bits > rcc->frame_size_maximum )
- + q *= bits / rcc->frame_size_maximum;
- +
- /* Check B-frame complexity, and use up any bits that would
- * overflow before the next P-frame. */
- if( h->sh.i_type == SLICE_TYPE_P && !rcc->single_frame_vbv )
- diff --git a/encoder/set.c b/encoder/set.c
- index f79919b..03a6dee 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -536,22 +536,22 @@ fail:
- const x264_level_t x264_levels[] =
- {
- - { 10, 1485, 99, 152064, 64, 175, 64, 64, 0, 0, 0, 1 },
- -// {"1b", 1485, 99, 152064, 128, 350, 64, 64, 0, 0, 0, 1 },
- - { 11, 3000, 396, 345600, 192, 500, 128, 64, 0, 0, 0, 1 },
- - { 12, 6000, 396, 912384, 384, 1000, 128, 64, 0, 0, 0, 1 },
- - { 13, 11880, 396, 912384, 768, 2000, 128, 64, 0, 0, 0, 1 },
- - { 20, 11880, 396, 912384, 2000, 2000, 128, 64, 0, 0, 0, 1 },
- - { 21, 19800, 792, 1824768, 4000, 4000, 256, 64, 0, 0, 0, 0 },
- - { 22, 20250, 1620, 3110400, 4000, 4000, 256, 64, 0, 0, 0, 0 },
- - { 30, 40500, 1620, 3110400, 10000, 10000, 256, 32, 22, 0, 1, 0 },
- - { 31, 108000, 3600, 6912000, 14000, 14000, 512, 16, 60, 1, 1, 0 },
- - { 32, 216000, 5120, 7864320, 20000, 20000, 512, 16, 60, 1, 1, 0 },
- - { 40, 245760, 8192, 12582912, 20000, 25000, 512, 16, 60, 1, 1, 0 },
- - { 41, 245760, 8192, 12582912, 50000, 62500, 512, 16, 24, 1, 1, 0 },
- - { 42, 522240, 8704, 13369344, 50000, 62500, 512, 16, 24, 1, 1, 1 },
- - { 50, 589824, 22080, 42393600, 135000, 135000, 512, 16, 24, 1, 1, 1 },
- - { 51, 983040, 36864, 70778880, 240000, 240000, 512, 16, 24, 1, 1, 1 },
- + { 10, 1485, 99, 152064, 64, 175, 64, 64, 0, 2, 0, 0, 1 },
- +// {"1b", 1485, 99, 152064, 128, 350, 64, 64, 0, 2, 0, 0, 1 },
- + { 11, 3000, 396, 345600, 192, 500, 128, 64, 0, 2, 0, 0, 1 },
- + { 12, 6000, 396, 912384, 384, 1000, 128, 64, 0, 2, 0, 0, 1 },
- + { 13, 11880, 396, 912384, 768, 2000, 128, 64, 0, 2, 0, 0, 1 },
- + { 20, 11880, 396, 912384, 2000, 2000, 128, 64, 0, 2, 0, 0, 1 },
- + { 21, 19800, 792, 1824768, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 },
- + { 22, 20250, 1620, 3110400, 4000, 4000, 256, 64, 0, 2, 0, 0, 0 },
- + { 30, 40500, 1620, 3110400, 10000, 10000, 256, 32, 22, 2, 0, 1, 0 },
- + { 31, 108000, 3600, 6912000, 14000, 14000, 512, 16, 60, 4, 1, 1, 0 },
- + { 32, 216000, 5120, 7864320, 20000, 20000, 512, 16, 60, 4, 1, 1, 0 },
- + { 40, 245760, 8192, 12582912, 20000, 25000, 512, 16, 60, 4, 1, 1, 0 },
- + { 41, 245760, 8192, 12582912, 50000, 62500, 512, 16, 24, 2, 1, 1, 0 },
- + { 42, 522240, 8704, 13369344, 50000, 62500, 512, 16, 24, 2, 1, 1, 1 },
- + { 50, 589824, 22080, 42393600, 135000, 135000, 512, 16, 24, 2, 1, 1, 1 },
- + { 51, 983040, 36864, 70778880, 240000, 240000, 512, 16, 24, 2, 1, 1, 1 },
- { 0 }
- };
- diff --git a/x264.h b/x264.h
- index dec296c..7474a50 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 87
- +#define X264_BUILD 88
- /* x264_t:
- * opaque handler for encoder */
- @@ -346,6 +346,7 @@ typedef struct {
- int mv_range; /* max vertical mv component range (pixels) */
- int mvs_per_2mb; /* max mvs per 2 consecutive mbs. */
- int slice_rate; /* ?? */
- + int mincr; /* min compression ratio */
- int bipred8x8; /* limit bipred to >=8x8 */
- int direct8x8; /* limit b_direct to >=8x8 */
- int frame_only; /* forbid interlacing */
- --
- 1.6.1.2
- From 1df2cf28b68242423638468f94ed742105f40d28 Mon Sep 17 00:00:00 2001
- From: Anton Mitrofanov <BugMaster@narod.ru>
- Date: Sun, 21 Feb 2010 13:21:11 -0800
- Subject: [PATCH 14/16] New algorithm for AQ mode 2
- Combines the auto-ness of AQ2 with a new var^0.25 instead of log(var) formula.
- Works better with MB-tree than the old AQ mode 2 and should give higher SSIM.
- ---
- encoder/ratecontrol.c | 9 ++++++---
- 1 files changed, 6 insertions(+), 3 deletions(-)
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index d0fdb50..8b47e29 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -246,17 +246,20 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
- if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
- {
- + float avg_adj_pow2 = 0.f;
- for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
- for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
- {
- uint32_t energy = ac_energy_mb( h, mb_x, mb_y, frame );
- - float qp_adj = x264_log2( energy + 2 );
- - qp_adj *= qp_adj;
- + float qp_adj = powf( energy + 1, 0.125f );
- frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
- avg_adj += qp_adj;
- + avg_adj_pow2 += qp_adj * qp_adj;
- }
- avg_adj /= h->mb.i_mb_count;
- - strength = h->param.rc.f_aq_strength * avg_adj * (1.f / 6000.f);
- + avg_adj_pow2 /= h->mb.i_mb_count;
- + strength = h->param.rc.f_aq_strength * avg_adj;
- + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
- }
- else
- strength = h->param.rc.f_aq_strength * 1.0397f;
- --
- 1.6.1.2
- From b487fb0af745cdc276e059d58fb2b2590203fe85 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 21 Feb 2010 17:30:52 -0800
- Subject: [PATCH 15/16] Use short startcodes whenever possible
- Saves one byte per frame for every slice beyond the first.
- Only applies to Annex-B output mode.
- ---
- common/common.c | 6 +++---
- common/common.h | 2 +-
- encoder/encoder.c | 12 +++++++++---
- 3 files changed, 13 insertions(+), 7 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 2faf139..0410588 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -985,17 +985,17 @@ void x264_picture_clean( x264_picture_t *pic )
- /****************************************************************************
- * x264_nal_encode:
- ****************************************************************************/
- -int x264_nal_encode( uint8_t *dst, int b_annexb, x264_nal_t *nal )
- +int x264_nal_encode( uint8_t *dst, x264_nal_t *nal, int b_annexb, int b_long_startcode )
- {
- uint8_t *src = nal->p_payload;
- uint8_t *end = nal->p_payload + nal->i_payload;
- uint8_t *orig_dst = dst;
- int i_count = 0, size;
- - /* long nal start code (we always use long ones) */
- if( b_annexb )
- {
- - *dst++ = 0x00;
- + if( b_long_startcode )
- + *dst++ = 0x00;
- *dst++ = 0x00;
- *dst++ = 0x00;
- *dst++ = 0x01;
- diff --git a/common/common.h b/common/common.h
- index 413b82f..d2b53b0 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -121,7 +121,7 @@ int64_t x264_mdate( void );
- * the encoding options */
- char *x264_param2string( x264_param_t *p, int b_res );
- -int x264_nal_encode( uint8_t *dst, int b_annexb, x264_nal_t *nal );
- +int x264_nal_encode( uint8_t *dst, x264_nal_t *nal, int b_annexb, int b_long_startcode );
- /* log */
- void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 89bf457..c76938c 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1228,10 +1228,14 @@ static int x264_encoder_encapsulate_nals( x264_t *h )
- }
- uint8_t *nal_buffer = h->nal_buffer;
- + int long_startcode = 1;
- for( i = 0; i < h->out.i_nal; i++ )
- {
- - int size = x264_nal_encode( nal_buffer, h->param.b_annexb, &h->out.nal[i] );
- + int size = x264_nal_encode( nal_buffer, &h->out.nal[i], h->param.b_annexb, long_startcode );
- + /* Don't use long startcodes for any slice beyond the first. */
- + if( h->out.nal[i].i_type >= NAL_SLICE && h->out.nal[i].i_type <= NAL_SLICE_IDR )
- + long_startcode = 0;
- h->out.nal[i].i_payload = size;
- h->out.nal[i].p_payload = nal_buffer;
- nal_buffer += size;
- @@ -1715,8 +1719,10 @@ static int x264_slice_write( x264_t *h )
- bs_t bs_bak;
- x264_cabac_t cabac_bak;
- uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
- - /* Assume no more than 3 bytes of NALU escaping. */
- - int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-3-NALU_OVERHEAD)*8 : INT_MAX;
- + /* Assume no more than 3 bytes of NALU escaping.
- + * Slices other than the first use a 3-byte startcode. */
- + int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->sh.i_first_mb)) + 3;
- + int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
- int starting_bits = bs_pos(&h->out.bs);
- bs_realign( &h->out.bs );
- --
- 1.6.1.2
- From 81c1ae7de624e837cb3cc058ea0d8e8d3dccbeb3 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 22 Feb 2010 17:33:17 -0800
- Subject: [PATCH 16/16] Faster probe_skip, 2x2 DC transform handling
- Move the 2x2 DC DCT into the dct_dc asm function to avoid some store-to-load forwarding penalties and extra register loads.
- Use dct_dc as part of the early termination in probe_skip.
- x86 asm partially by Holger Lubitz.
- ARM NEON asm by David Conrad.
- ---
- common/arm/dct-a.S | 14 +++++++++++---
- common/dct.c | 11 +++++++++++
- common/x86/dct-a.asm | 50 ++++++++++++++++++++++++++++++++++----------------
- encoder/macroblock.c | 13 +++++++++----
- 4 files changed, 65 insertions(+), 23 deletions(-)
- diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
- index 0ed7238..3b9fab9 100644
- --- a/common/arm/dct-a.S
- +++ b/common/arm/dct-a.S
- @@ -639,12 +639,20 @@ function x264_sub8x8_dct_dc_neon
- vld1.64 {d30}, [r1,:64], r3
- vadd.s16 q1, q12, q13
- vld1.64 {d31}, [r2,:64], ip
- - vpadd.s16 d0, d0, d1
- - vadd.s16 q1, q1, q14
- vsubl.u8 q15, d30, d31
- + vadd.s16 q1, q1, q14
- +
- + vadd.s16 d4, d0, d1
- vadd.s16 q1, q1, q15
- - vpadd.s16 d2, d2, d3
- + vsub.s16 d5, d0, d1
- + vadd.s16 d6, d2, d3
- + vsub.s16 d7, d2, d3
- + vadd.s16 q0, q2, q3
- + vsub.s16 q1, q2, q3
- +
- vpadd.s16 d0, d0, d2
- + vpadd.s16 d1, d1, d3
- + vpadd.s16 d0, d0, d1
- vst1.64 {d0}, [r0,:64]
- bx lr
- .endfunc
- diff --git a/common/dct.c b/common/dct.c
- index aa83ef4..55f78a5 100644
- --- a/common/dct.c
- +++ b/common/dct.c
- @@ -184,10 +184,21 @@ static int sub4x4_dct_dc( uint8_t *pix1, uint8_t *pix2 )
- static void sub8x8_dct_dc( int16_t dct[4], uint8_t *pix1, uint8_t *pix2 )
- {
- + int d0, d1, d2, d3;
- dct[0] = sub4x4_dct_dc( &pix1[0], &pix2[0] );
- dct[1] = sub4x4_dct_dc( &pix1[4], &pix2[4] );
- dct[2] = sub4x4_dct_dc( &pix1[4*FENC_STRIDE+0], &pix2[4*FDEC_STRIDE+0] );
- dct[3] = sub4x4_dct_dc( &pix1[4*FENC_STRIDE+4], &pix2[4*FDEC_STRIDE+4] );
- +
- + /* 2x2 DC transform */
- + d0 = dct[0] + dct[1];
- + d1 = dct[2] + dct[3];
- + d2 = dct[0] - dct[1];
- + d3 = dct[2] - dct[3];
- + dct[0] = d0 + d1;
- + dct[2] = d2 + d3;
- + dct[1] = d0 - d1;
- + dct[3] = d2 - d3;
- }
- static void add4x4_idct( uint8_t *p_dst, int16_t dct[16] )
- diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
- index 618433c..5dd51e5 100644
- --- a/common/x86/dct-a.asm
- +++ b/common/x86/dct-a.asm
- @@ -509,28 +509,43 @@ cglobal x264_add16x16_idct_dc_ssse3, 2,2,8
- movq m1, m2
- punpckldq m2, m3
- punpckhdq m1, m3
- - psadbw %1, m7
- - psadbw %2, m7
- - psadbw m2, m7
- - psadbw m1, m7
- + pxor m3, m3
- + psadbw %1, m3
- + psadbw %2, m3
- + psadbw m2, m3
- + psadbw m1, m3
- psubw %1, m2
- psubw %2, m1
- %endmacro
- +%macro DCT2x2 2 ; reg s1/s0 (!=m1), reg s3/s2
- + pshufw mm1, %1, 10100000b ; s1 s1 s0 s0
- + pshufw mm0, %2, 10110001b ; s3 __ s2 __
- + paddw mm1, %2 ; s1 s13 s0 s02
- + psubw mm1, mm0 ; d13 s13 d02 s02
- + pshufw mm0, mm1, 01000100b ; d02 s02 d02 s02
- + psrlq mm1, 32 ; __ __ d13 s13
- + paddw mm0, mm1 ; d02 s02 d02+d13 s02+s13
- + psllq mm1, 32 ; d13 s13
- + psubw mm0, mm1 ; d02-d13 s02-s13 d02+d13 s02+s13
- +%endmacro
- +
- INIT_MMX
- cglobal x264_sub8x8_dct_dc_mmxext, 3,3
- - pxor m7, m7
- - call .loop
- - add r1, FENC_STRIDE*4
- - add r2, FDEC_STRIDE*4
- - add r0, 4
- -.loop:
- DCTDC_2ROW_MMX m0, m4, 0
- DCTDC_2ROW_MMX m5, m6, 2
- paddw m0, m5
- paddw m4, m6
- - punpcklwd m0, m4
- - movd [r0], m0
- + punpckldq m0, m4
- + add r1, FENC_STRIDE*4
- + add r2, FDEC_STRIDE*4
- + DCTDC_2ROW_MMX m7, m4, 0
- + DCTDC_2ROW_MMX m5, m6, 2
- + paddw m7, m5
- + paddw m4, m6
- + punpckldq m7, m4
- + DCT2x2 m0, m7
- + movq [r0], m0
- ret
- INIT_XMM
- @@ -558,13 +573,16 @@ cglobal x264_sub8x8_dct_dc_sse2, 3,3,8
- DCTDC_2ROW_SSE2 2, 1, m4
- add r1, FENC_STRIDE*4
- add r2, FDEC_STRIDE*4
- - psubq m4, m6
- + psubd m4, m6
- DCTDC_2ROW_SSE2 0, 0, m5
- DCTDC_2ROW_SSE2 2, 1, m5
- - psubq m5, m6
- + psubd m5, m6
- packssdw m4, m5
- - packssdw m4, m4
- - movq [r0], m4
- + movhlps m5, m4
- + movdq2q mm0, m4
- + movdq2q mm7, m5
- + DCT2x2 mm0, mm7
- + movq [r0], mm0
- RET
- ;-----------------------------------------------------------------------------
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index f67a898..0be6201 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -365,7 +365,6 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- if( ssd[ch] > thresh )
- {
- h->dctf.sub8x8_dct_dc( dct2x2, h->mb.pic.p_fenc[1+ch], h->mb.pic.p_fdec[1+ch] );
- - dct2x2dc_dconly( dct2x2 );
- if( h->mb.b_trellis )
- nz_dc = x264_quant_dc_trellis( h, dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
- else
- @@ -980,10 +979,10 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
- if( ssd < thresh )
- continue;
- - h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
- + /* The vast majority of chroma checks will terminate during the DC check or the higher
- + * threshold check, so we can save time by doing a DC-only DCT. */
- + h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
- - /* calculate dct DC */
- - dct2x2dc( dct2x2, dct4x4 );
- if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
- return 0;
- @@ -991,9 +990,15 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
- if( ssd < thresh*4 )
- continue;
- + h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
- +
- /* calculate dct coeffs */
- for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
- {
- + /* We don't need to zero the DC coefficient before quantization because we already
- + * checked that all the DCs were zero above at twice the precision that quant4x4
- + * uses. This applies even though the DC here is being quantized before the 2x2
- + * transform. */
- if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
- continue;
- h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
- --
- 1.6.1.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement