Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From c40e95310f84738f7bdee83a23d66518d6dd6a64 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Sun, 14 Nov 2010 03:34:26 -0800
- Subject: [PATCH 3/3] Chroma weighted prediction
- Like luma weighted prediction, dramatically improves compression in fades.
- Up to 4-8 dB chroma PSNR gain in extreme cases (short, perfect fade-outs).
- On actual videos, helps up to ~1% overall.
- One example video with a decent number of fades (ef OP): 0.8% bitrate reduction overall, 7% bitrate reduction just counting chroma.
- Fixes a lot of artifacts in fades at lower bitrates.
- Original patch by Dylan Yudaken <dyudaken@gmail.com>.
- ---
- common/common.h | 2 +-
- encoder/encoder.c | 94 ++++++++++--------
- encoder/me.c | 8 ++
- encoder/ratecontrol.c | 47 +++++++--
- encoder/slicetype.c | 262 +++++++++++++++++++++++++++++++++++++------------
- 5 files changed, 299 insertions(+), 114 deletions(-)
- diff --git a/common/common.h b/common/common.h
- index 7d57119..1434e13 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -805,7 +805,7 @@ struct x264_t
- int i_direct_score[2];
- int i_direct_frames[2];
- /* num p-frames weighted */
- - int i_wpred[3];
- + int i_wpred[2];
- } stat;
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index ede1c28..58331b9 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1468,49 +1468,67 @@ static void x264_weighted_pred_init( x264_t *h )
- int i_padv = PADV << h->param.b_interlaced;
- int denom = -1;
- - int weightluma = 0;
- + int weightplane[2] = { 0, 0 };
- int buffer_next = 0;
- - //FIXME: when chroma support is added, move this into loop
- - h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
- - h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
- - for( int j = 0; j < h->i_ref0; j++ )
- + for( int i = 0; i < 3; i++ )
- {
- - if( h->fenc->weight[j][0].weightfn )
- + for( int j = 0; j < h->i_ref0; j++ )
- {
- - h->sh.weight[j][0] = h->fenc->weight[j][0];
- - // if weight is useless, don't write it to stream
- - if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
- - h->sh.weight[j][0].weightfn = NULL;
- - else
- + if( h->fenc->weight[j][i].weightfn )
- {
- - if( !weightluma )
- + h->sh.weight[j][i] = h->fenc->weight[j][i];
- + // if weight is useless, don't write it to stream
- + if( h->sh.weight[j][i].i_scale == 1<<h->sh.weight[j][i].i_denom && h->sh.weight[j][i].i_offset == 0 )
- + h->sh.weight[j][i].weightfn = NULL;
- + else
- {
- - weightluma = 1;
- - h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
- - assert( x264_clip3( denom, 0, 7 ) == denom );
- + if( !weightplane[!!i] )
- + {
- + weightplane[!!i] = 1;
- + h->sh.weight[0][!!i].i_denom = denom = h->sh.weight[j][i].i_denom;
- + assert( x264_clip3( denom, 0, 7 ) == denom );
- + }
- +
- + assert( h->sh.weight[j][i].i_denom == denom );
- + if( !i )
- + {
- + h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
- + //scale full resolution frame
- + if( h->param.i_threads == 1 )
- + {
- + pixel *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
- + pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
- + int stride = h->fenc->i_stride[0];
- + int width = h->fenc->i_width[0] + PADH*2;
- + int height = h->fenc->i_lines[0] + i_padv*2;
- + x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
- + h->fenc->i_lines_weighted = height;
- + }
- + }
- }
- - assert( h->sh.weight[j][0].i_denom == denom );
- - assert( x264_clip3( h->sh.weight[j][0].i_scale, 0, 127 ) == h->sh.weight[j][0].i_scale );
- - assert( x264_clip3( h->sh.weight[j][0].i_offset, -128, 127 ) == h->sh.weight[j][0].i_offset );
- - h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
- - h->fenc->i_stride[0] * i_padv + PADH;
- }
- }
- + }
- - //scale full resolution frame
- - if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
- + if( weightplane[1] )
- + for( int i = 0; i < h->i_ref0; i++ )
- {
- - pixel *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
- - pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
- - int stride = h->fenc->i_stride[0];
- - int width = h->fenc->i_width[0] + PADH*2;
- - int height = h->fenc->i_lines[0] + i_padv*2;
- - x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
- - h->fenc->i_lines_weighted = height;
- + if( h->sh.weight[i][1].weightfn && !h->sh.weight[i][2].weightfn )
- + {
- + h->sh.weight[i][2].i_scale = 1 << h->sh.weight[0][1].i_denom;
- + h->sh.weight[i][2].i_offset = 0;
- + }
- + else if( h->sh.weight[i][2].weightfn && !h->sh.weight[i][1].weightfn )
- + {
- + h->sh.weight[i][1].i_scale = 1 << h->sh.weight[0][1].i_denom;
- + h->sh.weight[i][1].i_offset = 0;
- + }
- }
- - }
- - if( !weightluma )
- +
- + if( !weightplane[0] )
- h->sh.weight[0][0].i_denom = 0;
- + if( !weightplane[1] )
- + h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
- }
- static inline void x264_reference_build_list( x264_t *h, int i_poc )
- @@ -2849,13 +2867,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- {
- h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
- if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
- - for( int i = 0; i < 3; i++ )
- - for( int j = 0; j < h->i_ref0; j++ )
- - if( h->sh.weight[0][i].i_denom != 0 )
- - {
- - h->stat.i_wpred[i]++;
- - break;
- - }
- + for( int i = 0; i < 2; i++ )
- + h->stat.i_wpred[i] += !!h->sh.weight[0][i].i_denom;
- }
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- @@ -3201,8 +3214,9 @@ void x264_encoder_close ( x264_t *h )
- fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] );
- if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
- - x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
- - h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
- + x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
- + h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
- + h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
- for( int i_list = 0; i_list < 2; i_list++ )
- for( int i_slice = 0; i_slice < 2; i_slice++ )
- diff --git a/encoder/me.c b/encoder/me.c
- index 3f8d8e5..90f7dfd 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -1110,7 +1110,15 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
- uint64_t cost; \
- M32( cache_mv ) = pack16to32_mask(mx,my); \
- if( m->i_pixel <= PIXEL_8x8 ) \
- + { \
- h->mc.mc_chroma( pixu, pixv, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
- + if( m->weight[1].weightfn ) \
- + m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pixu, FDEC_STRIDE, pixu, FDEC_STRIDE, \
- + &m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
- + if( m->weight[2].weightfn ) \
- + m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pixv, FDEC_STRIDE, pixv, FDEC_STRIDE, \
- + &m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
- + } \
- cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
- COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
- } \
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 34879b7..212b474 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -53,8 +53,8 @@ typedef struct
- int s_count;
- float blurred_complexity;
- char direct_mode;
- - int16_t weight[2];
- - int16_t i_weight_denom;
- + int16_t weight[3][2];
- + int16_t i_weight_denom[2];
- int refcount[16];
- int refs;
- int i_duration;
- @@ -227,8 +227,8 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
- {
- ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
- h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
- - return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, i )
- - + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, i );
- + return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1 )
- + + ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2 );
- }
- else
- return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, i );
- @@ -854,11 +854,19 @@ int x264_ratecontrol_new( x264_t *h )
- rce->refs = ref;
- /* find weights */
- - rce->i_weight_denom = -1;
- + rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
- char *w = strchr( p, 'w' );
- if( w )
- - if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
- - rce->i_weight_denom = -1;
- + {
- + int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
- + &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
- + &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
- + &rce->weight[2][0], &rce->weight[2][1] );
- + if( count == 3 )
- + rce->i_weight_denom[1] = -1;
- + else if ( count != 8 )
- + rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
- + }
- if( pict_type != 'b' )
- rce->kept_as_ref = 1;
- @@ -1485,8 +1493,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
- ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
- if( h->param.analyse.i_weighted_pred <= 0 )
- return;
- - if( rce->i_weight_denom >= 0 )
- - SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
- +
- + if( rce->i_weight_denom[0] >= 0 )
- + SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
- +
- + if( rce->i_weight_denom[1] >= 0 )
- + {
- + SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
- + SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
- + }
- }
- /* After encoding one frame, save stats and update ratecontrol state */
- @@ -1543,9 +1558,19 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
- goto fail;
- }
- - if( h->sh.weight[0][0].weightfn )
- + if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
- {
- - if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
- + if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32,
- + h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
- + goto fail;
- + if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
- + {
- + if( fprintf( rc->p_stat_file_out, ",%"PRId32",%"PRId32",%"PRId32",%"PRId32",%"PRId32"\n",
- + h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
- + h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
- + goto fail;
- + }
- + else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
- goto fail;
- }
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index dc02fbd..5921541 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -98,7 +98,73 @@ static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc
- return ref->lowres[0];
- }
- -static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
- +/* How data is organized for chroma weightp:
- + [U: ref] [U: fenc]
- + [V: ref] [V: fenc]
- + fenc = ref + offset
- + v = u + stride * chroma height
- + * We'll need more room if we do 4:2:2 or 4:4:4. */
- +
- +static NOINLINE void x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv )
- +{
- + int ref0_distance = fenc->i_frame - ref->i_frame - 1;
- + int i_stride = fenc->i_stride[1];
- + int i_offset = i_stride / 2;
- + int i_lines = fenc->i_lines[1];
- + int i_width = fenc->i_width[1];
- + int i_mb_xy = 0;
- +
- + if( fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF )
- + {
- + for( int y = 0, pel_offset_y = 0; y < i_lines; y += 8, pel_offset_y = y*i_stride )
- + for( int x = 0, pel_offset_x = 0; x < i_width; x += 8, i_mb_xy++, pel_offset_x += 8 )
- + {
- + /* XXX: The stride for our dst is twice what it needs to be, but we have plenty of
- + * memory (the same data is used for luma as well), so it's not a problem, at least
- + * with 4:2:0. */
- + pixel *pixu = dstu + pel_offset_y + pel_offset_x;
- + pixel *pixv = dstv + pel_offset_y + pel_offset_x;
- + pixel *src1 = ref->plane[1] + pel_offset_y + pel_offset_x*2; /* NV12 */
- + pixel *src2 = fenc->plane[1] + pel_offset_y + pel_offset_x*2;
- + int mvx = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][0];
- + int mvy = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][1];
- + h->mc.mc_chroma( pixu , pixv , i_stride, src1, i_stride, mvx, mvy, 8, 8 );
- + h->mc.mc_chroma( pixu+i_offset, pixv+i_offset, i_stride, src2, i_stride, 0, 0, 8, 8 );
- + }
- + }
- + else
- + {
- + for( int y = 0, pel_offset_y = 0; y < i_lines; y += 8, pel_offset_y = y*i_stride )
- + for( int x = 0, pel_offset_x = 0; x < i_width; x += 8, i_mb_xy++, pel_offset_x += 8 )
- + {
- + pixel *pixu = dstu + pel_offset_y + pel_offset_x;
- + pixel *pixv = dstv + pel_offset_y + pel_offset_x;
- + pixel *src1 = ref->plane[1] + pel_offset_y + pel_offset_x*2;
- + pixel *src2 = fenc->plane[1] + pel_offset_y + pel_offset_x*2;
- + h->mc.mc_chroma( pixu , pixv , i_stride, src1, i_stride, 0, 0, 8, 8 );
- + h->mc.mc_chroma( pixu+i_offset, pixv+i_offset, i_stride, src2, i_stride, 0, 0, 8, 8 );
- + }
- + }
- + x264_emms();
- +}
- +
- +static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w )
- +{
- + /* Add cost of weights in the slice header. */
- + int numslices;
- + if( h->param.i_slice_count )
- + numslices = h->param.i_slice_count;
- + else if( h->param.i_slice_max_mbs )
- + numslices = (h->mb.i_mb_width * h->mb.i_mb_height + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
- + else
- + numslices = 1;
- + /* FIXME: find a way to account for --slice-max-size?
- + * Multiply by 2 as there will be a duplicate. 10 bits added as if there is a weighted frame, then an additional duplicate is used.
- + * Since using lowres frames, assume lambda = 1. */
- + return numslices * ( 10 + 2 * ( bs_size_ue( w[0].i_denom ) + bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset ) ) );
- +}
- +
- +static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
- {
- unsigned int cost = 0;
- int i_stride = fenc->i_stride_lowres;
- @@ -117,18 +183,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pi
- w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
- cost += X264_MIN( h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
- }
- - /* Add cost of weights in the slice header. */
- - int numslices;
- - if( h->param.i_slice_count )
- - numslices = h->param.i_slice_count;
- - else if( h->param.i_slice_max_mbs )
- - numslices = (h->mb.i_mb_width * h->mb.i_mb_height + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
- - else
- - numslices = 1;
- - /* FIXME: find a way to account for --slice-max-size?
- - * Multiply by 2 as there will be a duplicate. 10 bits added as if there is a weighted frame, then an additional duplicate is used.
- - * Since using lowres frames, assume lambda = 1. */
- - cost += numslices * ( 10 + 2 * ( bs_size_ue( w[0].i_denom ) + bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset ) ) );
- + cost += x264_weight_slice_header_cost( h, w );
- }
- else
- for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
- @@ -138,6 +193,46 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pi
- return cost;
- }
- +static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w )
- +{
- + int x, y;
- + unsigned int cost = 0;
- + int i_stride = fenc->i_stride[1];
- + int i_offset = i_stride / 2;
- + int i_lines = fenc->i_lines[1];
- + int i_width = fenc->i_width[1];
- + pixel *src = ref + i_offset;
- + ALIGNED_ARRAY_8( pixel, buf, [8*8] );
- + int pixoff = 0;
- + int i_mb = 0;
- + ALIGNED_8( pixel flat[8] ) = {0};
- + if( w )
- + {
- + for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
- + for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
- + {
- + w->weightfn[8>>2]( buf, 8, &ref[pixoff], i_stride, w, 8 );
- + /* The naive and seemingly sensible algorithm is to use mbcmp as in luma.
- + * But testing shows that for chroma the DC coefficient is by far the most
- + * important part of the coding cost. Thus a more useful chroma weight is
- + * obtained by comparing each block's DC coefficient instead of the actual
- + * pixels.
- + *
- + * FIXME: add a (faster) asm sum function to replace sad. */
- + cost += abs( h->pixf.sad_aligned[PIXEL_8x8]( buf, 8, flat, 0 ) -
- + h->pixf.sad_aligned[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) );
- + }
- + cost += x264_weight_slice_header_cost( h, w );
- + }
- + else
- + for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
- + for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
- + cost += abs( h->pixf.sad_aligned[PIXEL_8x8]( &ref[pixoff], i_stride, flat, 0 ) -
- + h->pixf.sad_aligned[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) );
- + x264_emms();
- + return cost;
- +}
- +
- void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
- {
- float fenc_mean, ref_mean, fenc_var, ref_var;
- @@ -150,66 +245,109 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
- float guess_scale;
- int found;
- x264_weight_t *weights = fenc->weight[0];
- + SET_WEIGHT(weights[1], 0, 1, 0, 0 );
- + SET_WEIGHT(weights[2], 0, 1, 0, 0 );
- + /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
- + for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
- + {
- + fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
- + ref_var = round( sqrt( ref->i_pixel_ssd[plane] ) );
- + fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
- + ref_mean = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
- +
- + //early termination
- + if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
- + {
- + SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
- + continue;
- + }
- - fenc_var = round( sqrt( fenc->i_pixel_ssd[0] ) );
- - ref_var = round( sqrt( ref->i_pixel_ssd[0] ) );
- - fenc_mean = (float)fenc->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
- - ref_mean = (float) ref->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
- + guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
- - //early termination
- - if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
- - {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- - }
- + if( plane )
- + {
- + weights[plane].i_denom = 6;
- + weights[plane].i_scale = x264_clip3( round(guess_scale * 64.0), 0, 255 );
- + if( weights[plane].i_scale > 127 )
- + {
- + weights[1].weightfn = weights[2].weightfn = 0;
- + break;
- + }
- + }
- + else
- + x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
- - guess_scale = ref_var ? fenc_var/ref_var : 0;
- - x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[0] );
- + found = 0;
- + mindenom = weights[plane].i_denom;
- + minscale = weights[plane].i_scale;
- + minoff = 0;
- - found = 0;
- - mindenom = weights[0].i_denom;
- - minscale = weights[0].i_scale;
- - minoff = 0;
- - offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
- + if( !plane && !fenc->b_intra_calculated )
- + {
- + x264_mb_analysis_t a;
- + x264_lowres_context_init( h, &a );
- + x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
- + }
- - if( !fenc->b_intra_calculated )
- - {
- - x264_mb_analysis_t a;
- - x264_lowres_context_init( h, &a );
- - x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
- - }
- - pixel *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
- - origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
- + pixel *mcbuf;
- + if( !plane )
- + {
- + mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
- + origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
- + }
- + else if( plane )
- + {
- + pixel *dstu = h->mb.p_weight_buf[0];
- + pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
- + x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
- + mcbuf = plane == 1 ? dstu : dstv;
- + origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0 );
- + }
- - if( !minscore )
- - {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- - }
- + if( !minscore )
- + continue;
- - // This gives a slight improvement due to rounding errors but only tests
- - // one offset on lookahead.
- - // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
- - for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
- - {
- - SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
- - unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0] );
- - COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
- + // This gives a slight improvement due to rounding errors but only tests
- + // one offset on lookahead.
- + // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
- + offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
- + for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
- + {
- + SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
- + unsigned int s;
- + if( plane )
- + s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
- + else
- + s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
- + COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
- + }
- + x264_emms();
- +
- + /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
- + /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
- + if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
- + {
- + SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
- + continue;
- + }
- + else
- + SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );
- +
- + if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
- + fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
- }
- - x264_emms();
- - /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
- - /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
- - if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
- + //FIXME, what is the correct way to deal with this?
- + if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
- {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- + int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
- + int i;
- + for( i = 1; i <= 2; i++ )
- + {
- + weights[i].i_scale = x264_clip3( weights[i].i_scale >> ( weights[i].i_denom - denom ), 0, 255 );
- + weights[i].i_denom = denom;
- + }
- }
- - else
- - SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
- -
- - if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
- - fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
- if( weights[0].weightfn && b_lookahead )
- {
- --
- 1.7.3.2.146.gca209
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement