Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/common/common.h b/common/common.h
- index b8c6dfd..615c045 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -750,7 +750,7 @@ struct x264_t
- int i_direct_score[2];
- int i_direct_frames[2];
- /* num p-frames weighted */
- - int i_wpred[3];
- + int i_wpred[2];
- } stat;
- diff --git a/common/frame.h b/common/frame.h
- index 419da08..7ba014f 100644
- --- a/common/frame.h
- +++ b/common/frame.h
- @@ -112,8 +112,8 @@ typedef struct x264_frame
- uint16_t *i_inv_qscale_factor;
- int b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
- float f_weighted_cost_delta[X264_BFRAME_MAX+2];
- - uint32_t i_pixel_sum;
- - uint64_t i_pixel_ssd;
- + uint32_t i_pixel_sum[3];
- + uint64_t i_pixel_ssd[3];
- /* hrd */
- x264_hrd_t hrd_timing;
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 2ece9dc..74672d1 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -1480,7 +1480,7 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
- weight[1].weightfn[width>>2]( &pix1[x+y*16], 16, &pix1[x+y*16], 16, &weight[1], height ); \
- h->mc.mc_chroma( &pix2[x+y*16], 16, &p_fref[5][or+x+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
- if( weight[2].weightfn ) \
- - weight[1].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
- + weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
- if( pixel == PIXEL_4x4 )
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 300041e..9e119ca 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1395,49 +1395,66 @@ static void x264_weighted_pred_init( x264_t *h )
- int i_padv = PADV << h->param.b_interlaced;
- int denom = -1;
- - int weightluma = 0;
- + int weightplane[2] = { 0, 0 };
- int buffer_next = 0;
- - //FIXME: when chroma support is added, move this into loop
- - h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
- - h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
- - for( int j = 0; j < h->i_ref0; j++ )
- +
- + for( int i = 0; i < 3; i++ )
- {
- - if( h->fenc->weight[j][0].weightfn )
- + for( int j = 0; j < h->i_ref0; j++ )
- {
- - h->sh.weight[j][0] = h->fenc->weight[j][0];
- - // if weight is useless, don't write it to stream
- - if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
- - h->sh.weight[j][0].weightfn = NULL;
- - else
- + if( h->fenc->weight[j][i].weightfn )
- {
- - if( !weightluma )
- + h->sh.weight[j][i] = h->fenc->weight[j][i];
- + // if weight is useless, don't write it to stream
- + if( h->sh.weight[j][i].i_scale == 1<<h->sh.weight[j][i].i_denom && h->sh.weight[j][i].i_offset == 0 )
- + h->sh.weight[j][i].weightfn = NULL;
- + else
- {
- - weightluma = 1;
- - h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
- - assert( x264_clip3( denom, 0, 7 ) == denom );
- + if( !weightplane[!!i] )
- + {
- + weightplane[!!i] = 1;
- + h->sh.weight[0][!!i].i_denom = denom = h->sh.weight[j][i].i_denom;
- + assert( x264_clip3( denom, 0, 7 ) == denom );
- + }
- +
- + assert( h->sh.weight[j][i].i_denom == denom );
- + if( !i )
- + {
- + h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
- + //scale full resolution frame
- + if( h->param.i_threads == 1 )
- + {
- + uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
- + uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
- + int stride = h->fenc->i_stride[0];
- + int width = h->fenc->i_width[0] + PADH*2;
- + int height = h->fenc->i_lines[0] + i_padv*2;
- + x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
- + h->fenc->i_lines_weighted = height;
- + }
- + }
- }
- - assert( h->sh.weight[j][0].i_denom == denom );
- - assert( x264_clip3( h->sh.weight[j][0].i_scale, 0, 127 ) == h->sh.weight[j][0].i_scale );
- - assert( x264_clip3( h->sh.weight[j][0].i_offset, -128, 127 ) == h->sh.weight[j][0].i_offset );
- - h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
- - h->fenc->i_stride[0] * i_padv + PADH;
- }
- }
- -
- - //scale full resolution frame
- - if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
- - {
- - uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
- - uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
- - int stride = h->fenc->i_stride[0];
- - int width = h->fenc->i_width[0] + PADH*2;
- - int height = h->fenc->i_lines[0] + i_padv*2;
- - x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
- - h->fenc->i_lines_weighted = height;
- - }
- }
- - if( !weightluma )
- +
- + if( weightplane[1] )
- + for( int i = 0; i < h->i_ref0; i++ )
- + if( h->sh.weight[i][1].weightfn && !h->sh.weight[i][2].weightfn )
- + {
- + h->sh.weight[i][2].i_scale = 1 << h->sh.weight[0][1].i_denom;
- + h->sh.weight[i][2].i_offset = 0;
- + }
- + else if( h->sh.weight[i][2].weightfn && !h->sh.weight[i][1].weightfn )
- + {
- + h->sh.weight[i][1].i_scale = 1 << h->sh.weight[0][1].i_denom;
- + h->sh.weight[i][1].i_offset = 0;
- + }
- +
- + if( !weightplane[0] )
- h->sh.weight[0][0].i_denom = 0;
- + if( !weightplane[1] )
- + h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
- }
- static inline void x264_reference_build_list( x264_t *h, int i_poc )
- @@ -1530,6 +1547,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
- {
- w[0] = h->fenc->weight[0][0];
- w[0].i_offset--;
- +
- h->mc.weight_cache( h, &w[0] );
- idx = x264_weighted_reference_duplicate( h, 0, w );
- }
- @@ -2639,13 +2657,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- {
- h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
- if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
- - for( int i = 0; i < 3; i++ )
- - for( int j = 0; j < h->i_ref0; j++ )
- - if( h->sh.weight[0][i].i_denom != 0 )
- - {
- - h->stat.i_wpred[i]++;
- - break;
- - }
- + for( int i = 0; i < 2; i++ )
- + h->stat.i_wpred[i] += !!h->sh.weight[0][i].i_denom;
- }
- if( h->sh.i_type == SLICE_TYPE_B )
- {
- @@ -2987,8 +3000,9 @@ void x264_encoder_close ( x264_t *h )
- }
- if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
- - x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
- - h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
- + x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
- + h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
- + h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
- for( int i_list = 0; i_list < 2; i_list++ )
- for( int i_slice = 0; i_slice < 2; i_slice++ )
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index b51dbf7..dc771db 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -51,8 +51,8 @@ typedef struct
- int s_count;
- float blurred_complexity;
- char direct_mode;
- - int16_t weight[2];
- - int16_t i_weight_denom;
- + int16_t weight[3][2];
- + int16_t i_weight_denom[2];
- int refcount[16];
- int refs;
- int i_duration;
- @@ -777,11 +777,19 @@ int x264_ratecontrol_new( x264_t *h )
- rce->refs = ref;
- /* find weights */
- - rce->i_weight_denom = -1;
- + rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
- char *w = strchr( p, 'w' );
- if( w )
- - if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
- - rce->i_weight_denom = -1;
- + {
- + int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
- + &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
- + &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
- + &rce->weight[2][0], &rce->weight[2][1] );
- + if( count == 3 )
- + rce->i_weight_denom[1] = -1;
- + else if ( count != 8 )
- + rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
- + }
- if( pict_type != 'b' )
- rce->kept_as_ref = 1;
- @@ -1391,8 +1399,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
- ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
- if( h->param.analyse.i_weighted_pred <= 0 )
- return;
- - if( rce->i_weight_denom >= 0 )
- - SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
- +
- + if( rce->i_weight_denom[0] >= 0 )
- + SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
- +
- + if( rce->i_weight_denom[1] >= 0 )
- + {
- + SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
- + SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
- + }
- }
- /* After encoding one frame, save stats and update ratecontrol state */
- @@ -1449,9 +1464,19 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
- goto fail;
- }
- - if( h->sh.weight[0][0].weightfn )
- + if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
- {
- - if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
- + if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32,
- + h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
- + goto fail;
- + if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
- + {
- + if( fprintf( rc->p_stat_file_out, ",%"PRId32",%"PRId32",%"PRId32",%"PRId32",%"PRId32"\n",
- + h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
- + h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
- + goto fail;
- + }
- + else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
- goto fail;
- }
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 120a38a..63d1a26 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -70,21 +70,42 @@ static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_
- void x264_weight_plane_analyse( x264_t *h, x264_frame_t *frame )
- {
- - uint32_t sad = 0;
- - uint64_t ssd = 0;
- - uint8_t *p = frame->plane[0];
- - int stride = frame->i_stride[0];
- - int width = frame->i_width[0];
- - int height = frame->i_lines[0];
- - for( int y = 0; y < height>>4; y++, p += stride*16 )
- - for( int x = 0; x < width; x += 16 )
- + for( int plane = 0; plane < 3; plane++ )
- + {
- + uint32_t sad = 0;
- + uint64_t ssd = 0;
- + uint8_t *p = frame->plane[plane];
- + int stride = frame->i_stride[!!plane];
- + int width = frame->i_width[!!plane];
- + int height = frame->i_lines[!!plane];
- + if( !plane )
- {
- - uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
- - sad += (uint32_t)res;
- - ssd += res >> 32;
- + for( int y = 0; y < height; y+=16, p += stride*16 )
- + for( int x = 0; x < width; x += 16 )
- + {
- + uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
- + sad += (uint32_t)res;
- + ssd += res >> 32;
- + }
- + frame->i_pixel_sum[plane] = sad;
- + frame->i_pixel_ssd[plane] = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
- }
- - frame->i_pixel_sum = sad;
- - frame->i_pixel_ssd = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
- + else
- + {
- + ALIGNED_16( uint8_t flat[8] ) = {0};
- + for( int y = 0; y < height; y+=8, p += stride*8 )
- + for( int x = 0; x < width; x += 8 )
- + {
- + uint32_t res = h->pixf.sad_aligned[PIXEL_8x8]( p + x, stride, flat, 0 );
- + sad += res;
- + ssd += res*res;
- + }
- + int mean = ( sad * 64 + width * height / 2 ) / ( width * height );
- + ssd = (ssd + width * height / 128) / (width*height / 64);
- + frame->i_pixel_sum[plane] = sad;
- + frame->i_pixel_ssd[plane] = ssd - mean*mean;
- + }
- + }
- }
- static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest )
- @@ -115,7 +136,35 @@ static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fe
- return ref->lowres[0];
- }
- -static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
- +/* Prepare the reference data that chroma weight costs are measured against.
- + *
- + * h:    encoder context (only h->mc.mc_chroma is used here).
- + * fenc: frame being encoded; supplies plane geometry and the lowres MVs.
- + * ref:  reference frame whose chroma plane is read.
- + * dest: scratch buffer that receives the motion-compensated plane.
- + * i:    chroma plane index into ref->plane[].
- + *
- + * Returns dest when lowres motion vectors are used, otherwise the untouched
- + * ref->plane[i]. */
- +static NOINLINE uint8_t *x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int i )
- +{
- +    int ref0_distance = fenc->i_frame - ref->i_frame - 1;
- +    /* NOTE(review): 0x7FFF in the first MV presumably marks "no lowres motion
- +     * search available for this distance" -- confirm against the lookahead. */
- +    if( fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF )
- +    {
- +        int i_stride = fenc->i_stride[1];
- +        int i_lines = fenc->i_lines[1];
- +        int i_width = fenc->i_width[1];
- +        int i_mb_xy = 0;
- +
- +        /* One 8x8 chroma block per lowres MV, walked in raster order. */
- +        for( int y = 0; y < i_lines; y += 8 )
- +            for( int x = 0; x < i_width; x += 8, i_mb_xy++ )
- +            {
- +                int i_pel_offset = y*i_stride + x;
- +                int mvx = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][0];
- +                int mvy = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][1];
- +                /* The source pointer must carry the block position, as the
- +                 * other mc_chroma callers do; passing the bare plane base
- +                 * predicted every block from the top-left corner of ref. */
- +                h->mc.mc_chroma( &dest[i_pel_offset], i_stride,
- +                                 &ref->plane[i][i_pel_offset], i_stride,
- +                                 mvx, mvy, 8, 8 );
- +            }
- +        x264_emms();
- +        return dest;
- +    }
- +    x264_emms();
- +    return ref->plane[i];
- +}
- +
- +static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
- {
- unsigned int cost = 0;
- int i_stride = fenc->i_stride_lowres;
- @@ -155,6 +204,35 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui
- return cost;
- }
- +/* Estimate how well the (optionally weighted) reference matches chroma plane
- + * i of fenc.
- + *
- + * h:    encoder context (pixel function tables).
- + * fenc: frame being encoded; supplies plane i and its geometry.
- + * src:  reference plane data laid out with fenc's stride for plane i.
- + * w:    weight to apply to src before comparing, or NULL for no weighting.
- + * i:    chroma plane index.
- + *
- + * Returns the sum over all 8x8 blocks of |sum(reference) - sum(fenc)|. */
- +static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int i )
- +{
- +    unsigned int cost = 0;
- +    int i_stride = fenc->i_stride[i];
- +    int i_lines = fenc->i_lines[i];
- +    int i_width = fenc->i_width[i];
- +    uint8_t *fenc_plane = fenc->plane[i];
- +    ALIGNED_ARRAY_16( uint8_t, buf, [8*8] );
- +    /* SAD against a row of zeros re-read with stride 0 yields the plain sum
- +     * of the 8x8 block's pixels. */
- +    ALIGNED_16( uint8_t flat[8] ) = {0};
- +
- +    for( int y = 0; y < i_lines; y += 8 )
- +        for( int x = 0; x < i_width; x += 8 )
- +        {
- +            int pixoff = y*i_stride + x;
- +            int fenc_sum = h->pixf.sad_aligned[PIXEL_8x8]( &fenc_plane[pixoff], i_stride, flat, 0 );
- +            int ref_sum;
- +            if( w )
- +            {
- +                /* Weight the reference block into buf, then score it against
- +                 * fenc. Scoring it against the unweighted reference would
- +                 * always make the identity weight look best. */
- +                w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
- +                ref_sum = h->pixf.sad_aligned[PIXEL_8x8]( buf, 8, flat, 0 );
- +            }
- +            else
- +                ref_sum = h->pixf.sad_aligned[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 );
- +            cost += abs( ref_sum - fenc_sum );
- +        }
- +    x264_emms();
- +    return cost;
- +}
- +
- void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
- {
- float fenc_mean, ref_mean, fenc_var, ref_var;
- @@ -167,66 +245,106 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
- float guess_scale;
- int found;
- x264_weight_t *weights = fenc->weight[0];
- + SET_WEIGHT(weights[1], 0, 1, 0, 0 );
- + SET_WEIGHT(weights[2], 0, 1, 0, 0 );
- + for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
- + {
- + fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
- + ref_var = round( sqrt( ref->i_pixel_ssd[plane] ) );
- + fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
- + ref_mean = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
- - fenc_var = round( sqrt( fenc->i_pixel_ssd ) );
- - ref_var = round( sqrt( ref->i_pixel_ssd ) );
- - fenc_mean = (float)fenc->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
- - ref_mean = (float) ref->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
- + //early termination
- + if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
- + {
- + SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
- + continue;
- + }
- - //early termination
- - if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
- - {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- - }
- + guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
- - guess_scale = ref_var ? fenc_var/ref_var : 0;
- - x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[0] );
- + if( plane )
- + {
- + weights[plane].i_denom = 6;
- + weights[plane].i_scale = x264_clip_uint8( round(guess_scale * 64.0) );
- + if( weights[plane].i_scale > 127 )
- + {
- + weights[1].weightfn = weights[2].weightfn = 0;
- + break;
- + }
- + }
- + else
- + x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
- - found = 0;
- - mindenom = weights[0].i_denom;
- - minscale = weights[0].i_scale;
- - minoff = 0;
- - offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
- + found = 0;
- + mindenom = weights[plane].i_denom;
- + minscale = weights[plane].i_scale;
- + minoff = 0;
- - if( !fenc->b_intra_calculated )
- - {
- - x264_mb_analysis_t a;
- - x264_lowres_context_init( h, &a );
- - x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
- - }
- - uint8_t *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
- - origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
- + if( !plane && !fenc->b_intra_calculated )
- + {
- + x264_mb_analysis_t a;
- + x264_lowres_context_init( h, &a );
- + x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
- + }
- - if( !minscore )
- - {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- - }
- + uint8_t *mcbuf;
- + if( !plane )
- + {
- + mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
- + origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
- + }
- + else
- + {
- + mcbuf = x264_weight_cost_init_chroma( h, fenc, ref, h->mb.p_weight_buf[0], plane );
- + origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0, plane );
- + }
- - // This gives a slight improvement due to rounding errors but only tests
- - // one offset on lookahead.
- - // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
- - for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
- - {
- - SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
- - unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0] );
- - COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
- + if( !minscore )
- + continue;
- +
- + // This gives a slight improvement due to rounding errors but only tests
- + // one offset on lookahead.
- + // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
- +
- + offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
- + for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
- + {
- + SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
- + unsigned int s;
- + if( plane )
- + s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane], plane );
- + else
- + s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
- + COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
- + }
- + x264_emms();
- +
- + /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
- + /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
- + if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
- + {
- + SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
- + continue;
- + }
- + else
- + SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );
- +
- + if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
- + fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
- }
- - x264_emms();
- - /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
- - /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
- - if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
- + //FIXME, what is the correct way to deal with this?
- + if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
- {
- - SET_WEIGHT( weights[0], 0, 1, 0, 0 );
- - return;
- + int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
- + int i;
- + for( i = 1; i <= 2; i++ )
- + {
- + weights[i].i_scale = x264_clip_uint8( weights[i].i_scale >> ( weights[i].i_denom - denom ) );
- + weights[i].i_denom = denom;
- + }
- }
- - else
- - SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
- -
- - if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
- - fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
- if( weights[0].weightfn && b_lookahead )
- {
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement