Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/common/common.h b/common/common.h
- index 4307619..5c0ef1d 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -59,12 +59,13 @@ do {\
- #define X264_PCM_COST (384*BIT_DEPTH+16)
- #define X264_LOOKAHEAD_MAX 250
- #define QP_BD_OFFSET (6*(BIT_DEPTH-8))
- -#define QP_MAX (51+QP_BD_OFFSET)
- -#define QP_MAX_MAX (51+2*6)
- -#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
- +#define QP_MAX_SPEC (51+QP_BD_OFFSET)
- +#define QP_MAX (QP_MAX_SPEC+24)
- +#define QP_MAX_MAX (51+2*6+24)
- #define PIXEL_MAX ((1 << BIT_DEPTH)-1)
- // arbitrary, but low because SATD scores are 1/4 normal
- #define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
- +#define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC)
- // number of pixels (per thread) in progress at any given time.
- // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
- @@ -458,12 +459,11 @@ struct x264_t
- udctcoef (*quant8_mf[2])[64]; /* [2][52][64] */
- udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */
- udctcoef (*quant8_bias[2])[64]; /* [2][52][64] */
- + udctcoef (*nr_offset_emergency)[3][64];
- - /* mv/ref cost arrays. Indexed by lambda instead of
- - * qp because, due to rounding, some quantizers share
- - * lambdas. This saves memory. */
- - uint16_t *cost_mv[LAMBDA_MAX+1];
- - uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
- + /* mv/ref cost arrays. */
- + uint16_t *cost_mv[QP_MAX+1];
- + uint16_t *cost_mv_fpel[QP_MAX+1][4];
- const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
- @@ -811,9 +811,14 @@ struct x264_t
- } stat;
- - ALIGNED_16( uint32_t nr_residual_sum[2][64] );
- - ALIGNED_16( udctcoef nr_offset[2][64] );
- - uint32_t nr_count[2];
- + /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4 */
- + udctcoef (*nr_offset)[64];
- + uint32_t (*nr_residual_sum)[64];
- + uint32_t *nr_count;
- +
- + ALIGNED_16( udctcoef nr_offset_denoise[3][64] );
- + ALIGNED_16( uint32_t nr_residual_sum_buf[2][3][64] );
- + uint32_t nr_count_buf[2][3];
- /* Buffers that are allocated per-thread even in sliced threads. */
- void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 5c76d3f..8013957 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -1171,7 +1171,7 @@ void x264_macroblock_cache_save( x264_t *h )
- if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
- h->mb.i_qp = h->mb.i_last_qp;
- h->mb.qp[i_mb_xy] = h->mb.i_qp;
- - h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
- + h->mb.i_last_dqp = SPEC_QP( h->mb.i_qp ) - SPEC_QP( h->mb.i_last_qp );
- h->mb.i_last_qp = h->mb.i_qp;
- }
- diff --git a/common/quant.c b/common/quant.c
- index 816e60a..36e59ee 100644
- --- a/common/quant.c
- +++ b/common/quant.c
- @@ -143,7 +143,7 @@ static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
- static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
- {
- - for( int i = 1; i < size; i++ )
- + for( int i = 0; i < size; i++ )
- {
- int level = dct[i];
- int sign = level>>31;
- diff --git a/common/set.c b/common/set.c
- index 92b6b7a..c4d5549 100644
- --- a/common/set.c
- +++ b/common/set.c
- @@ -23,6 +23,8 @@
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
- +#define _ISOC99_SOURCE
- +#include <math.h>
- #include "common.h"
- #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
- @@ -146,21 +148,22 @@ int x264_cqm_init( x264_t *h )
- quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
- }
- }
- - for( int q = 0; q < QP_MAX+1; q++ )
- + for( int qm = 0; qm <= QP_MAX; qm++ )
- {
- int j;
- + int q = SPEC_QP( qm );
- for( int i_list = 0; i_list < 4; i_list++ )
- for( int i = 0; i < 16; i++ )
- {
- - h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
- - h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
- + h->unquant4_mf[i_list][qm][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
- + h->quant4_mf[i_list][qm][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
- if( !j )
- {
- min_qp_err = X264_MIN( min_qp_err, q );
- continue;
- }
- // round to nearest, unless that would cause the deadzone to be negative
- - h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- + h->quant4_bias[i_list][qm][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
- max_qp_err = q;
- if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) )
- @@ -170,20 +173,62 @@ int x264_cqm_init( x264_t *h )
- for( int i_list = 0; i_list < 2; i_list++ )
- for( int i = 0; i < 64; i++ )
- {
- - h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
- + h->unquant8_mf[i_list][qm][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
- j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
- - h->quant8_mf[i_list][q][i] = (uint16_t)j;
- + h->quant8_mf[i_list][qm][i] = (uint16_t)j;
- if( !j )
- {
- min_qp_err = X264_MIN( min_qp_err, q );
- continue;
- }
- - h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- + h->quant8_bias[i_list][qm][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- if( j > 0xffff && q > max_qp_err )
- max_qp_err = q;
- }
- }
- + x264_emms();
- + CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
- + for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
- + for( int cat = 0; cat <= 2; cat++ )
- + {
- + int dct8x8 = cat == 1;
- + int size = dct8x8 ? 64 : 16;
- + udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
- + int lowest_dc = (QP_MAX-QP_MAX_SPEC)/2;
- +
- + for( int i = 0; i < size; i++ )
- + {
- + /* True "emergency mode": remove all DCT coefficients */
- + if( q == QP_MAX - QP_MAX_SPEC - 1 )
- + {
- + nr_offset[i] = (1 << (7 + BIT_DEPTH)) - 1;
- + continue;
- + }
- +
- + double pos;
- + if( i == 0 )
- + {
- + /* Only reduce DC at very high QPs */
- + if( q < lowest_dc )
- + {
- + nr_offset[i] = 0;
- + continue;
- + }
- + pos = (double)(q-lowest_dc+1) / (QP_MAX - QP_MAX_SPEC - lowest_dc);
- + }
- + else
- + pos = (double)(q+1) / (QP_MAX - QP_MAX_SPEC);
- +
- + /* Exponentially interpolate between "start" and "end" */
- + double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i] / 4096.0
- + : h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i] / 4096.0;
- + double end = start * 32;
- + double scale = log(end / start);
- + double bias = exp(scale * pos) * start;
- + nr_offset[i] = bias + 0.5;
- + }
- + }
- if( !h->mb.b_lossless && max_qp_err >= h->param.rc.i_qp_min )
- {
- @@ -233,6 +278,7 @@ void x264_cqm_delete( x264_t *h )
- {
- CQM_DELETE( 4, 4 );
- CQM_DELETE( 8, 2 );
- + x264_free( h->nr_offset_emergency );
- }
- static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
- diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
- index 5d7a15e..e09f6c9 100644
- --- a/common/x86/quant-a.asm
- +++ b/common/x86/quant-a.asm
- @@ -699,8 +699,7 @@ DEQUANT_DC sse2
- ; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
- ;-----------------------------------------------------------------------------
- %macro DENOISE_DCT 1-2 0
- -cglobal denoise_dct_%1, 4,5,%2
- - mov r4d, [r0] ; backup DC coefficient
- +cglobal denoise_dct_%1, 4,4,%2
- pxor m6, m6
- .loop:
- sub r3, mmsize/2
- @@ -727,8 +726,7 @@ cglobal denoise_dct_%1, 4,5,%2
- mova [r1+r3*4+0*mmsize], m4
- mova [r1+r3*4+1*mmsize], m5
- jg .loop
- - mov [r0], r4d ; restore DC coefficient
- - RET
- + REP_RET
- %endmacro
- %define PABSD PABSD_MMX
- @@ -749,8 +747,7 @@ DENOISE_DCT ssse3, 8
- ; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
- ;-----------------------------------------------------------------------------
- %macro DENOISE_DCT 1-2 0
- -cglobal denoise_dct_%1, 4,5,%2
- - movzx r4d, word [r0]
- +cglobal denoise_dct_%1, 4,4,%2
- pxor m6, m6
- .loop:
- sub r3, mmsize
- @@ -781,8 +778,7 @@ cglobal denoise_dct_%1, 4,5,%2
- mova [r1+r3*4+2*mmsize], m5
- mova [r1+r3*4+3*mmsize], m3
- jg .loop
- - mov [r0], r4w
- - RET
- + REP_RET
- %endmacro
- %define PABSW PABSW_MMX
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index a997425..d2ec63f 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -147,18 +147,26 @@ const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
- 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
- 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
- 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
- + 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
- +1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
- +2580,2896,3251,3649,4096,4598,5161,5793, /* 80-87 */
- };
- /* lambda2 = pow(lambda,2) * .9 * 256 */
- +/* Capped to avoid overflow */
- const int x264_lambda2_tab[QP_MAX_MAX+1] = {
- - 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
- - 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
- - 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
- - 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
- - 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
- - 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
- - 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
- -5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
- + 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
- + 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
- + 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
- + 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
- + 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
- + 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
- + 943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
- + 5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
- + 38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */
- +134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */
- +134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */
- +134217727,134217727,134217727,134217727,134217727,134217727, /* 82-87 */
- };
- const uint8_t x264_exp2_lut[64] = {
- @@ -196,29 +204,40 @@ const float x264_log2_lz_lut[32] = {
- // I'm just matching the behaviour of deadzone quant.
- static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
- // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
- - { 46, 58, 73, 92, 117, 147,
- - 185, 233, 294, 370, 466, 587,
- - 740, 932, 1174, 1480, 1864, 2349,
- - 2959, 3728, 4697, 5918, 7457, 9395,
- - 11837, 14914, 18790, 23674, 29828, 37581,
- - 47349, 59656, 75163, 94699, 119313, 150326,
- - 189399, 238627, 300652, 378798, 477255, 601304,
- - 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- - 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
- - 12121539,15272182,19241743,24243077,30544363,38483486,
- - 48486154,61088726,76966972,96972308 },
- + {
- + 46, 58, 73, 92, 117, 147,
- + 185, 233, 294, 370, 466, 587,
- + 740, 932, 1174, 1480, 1864, 2349,
- + 2959, 3728, 4697, 5918, 7457, 9395,
- + 11837, 14914, 18790, 23674, 29828, 37581,
- + 47349, 59656, 75163, 94699, 119313, 150326,
- + 189399, 238627, 300652, 378798, 477255, 601304,
- + 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- + 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
- + 12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
- + 48486154, 61088726, 76966972, 96972308,
- + 122177453,134217727,134217727,134217727,134217727,134217727,134217727,134217727,
- + 134217727,134217727,134217727,134217727,134217727,134217727,134217727,134217727,
- + 134217727,134217727,134217727,134217727,134217727,134217727,134217727,134217727
- + },
- // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
- - { 27, 34, 43, 54, 68, 86,
- - 108, 136, 172, 216, 273, 343,
- - 433, 545, 687, 865, 1090, 1374,
- - 1731, 2180, 2747, 3461, 4361, 5494,
- - 6922, 8721, 10988, 13844, 17442, 21976,
- - 27688, 34885, 43953, 55377, 69771, 87906,
- - 110755, 139543, 175813, 221511, 279087, 351627,
- - 443023, 558174, 703255, 886046, 1116348, 1406511,
- - 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
- - 7088374, 8930791,11252092,14176748,17861583,22504184,
- - 28353495,35723165,45008368,56706990 }
- + {
- + 27, 34, 43, 54, 68, 86,
- + 108, 136, 172, 216, 273, 343,
- + 433, 545, 687, 865, 1090, 1374,
- + 1731, 2180, 2747, 3461, 4361, 5494,
- + 6922, 8721, 10988, 13844, 17442, 21976,
- + 27688, 34885, 43953, 55377, 69771, 87906,
- + 110755, 139543, 175813, 221511, 279087, 351627,
- + 443023, 558174, 703255, 886046, 1116348, 1406511,
- + 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
- + 7088374, 8930791, 11252092, 14176748, 17861583, 22504184,
- + 28353495, 35723165, 45008368, 56706990,
- + 71446330, 90016736,113413980,134217727,134217727,134217727,
- + 134217727,134217727,134217727,134217727,134217727,134217727,
- + 134217727,134217727,134217727,134217727,134217727,134217727,
- + 134217727,134217727,134217727,134217727,134217727,134217727
- + }
- };
- static const uint16_t x264_chroma_lambda2_offset_tab[] = {
- @@ -247,35 +266,35 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
- static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
- -static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
- +static uint16_t x264_cost_ref[QP_MAX+1][3][33];
- static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
- int x264_analyse_init_costs( x264_t *h, int qp )
- {
- int lambda = x264_lambda_tab[qp];
- - if( h->cost_mv[lambda] )
- + if( h->cost_mv[qp] )
- return 0;
- /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
- - CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
- - h->cost_mv[lambda] += 2*4*2048;
- + CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
- + h->cost_mv[qp] += 2*4*2048;
- for( int i = 0; i <= 2*4*2048; i++ )
- {
- - h->cost_mv[lambda][-i] =
- - h->cost_mv[lambda][i] = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
- + h->cost_mv[qp][-i] =
- + h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
- }
- x264_pthread_mutex_lock( &cost_ref_mutex );
- for( int i = 0; i < 3; i++ )
- for( int j = 0; j < 33; j++ )
- - x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
- + x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
- x264_pthread_mutex_unlock( &cost_ref_mutex );
- - if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
- + if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
- {
- for( int j = 0; j < 4; j++ )
- {
- - CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
- - h->cost_mv_fpel[lambda][j] += 2*2048;
- + CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
- + h->cost_mv_fpel[qp][j] += 2*2048;
- for( int i = -2*2048; i < 2*2048; i++ )
- - h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
- + h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
- }
- }
- return 0;
- @@ -285,7 +304,7 @@ fail:
- void x264_analyse_free_costs( x264_t *h )
- {
- - for( int i = 0; i < LAMBDA_MAX+1; i++ )
- + for( int i = 0; i < QP_MAX+1; i++ )
- {
- if( h->cost_mv[i] )
- x264_free( h->cost_mv[i] - 2*4*2048 );
- @@ -326,16 +345,16 @@ void x264_analyse_weight_frame( x264_t *h, int end )
- /* initialize an array of lambda*nbits for all possible mvs */
- static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
- {
- - a->p_cost_mv = h->cost_mv[a->i_lambda];
- - a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
- - a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
- + a->p_cost_mv = h->cost_mv[a->i_qp];
- + a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
- + a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
- }
- static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- {
- /* conduct the analysis using this lamda and QP */
- a->i_qp = h->mb.i_qp = i_qp;
- - h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];
- + h->mb.i_chroma_qp = h->chroma_qp_table[SPEC_QP( i_qp )];
- a->i_lambda = x264_lambda_tab[i_qp];
- a->i_lambda2 = x264_lambda2_tab[i_qp];
- @@ -343,10 +362,11 @@ static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp
- h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
- if( h->param.analyse.i_trellis )
- {
- + int effective_chroma_qp = h->mb.i_chroma_qp + X264_MAX( i_qp - QP_MAX_SPEC, 0 );
- h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
- h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
- - h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
- - h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
- + h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
- + h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
- }
- h->mb.i_psy_rd_lambda = a->i_lambda;
- /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
- @@ -366,7 +386,6 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- x264_mb_analyse_init_qp( h, a, i_qp );
- h->mb.b_transform_8x8 = 0;
- - h->mb.b_noise_reduction = 0;
- /* I: Intra part */
- a->i_satd_i16x16 =
- @@ -384,6 +403,21 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
- a->i_mbrd ? 2 :
- !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;
- + if( h->mb.i_qp > QP_MAX_SPEC )
- + {
- + h->nr_offset = h->nr_offset_emergency[h->mb.i_qp-QP_MAX_SPEC-1];
- + h->nr_residual_sum = h->nr_residual_sum_buf[1];
- + h->nr_count = h->nr_count_buf[1];
- + h->mb.b_noise_reduction = 1;
- + }
- + else
- + {
- + h->nr_offset = h->nr_offset_denoise;
- + h->nr_residual_sum = h->nr_residual_sum_buf[0];
- + h->nr_count = h->nr_count_buf[0];
- + h->mb.b_noise_reduction = 0;
- + }
- +
- /* II: Inter part P/B frame */
- if( h->sh.i_type != SLICE_TYPE_I )
- {
- @@ -2731,6 +2765,7 @@ void x264_macroblock_analyse( x264_t *h )
- int i_cost = COST_MAX;
- h->mb.i_qp = x264_ratecontrol_mb_qp( h );
- +
- /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
- * to lower the bit cost of the qp_delta. Don't do this if QPRD is enabled. */
- if( h->param.rc.i_aq_mode && h->param.analyse.i_subpel_refine < 10 && abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
- @@ -3476,7 +3511,8 @@ intra_analysis:
- x264_mb_analyse_qp_rd( h, &analysis );
- h->mb.b_trellis = h->param.analyse.i_trellis;
- - h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
- + h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
- +
- if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
- x264_psy_trellis_init( h, 0 );
- if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 5539b4b..a2d13f0 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -246,7 +246,7 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
- static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- {
- - int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- + int i_dqp = SPEC_QP( h->mb.i_qp ) - SPEC_QP( h->mb.i_last_qp );
- int ctx;
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index ca35210..7224a9d 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -208,7 +208,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
- static void cavlc_qp_delta( x264_t *h )
- {
- bs_t *s = &h->out.bs;
- - int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
- + int i_dqp = SPEC_QP( h->mb.i_qp ) - SPEC_QP( h->mb.i_last_qp );
- /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
- if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index a6b53fd..89f1f39 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -163,7 +163,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
- sh->i_cabac_init_idc = param->i_cabac_init_idc;
- sh->i_qp = i_qp;
- - sh->i_qp_delta = i_qp - pps->i_pic_init_qp;
- + sh->i_qp_delta = SPEC_QP( sh->i_qp ) - pps->i_pic_init_qp;
- sh->b_sp_for_swidth = 0;
- sh->i_qs_delta = 0;
- @@ -1050,7 +1050,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
- p += sprintf( p, " none!" );
- x264_log( h, X264_LOG_INFO, "%s\n", buf );
- - for( qp = h->param.rc.i_qp_min; qp <= h->param.rc.i_qp_max; qp++ )
- + int qp_max = h->param.rc.i_qp_max == QP_MAX_SPEC ? QP_MAX : h->param.rc.i_qp_max;
- + for( qp = h->param.rc.i_qp_min; qp <= qp_max; qp++ )
- if( x264_analyse_init_costs( h, qp ) )
- goto fail;
- if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
- @@ -1058,7 +1059,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
- static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
- /* Checks for known miscompilation issues. */
- - if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
- + if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
- {
- x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
- goto fail;
- @@ -1879,7 +1880,7 @@ static int x264_slice_write( x264_t *h )
- if( h->sh.i_first_mb != h->i_threadslice_start * h->mb.i_mb_width )
- {
- h->sh.i_qp = h->mb.i_last_qp;
- - h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
- + h->sh.i_qp_delta = SPEC_QP( h->sh.i_qp ) - h->pps->i_pic_init_qp;
- }
- x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- index 39fed61..11a63d9 100644
- --- a/encoder/macroblock.c
- +++ b/encoder/macroblock.c
- @@ -84,6 +84,8 @@ static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
- static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
- {
- int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
- + if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
- + h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
- if( h->mb.b_trellis )
- return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
- else
- @@ -93,6 +95,8 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, i
- static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
- {
- int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
- + if( h->mb.b_noise_reduction )
- + h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
- if( h->mb.b_trellis )
- return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
- else
- @@ -115,6 +119,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
- int nz;
- pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
- pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
- + int i_qp_spec = SPEC_QP( i_qp );
- ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
- if( h->mb.b_lossless )
- @@ -133,7 +138,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
- {
- h->mb.i_cbp_luma |= 1<<(idx>>2);
- h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
- - h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
- + h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp_spec );
- h->dctf.add4x4_idct( p_dst, dct4x4 );
- }
- }
- @@ -161,6 +166,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
- int nz;
- pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
- pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
- + int i_qp_spec = SPEC_QP( i_qp );
- ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
- if( h->mb.b_lossless )
- @@ -178,7 +184,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
- {
- h->mb.i_cbp_luma |= 1<<idx;
- h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
- - h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
- + h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp_spec );
- h->dctf.add8x8_idct8( p_dst, dct8x8 );
- STORE_8x8_NNZ( s8, 1 );
- }
- @@ -196,6 +202,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- int nz;
- int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
- + int i_qp_spec = SPEC_QP( i_qp );
- if( h->mb.b_lossless )
- {
- @@ -218,6 +225,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- for( int i = 0; i < 16; i++ )
- {
- /* copy dc coeff */
- + if( h->mb.b_noise_reduction )
- + h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[0], h->nr_offset[0], 16 );
- dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0];
- dct4x4[i][0] = 0;
- @@ -227,7 +236,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- if( nz )
- {
- h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
- - h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
- + h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp_spec );
- if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
- h->mb.i_cbp_luma = 0xf;
- }
- @@ -254,7 +263,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
- /* output samples to fdec */
- h->dctf.idct4x4dc( dct_dc4x4 );
- - h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */
- + h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp_spec ); /* XXX not inversed */
- if( h->mb.i_cbp_luma )
- for( int i = 0; i < 16; i++ )
- dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]];
- @@ -326,13 +335,15 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- {
- int nz, nz_dc;
- int b_decimate = b_inter && h->mb.b_dct_decimate;
- + int i_qp_spec = SPEC_QP( i_qp );
- ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
- h->mb.i_cbp_chroma = 0;
- + h->nr_count[2] += h->mb.b_noise_reduction * 4;
- /* Early termination: check variance of chroma residual before encoding.
- * Don't bother trying early termination at low QPs.
- * Values are experimentally derived. */
- - if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) )
- + if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
- {
- int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
- int ssd[2];
- @@ -363,11 +374,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- if( nz_dc )
- {
- - if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp, dct2x2 ) )
- + if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp_spec, dct2x2 ) )
- continue;
- h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 1;
- zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
- - idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
- + idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
- h->dctf.add8x8_idct_dc( h->mb.pic.p_fdec[1+ch], dct2x2 );
- h->mb.i_cbp_chroma = 1;
- }
- @@ -401,6 +412,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- }
- h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
- + if( h->mb.b_noise_reduction )
- + for( int i = 0; i < 4; i++ )
- + h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );
- dct2x2dc( dct2x2, dct4x4 );
- /* calculate dct coeffs */
- for( int i = 0; i < 4; i++ )
- @@ -414,7 +428,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- {
- nz_ac = 1;
- h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
- - h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
- + h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
- if( b_decimate )
- i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
- }
- @@ -443,7 +457,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- }
- /* DC-only */
- zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
- - idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
- + idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
- h->dctf.add8x8_idct_dc( p_dst, dct2x2 );
- }
- else
- @@ -452,7 +466,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
- if( nz_dc )
- {
- zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
- - idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
- + idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
- }
- h->dctf.add8x8_idct( p_dst, dct4x4 );
- }
- @@ -587,6 +601,7 @@ void x264_macroblock_encode( x264_t *h )
- int b_decimate = h->mb.b_dct_decimate;
- int b_force_no_skip = 0;
- int nz;
- + int i_qp_spec = SPEC_QP( i_qp );
- h->mb.i_cbp_luma = 0;
- h->mb.cache.non_zero_count[x264_scan8[24]] = 0;
- @@ -748,8 +763,6 @@ void x264_macroblock_encode( x264_t *h )
- for( int idx = 0; idx < 4; idx++ )
- {
- - if( h->mb.b_noise_reduction )
- - h->quantf.denoise_dct( dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
- nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
- if( nz )
- @@ -782,7 +795,7 @@ void x264_macroblock_encode( x264_t *h )
- if( h->mb.i_cbp_luma&(1<<idx) )
- {
- - h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
- + h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp_spec );
- h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE], dct8x8[idx] );
- STORE_8x8_NNZ( s8, 1 );
- }
- @@ -807,15 +820,13 @@ void x264_macroblock_encode( x264_t *h )
- {
- int idx = i8x8 * 4 + i4x4;
- - if( h->mb.b_noise_reduction )
- - h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
- nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
- h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
- if( nz )
- {
- h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
- - h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
- + h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp_spec );
- if( b_decimate && i_decimate_8x8 < 6 )
- i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
- cbp = 1;
- @@ -1019,12 +1030,16 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
- void x264_noise_reduction_update( x264_t *h )
- {
- - for( int cat = 0; cat < 2; cat++ )
- + h->nr_offset = h->nr_offset_denoise;
- + h->nr_residual_sum = h->nr_residual_sum_buf[0];
- + h->nr_count = h->nr_count_buf[0];
- + for( int cat = 0; cat < 3; cat++ )
- {
- - int size = cat ? 64 : 16;
- - const uint16_t *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
- + int dct8x8 = cat == 1;
- + int size = dct8x8 ? 64 : 16;
- + const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
- - if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
- + if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
- {
- for( int i = 0; i < size; i++ )
- h->nr_residual_sum[cat][i] >>= 1;
- @@ -1036,6 +1051,9 @@ void x264_noise_reduction_update( x264_t *h )
- ((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
- + h->nr_residual_sum[cat][i]/2)
- / ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
- +
- + /* Don't denoise DC coefficients */
- + h->nr_offset[cat][0] = 0;
- }
- }
- @@ -1054,6 +1072,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- int b_decimate = h->mb.b_dct_decimate;
- int nnz8x8 = 0;
- int nz;
- + int i_qp_spec = SPEC_QP( i_qp );
- if( !h->mb.b_skip_mc )
- x264_mb_mc_8x8( h, i8 );
- @@ -1101,7 +1120,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- if( nnz8x8 )
- {
- - h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
- + h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp_spec );
- h->dctf.add8x8_idct8( p_fdec, dct8x8 );
- STORE_8x8_NNZ( s8, 1 );
- }
- @@ -1123,7 +1142,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- if( nz )
- {
- h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
- - h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
- + h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp_spec );
- if( b_decimate )
- i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[i8*4+i4] );
- nnz8x8 = 1;
- @@ -1146,8 +1165,9 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
- p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
- p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
- -
- h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
- + if( h->mb.b_noise_reduction )
- + h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
- dct4x4[0] = 0;
- if( h->mb.b_trellis )
- @@ -1159,7 +1179,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
- if( nz )
- {
- h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
- - h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
- + h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp_spec );
- h->dctf.add4x4_idct( p_fdec, dct4x4 );
- }
- }
- diff --git a/encoder/me.c b/encoder/me.c
- index 90f7dfd..9ca04d0 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -597,7 +597,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- int delta = x264_pixel_size[sad_size].w;
- int16_t *xs = h->scratch_buffer;
- int xn;
- - uint16_t *cost_fpel_mvx = h->cost_mv_fpel[x264_lambda_tab[h->mb.i_qp]][-m->mvp[0]&3] + (-m->mvp[0]>>2);
- + uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
- h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
- p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
- diff --git a/encoder/me.h b/encoder/me.h
- index fd99a5d..58cd3e0 100644
- --- a/encoder/me.h
- +++ b/encoder/me.h
- @@ -71,7 +71,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
- void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
- uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
- -extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
- +extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
- #define COPY1_IF_LT(x,y)\
- if((y)<(x))\
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 8f7ef9a..e727ab0 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -1441,8 +1441,14 @@ int x264_ratecontrol_mb_qp( x264_t *h )
- x264_emms();
- float qp = h->rc->qpm;
- if( h->param.rc.i_aq_mode )
- - /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
- - qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
- + {
- + /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
- + float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
- + /* Scale AQ's effect towards zero in emergency mode. */
- + if( qp > QP_MAX_SPEC )
- + qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
- + qp += qp_offset;
- + }
- return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement