Advertisement
Guest User

Untitled

a guest
Jun 28th, 2017
490
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 40.86 KB | None | 0 0
  1. diff --git a/common/common.h b/common/common.h
  2. index 4307619..5c0ef1d 100644
  3. --- a/common/common.h
  4. +++ b/common/common.h
  5. @@ -59,12 +59,13 @@ do {\
  6.  #define X264_PCM_COST (384*BIT_DEPTH+16)
  7.  #define X264_LOOKAHEAD_MAX 250
  8.  #define QP_BD_OFFSET (6*(BIT_DEPTH-8))
  9. -#define QP_MAX (51+QP_BD_OFFSET)
  10. -#define QP_MAX_MAX (51+2*6)
  11. -#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
  12. +#define QP_MAX_SPEC (51+QP_BD_OFFSET) /* ceiling applied by SPEC_QP(); presumably the largest QP the bitstream syntax can carry -- confirm */
  13. +#define QP_MAX (QP_MAX_SPEC+24) /* 24 extra "emergency" levels above QP_MAX_SPEC (one nr_offset_emergency entry each) */
  14. +#define QP_MAX_MAX (51+2*6+24) /* QP_MAX evaluated at BIT_DEPTH 10; sizes the lambda tables (QP_MAX_MAX+1 entries) */
  15.  #define PIXEL_MAX ((1 << BIT_DEPTH)-1)
  16.  // arbitrary, but low because SATD scores are 1/4 normal
  17.  #define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
  18. +#define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC) /* clamp a (possibly emergency) QP down to QP_MAX_SPEC */
  19.  
  20.  // number of pixels (per thread) in progress at any given time.
  21.  // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
  22. @@ -458,12 +459,11 @@ struct x264_t
  23.      udctcoef        (*quant8_mf[2])[64];     /* [2][52][64] */
  24.      udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
  25.      udctcoef        (*quant8_bias[2])[64];   /* [2][52][64] */
  26. +    udctcoef        (*nr_offset_emergency)[3][64];
  27.  
  28. -    /* mv/ref cost arrays.  Indexed by lambda instead of
  29. -     * qp because, due to rounding, some quantizers share
  30. -     * lambdas.  This saves memory. */
  31. -    uint16_t *cost_mv[LAMBDA_MAX+1];
  32. -    uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
  33. +    /* mv/ref cost arrays, indexed by qp. */
  34. +    uint16_t *cost_mv[QP_MAX+1];
  35. +    uint16_t *cost_mv_fpel[QP_MAX+1][4];
  36.  
  37.      const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
  38.  
  39. @@ -811,9 +811,14 @@ struct x264_t
  40.  
  41.      } stat;
  42.  
  43. -    ALIGNED_16( uint32_t nr_residual_sum[2][64] );
  44. -    ALIGNED_16( udctcoef nr_offset[2][64] );
  45. -    uint32_t        nr_count[2];
  46. +    /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4 */
  47. +    udctcoef (*nr_offset)[64];
  48. +    uint32_t (*nr_residual_sum)[64];
  49. +    uint32_t *nr_count;
  50. +
  51. +    ALIGNED_16( udctcoef nr_offset_denoise[3][64] );
  52. +    ALIGNED_16( uint32_t nr_residual_sum_buf[2][3][64] );
  53. +    uint32_t nr_count_buf[2][3];
  54.  
  55.      /* Buffers that are allocated per-thread even in sliced threads. */
  56.      void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
  57. diff --git a/common/macroblock.c b/common/macroblock.c
  58. index 5c76d3f..8013957 100644
  59. --- a/common/macroblock.c
  60. +++ b/common/macroblock.c
  61. @@ -1171,7 +1171,7 @@ void x264_macroblock_cache_save( x264_t *h )
  62.          if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
  63.              h->mb.i_qp = h->mb.i_last_qp;
  64.          h->mb.qp[i_mb_xy] = h->mb.i_qp;
  65. -        h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
  66. +        h->mb.i_last_dqp = SPEC_QP( h->mb.i_qp ) - SPEC_QP( h->mb.i_last_qp );
  67.          h->mb.i_last_qp = h->mb.i_qp;
  68.      }
  69.  
  70. diff --git a/common/quant.c b/common/quant.c
  71. index 816e60a..36e59ee 100644
  72. --- a/common/quant.c
  73. +++ b/common/quant.c
  74. @@ -143,7 +143,7 @@ static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
  75.  
  76.  static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
  77.  {
  78. -    for( int i = 1; i < size; i++ )
  79. +    for( int i = 0; i < size; i++ )
  80.      {
  81.          int level = dct[i];
  82.          int sign = level>>31;
  83. diff --git a/common/set.c b/common/set.c
  84. index 92b6b7a..c4d5549 100644
  85. --- a/common/set.c
  86. +++ b/common/set.c
  87. @@ -23,6 +23,8 @@
  88.   * For more information, contact us at licensing@x264.com.
  89.   *****************************************************************************/
  90.  
  91. +#define _ISOC99_SOURCE
  92. +#include <math.h>
  93.  #include "common.h"
  94.  
  95.  #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
  96. @@ -146,21 +148,22 @@ int x264_cqm_init( x264_t *h )
  97.                       quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
  98.              }
  99.      }
  100. -    for( int q = 0; q < QP_MAX+1; q++ )
  101. +    for( int qm = 0; qm <= QP_MAX; qm++ )
  102.      {
  103.          int j;
  104. +        int q = SPEC_QP( qm );
  105.          for( int i_list = 0; i_list < 4; i_list++ )
  106.              for( int i = 0; i < 16; i++ )
  107.              {
  108. -                h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
  109. -                h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
  110. +                h->unquant4_mf[i_list][qm][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
  111. +                h->quant4_mf[i_list][qm][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
  112.                  if( !j )
  113.                  {
  114.                      min_qp_err = X264_MIN( min_qp_err, q );
  115.                      continue;
  116.                  }
  117.                  // round to nearest, unless that would cause the deadzone to be negative
  118. -                h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  119. +                h->quant4_bias[i_list][qm][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  120.                  if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
  121.                      max_qp_err = q;
  122.                  if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) )
  123. @@ -170,20 +173,62 @@ int x264_cqm_init( x264_t *h )
  124.              for( int i_list = 0; i_list < 2; i_list++ )
  125.                  for( int i = 0; i < 64; i++ )
  126.                  {
  127. -                    h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
  128. +                    h->unquant8_mf[i_list][qm][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
  129.                      j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
  130. -                    h->quant8_mf[i_list][q][i] = (uint16_t)j;
  131. +                    h->quant8_mf[i_list][qm][i] = (uint16_t)j;
  132.  
  133.                      if( !j )
  134.                      {
  135.                          min_qp_err = X264_MIN( min_qp_err, q );
  136.                          continue;
  137.                      }
  138. -                    h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  139. +                    h->quant8_bias[i_list][qm][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  140.                      if( j > 0xffff && q > max_qp_err )
  141.                          max_qp_err = q;
  142.                  }
  143.      }
  144. +    x264_emms();
  145. +    CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
  146. +    for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
  147. +        for( int cat = 0; cat <= 2; cat++ )
  148. +        {
  149. +            int dct8x8 = cat == 1;
  150. +            int size = dct8x8 ? 64 : 16;
  151. +            udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
  152. +            int lowest_dc = (QP_MAX-QP_MAX_SPEC)/2;
  153. +
  154. +            for( int i = 0; i < size; i++ )
  155. +            {
  156. +                /* True "emergency mode": remove all DCT coefficients */
  157. +                if( q == QP_MAX - QP_MAX_SPEC - 1 )
  158. +                {
  159. +                    nr_offset[i] = (1 << (7 + BIT_DEPTH)) - 1;
  160. +                    continue;
  161. +                }
  162. +
  163. +                double pos;
  164. +                if( i == 0 )
  165. +                {
  166. +                    /* Only reduce DC at very high QPs */
  167. +                    if( q < lowest_dc )
  168. +                    {
  169. +                        nr_offset[i] = 0;
  170. +                        continue;
  171. +                    }
  172. +                    pos = (double)(q-lowest_dc+1) / (QP_MAX - QP_MAX_SPEC - lowest_dc);
  173. +                }
  174. +                else
  175. +                    pos = (double)(q+1) / (QP_MAX - QP_MAX_SPEC);
  176. +
  177. +                /* Exponentially interpolate between "start" and "end" */
  178. +                double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i] / 4096.0
  179. +                                      : h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i] / 4096.0;
  180. +                double end = start * 32;
  181. +                double scale = log(end / start);
  182. +                double bias = exp(scale * pos) * start;
  183. +                nr_offset[i] = bias + 0.5;
  184. +            }
  185. +        }
  186.  
  187.      if( !h->mb.b_lossless && max_qp_err >= h->param.rc.i_qp_min )
  188.      {
  189. @@ -233,6 +278,7 @@ void x264_cqm_delete( x264_t *h )
  190.  {
  191.      CQM_DELETE( 4, 4 );
  192.      CQM_DELETE( 8, 2 );
  193. +    x264_free( h->nr_offset_emergency );
  194.  }
  195.  
  196.  static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
  197. diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
  198. index 5d7a15e..e09f6c9 100644
  199. --- a/common/x86/quant-a.asm
  200. +++ b/common/x86/quant-a.asm
  201. @@ -699,8 +699,7 @@ DEQUANT_DC sse2
  202.  ; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
  203.  ;-----------------------------------------------------------------------------
  204.  %macro DENOISE_DCT 1-2 0
  205. -cglobal denoise_dct_%1, 4,5,%2
  206. -    mov       r4d, [r0] ; backup DC coefficient
  207. +cglobal denoise_dct_%1, 4,4,%2
  208.      pxor      m6, m6
  209.  .loop:
  210.      sub       r3, mmsize/2
  211. @@ -727,8 +726,7 @@ cglobal denoise_dct_%1, 4,5,%2
  212.      mova      [r1+r3*4+0*mmsize], m4
  213.      mova      [r1+r3*4+1*mmsize], m5
  214.      jg .loop
  215. -    mov       [r0], r4d ; restore DC coefficient
  216. -    RET
  217. +    REP_RET
  218.  %endmacro
  219.  
  220.  %define PABSD PABSD_MMX
  221. @@ -749,8 +747,7 @@ DENOISE_DCT ssse3, 8
  222.  ; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
  223.  ;-----------------------------------------------------------------------------
  224.  %macro DENOISE_DCT 1-2 0
  225. -cglobal denoise_dct_%1, 4,5,%2
  226. -    movzx     r4d, word [r0]
  227. +cglobal denoise_dct_%1, 4,4,%2
  228.      pxor      m6, m6
  229.  .loop:
  230.      sub       r3, mmsize
  231. @@ -781,8 +778,7 @@ cglobal denoise_dct_%1, 4,5,%2
  232.      mova      [r1+r3*4+2*mmsize], m5
  233.      mova      [r1+r3*4+3*mmsize], m3
  234.      jg .loop
  235. -    mov       [r0], r4w
  236. -    RET
  237. +    REP_RET
  238.  %endmacro
  239.  
  240.  %define PABSW PABSW_MMX
  241. diff --git a/encoder/analyse.c b/encoder/analyse.c
  242. index a997425..d2ec63f 100644
  243. --- a/encoder/analyse.c
  244. +++ b/encoder/analyse.c
  245. @@ -147,18 +147,26 @@ const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
  246.    25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  247.    64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
  248.   161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
  249. + 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
  250. +1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
  251. +2048,2299,2580,2896,3251,3649,4096,4598, /* 80-87 */
  252.  };
  253.  
  254.  /* lambda2 = pow(lambda,2) * .9 * 256 */
  255. +/* Capped at 134217727 (2^27-1) to avoid overflow */
  256.  const int x264_lambda2_tab[QP_MAX_MAX+1] = {
  257. -     14,     18,     22,      28,      36,      45,      57,      72, /*  0- 7 */
  258. -     91,    115,    145,     182,     230,     290,     365,     460, /*  8-15 */
  259. -    580,    731,    921,    1161,    1462,    1843,    2322,    2925, /* 16-23 */
  260. -   3686,   4644,   5851,    7372,    9289,   11703,   14745,   18578, /* 24-31 */
  261. -  23407,  29491,  37156,   46814,   58982,   74313,   93628,  117964, /* 32-39 */
  262. - 148626, 187257, 235929,  297252,  374514,  471859,  594505,  749029, /* 40-47 */
  263. - 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
  264. -5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
  265. +       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
  266. +       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
  267. +      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
  268. +     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
  269. +    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
  270. +   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
  271. +   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
  272. +  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
  273. + 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
  274. +134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
  275. +134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
  276. +134217727,134217727,134217727,134217727,134217727,134217727,                   /* 82-87 */
  277.  };
  278.  
  279.  const uint8_t x264_exp2_lut[64] = {
  280. @@ -196,29 +204,40 @@ const float x264_log2_lz_lut[32] = {
  281.  // I'm just matching the behaviour of deadzone quant.
  282.  static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
  283.      // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
  284. -    {      46,      58,      73,      92,     117,     147,
  285. -          185,     233,     294,     370,     466,     587,
  286. -          740,     932,    1174,    1480,    1864,    2349,
  287. -         2959,    3728,    4697,    5918,    7457,    9395,
  288. -        11837,   14914,   18790,   23674,   29828,   37581,
  289. -        47349,   59656,   75163,   94699,  119313,  150326,
  290. -       189399,  238627,  300652,  378798,  477255,  601304,
  291. -       757596,  954511, 1202608, 1515192, 1909022, 2405217,
  292. -      3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
  293. -     12121539,15272182,19241743,24243077,30544363,38483486,
  294. -     48486154,61088726,76966972,96972308 },
  295. +    {   /* QP_MAX_MAX+1 = 88 entries; values are capped at 134217727 (2^27-1) */
  296. +               46,       58,       73,       92,      117,      147,
  297. +              185,      233,      294,      370,      466,      587,
  298. +              740,      932,     1174,     1480,     1864,     2349,
  299. +             2959,     3728,     4697,     5918,     7457,     9395,
  300. +            11837,    14914,    18790,    23674,    29828,    37581,
  301. +            47349,    59656,    75163,    94699,   119313,   150326,
  302. +           189399,   238627,   300652,   378798,   477255,   601304,
  303. +           757596,   954511,  1202608,  1515192,  1909022,  2405217,
  304. +          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
  305. +         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
  306. +         48486154, 61088726, 76966972, 96972308,
  307. +        122177453,134217727,134217727,134217727,134217727,134217727,134217727,134217727, /* 64-71 */
  308. +        134217727,134217727,134217727,134217727,134217727,134217727,134217727,134217727, /* 72-79 */
  309. +        134217727,134217727,134217727,134217727,134217727,134217727,134217727,134217727  /* 80-87 */
  310. +    },
  311.      // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
  312. -    {      27,      34,      43,      54,      68,      86,
  313. -          108,     136,     172,     216,     273,     343,
  314. -          433,     545,     687,     865,    1090,    1374,
  315. -         1731,    2180,    2747,    3461,    4361,    5494,
  316. -         6922,    8721,   10988,   13844,   17442,   21976,
  317. -        27688,   34885,   43953,   55377,   69771,   87906,
  318. -       110755,  139543,  175813,  221511,  279087,  351627,
  319. -       443023,  558174,  703255,  886046, 1116348, 1406511,
  320. -      1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
  321. -      7088374, 8930791,11252092,14176748,17861583,22504184,
  322. -     28353495,35723165,45008368,56706990 }
  323. +    {
  324. +               27,       34,       43,       54,       68,       86,
  325. +              108,      136,      172,      216,      273,      343,
  326. +              433,      545,      687,      865,     1090,     1374,
  327. +             1731,     2180,     2747,     3461,     4361,     5494,
  328. +             6922,     8721,    10988,    13844,    17442,    21976,
  329. +            27688,    34885,    43953,    55377,    69771,    87906,
  330. +           110755,   139543,   175813,   221511,   279087,   351627,
  331. +           443023,   558174,   703255,   886046,  1116348,  1406511,
  332. +          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
  333. +          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
  334. +         28353495, 35723165, 45008368, 56706990,
  335. +         71446330, 90016736,113413980,134217727,134217727,134217727,
  336. +        134217727,134217727,134217727,134217727,134217727,134217727,
  337. +        134217727,134217727,134217727,134217727,134217727,134217727,
  338. +        134217727,134217727,134217727,134217727,134217727,134217727
  339. +    }
  340.  };
  341.  
  342.  static const uint16_t x264_chroma_lambda2_offset_tab[] = {
  343. @@ -247,35 +266,35 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
  344.  
  345.  static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
  346.  
  347. -static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
  348. +static uint16_t x264_cost_ref[QP_MAX+1][3][33];
  349.  static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
  350.  
  351.  int x264_analyse_init_costs( x264_t *h, int qp )
  352.  {
  353.      int lambda = x264_lambda_tab[qp];
  354. -    if( h->cost_mv[lambda] )
  355. +    if( h->cost_mv[qp] )
  356.          return 0;
  357.      /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
  358. -    CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
  359. -    h->cost_mv[lambda] += 2*4*2048;
  360. +    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
  361. +    h->cost_mv[qp] += 2*4*2048;
  362.      for( int i = 0; i <= 2*4*2048; i++ )
  363.      {
  364. -        h->cost_mv[lambda][-i] =
  365. -        h->cost_mv[lambda][i]  = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
  366. +        h->cost_mv[qp][-i] =
  367. +        h->cost_mv[qp][i]  = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
  368.      }
  369.      x264_pthread_mutex_lock( &cost_ref_mutex );
  370.      for( int i = 0; i < 3; i++ )
  371.          for( int j = 0; j < 33; j++ )
  372. -            x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
  373. +            x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
  374.      x264_pthread_mutex_unlock( &cost_ref_mutex );
  375. -    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
  376. +    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
  377.      {
  378.          for( int j = 0; j < 4; j++ )
  379.          {
  380. -            CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
  381. -            h->cost_mv_fpel[lambda][j] += 2*2048;
  382. +            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
  383. +            h->cost_mv_fpel[qp][j] += 2*2048;
  384.              for( int i = -2*2048; i < 2*2048; i++ )
  385. -                h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
  386. +                h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
  387.          }
  388.      }
  389.      return 0;
  390. @@ -285,7 +304,7 @@ fail:
  391.  
  392.  void x264_analyse_free_costs( x264_t *h )
  393.  {
  394. -    for( int i = 0; i < LAMBDA_MAX+1; i++ )
  395. +    for( int i = 0; i < QP_MAX+1; i++ )
  396.      {
  397.          if( h->cost_mv[i] )
  398.              x264_free( h->cost_mv[i] - 2*4*2048 );
  399. @@ -326,16 +345,16 @@ void x264_analyse_weight_frame( x264_t *h, int end )
  400.  /* initialize an array of lambda*nbits for all possible mvs */
  401.  static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
  402.  {
  403. -    a->p_cost_mv = h->cost_mv[a->i_lambda];
  404. -    a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
  405. -    a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
  406. +    a->p_cost_mv = h->cost_mv[a->i_qp]; /* cost tables are keyed by qp, no longer by lambda */
  407. +    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; /* list 0 ref costs */
  408. +    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; /* list 1 ref costs */
  409.  }
  410.  
  411.  static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  412.  {
  413.      /* conduct the analysis using this lamda and QP */
  414.      a->i_qp = h->mb.i_qp = i_qp;
  415. -    h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];
  416. +    h->mb.i_chroma_qp = h->chroma_qp_table[SPEC_QP( i_qp )];
  417.  
  418.      a->i_lambda = x264_lambda_tab[i_qp];
  419.      a->i_lambda2 = x264_lambda2_tab[i_qp];
  420. @@ -343,10 +362,11 @@ static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp
  421.      h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
  422.      if( h->param.analyse.i_trellis )
  423.      {
  424. +        int effective_chroma_qp = h->mb.i_chroma_qp + X264_MAX( i_qp - QP_MAX_SPEC, 0 );
  425.          h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
  426.          h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
  427. -        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
  428. -        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
  429. +        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
  430. +        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
  431.      }
  432.      h->mb.i_psy_rd_lambda = a->i_lambda;
  433.      /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
  434. @@ -366,7 +386,6 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  435.      x264_mb_analyse_init_qp( h, a, i_qp );
  436.  
  437.      h->mb.b_transform_8x8 = 0;
  438. -    h->mb.b_noise_reduction = 0;
  439.  
  440.      /* I: Intra part */
  441.      a->i_satd_i16x16 =
  442. @@ -384,6 +403,21 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
  443.          a->i_mbrd ? 2 :
  444.          !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;
  445.  
  446. +    if( h->mb.i_qp > QP_MAX_SPEC )
  447. +    {
  448. +        h->nr_offset = h->nr_offset_emergency[h->mb.i_qp-QP_MAX_SPEC-1];
  449. +        h->nr_residual_sum = h->nr_residual_sum_buf[1];
  450. +        h->nr_count = h->nr_count_buf[1];
  451. +        h->mb.b_noise_reduction = 1;
  452. +    }
  453. +    else
  454. +    {
  455. +        h->nr_offset = h->nr_offset_denoise;
  456. +        h->nr_residual_sum = h->nr_residual_sum_buf[0];
  457. +        h->nr_count = h->nr_count_buf[0];
  458. +        h->mb.b_noise_reduction = 0;
  459. +    }
  460. +
  461.      /* II: Inter part P/B frame */
  462.      if( h->sh.i_type != SLICE_TYPE_I )
  463.      {
  464. @@ -2731,6 +2765,7 @@ void x264_macroblock_analyse( x264_t *h )
  465.      int i_cost = COST_MAX;
  466.  
  467.      h->mb.i_qp = x264_ratecontrol_mb_qp( h );
  468. +
  469.      /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
  470.       * to lower the bit cost of the qp_delta.  Don't do this if QPRD is enabled. */
  471.      if( h->param.rc.i_aq_mode && h->param.analyse.i_subpel_refine < 10 && abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
  472. @@ -3476,7 +3511,8 @@ intra_analysis:
  473.          x264_mb_analyse_qp_rd( h, &analysis );
  474.  
  475.      h->mb.b_trellis = h->param.analyse.i_trellis;
  476. -    h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
  477. +    h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
  478. +
  479.      if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
  480.          x264_psy_trellis_init( h, 0 );
  481.      if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
  482. diff --git a/encoder/cabac.c b/encoder/cabac.c
  483. index 5539b4b..a2d13f0 100644
  484. --- a/encoder/cabac.c
  485. +++ b/encoder/cabac.c
  486. @@ -246,7 +246,7 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
  487.  
  488.  static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  489.  {
  490. -    int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  491. +    int i_dqp = SPEC_QP( h->mb.i_qp) - SPEC_QP( h->mb.i_last_qp );
  492.      int ctx;
  493.  
  494.      /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
  495. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  496. index ca35210..7224a9d 100644
  497. --- a/encoder/cavlc.c
  498. +++ b/encoder/cavlc.c
  499. @@ -208,7 +208,7 @@ static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
  500.  static void cavlc_qp_delta( x264_t *h )
  501.  {
  502.      bs_t *s = &h->out.bs;
  503. -    int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  504. +    int i_dqp = SPEC_QP( h->mb.i_qp ) - SPEC_QP( h->mb.i_last_qp );
  505.  
  506.      /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
  507.      if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
  508. diff --git a/encoder/encoder.c b/encoder/encoder.c
  509. index a6b53fd..89f1f39 100644
  510. --- a/encoder/encoder.c
  511. +++ b/encoder/encoder.c
  512. @@ -163,7 +163,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
  513.      sh->i_cabac_init_idc = param->i_cabac_init_idc;
  514.  
  515.      sh->i_qp = i_qp;
  516. -    sh->i_qp_delta = i_qp - pps->i_pic_init_qp;
  517. +    sh->i_qp_delta = SPEC_QP( sh->i_qp ) - pps->i_pic_init_qp;
  518.      sh->b_sp_for_swidth = 0;
  519.      sh->i_qs_delta = 0;
  520.  
  521. @@ -1050,7 +1050,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
  522.          p += sprintf( p, " none!" );
  523.      x264_log( h, X264_LOG_INFO, "%s\n", buf );
  524.  
  525. -    for( qp = h->param.rc.i_qp_min; qp <= h->param.rc.i_qp_max; qp++ )
  526. +    int qp_max = h->param.rc.i_qp_max == QP_MAX_SPEC ? QP_MAX : h->param.rc.i_qp_max;
  527. +    for( qp = h->param.rc.i_qp_min; qp <= qp_max; qp++ )
  528.          if( x264_analyse_init_costs( h, qp ) )
  529.              goto fail;
  530.      if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
  531. @@ -1058,7 +1059,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  532.  
  533.      static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
  534.      /* Checks for known miscompilation issues. */
  535. -    if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
  536. +    if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
  537.      {
  538.          x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
  539.          goto fail;
  540. @@ -1879,7 +1880,7 @@ static int x264_slice_write( x264_t *h )
  541.      if( h->sh.i_first_mb != h->i_threadslice_start * h->mb.i_mb_width )
  542.      {
  543.          h->sh.i_qp = h->mb.i_last_qp;
  544. -        h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
  545. +        h->sh.i_qp_delta = SPEC_QP( h->sh.i_qp ) - h->pps->i_pic_init_qp;
  546.      }
  547.  
  548.      x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
  549. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  550. index 39fed61..11a63d9 100644
  551. --- a/encoder/macroblock.c
  552. +++ b/encoder/macroblock.c
  553. @@ -84,6 +84,8 @@ static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
  554.  static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
  555.  {
  556.      int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
  557. +    if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
  558. +        h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
  559.      if( h->mb.b_trellis )
  560.          return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
  561.      else
  562. @@ -93,6 +95,8 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, i
  563.  static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
  564.  {
  565.      int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
  566. +    if( h->mb.b_noise_reduction )
  567. +        h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
  568.      if( h->mb.b_trellis )
  569.          return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
  570.      else
  571. @@ -115,6 +119,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
  572.      int nz;
  573.      pixel *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
  574.      pixel *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
  575. +    int i_qp_spec = SPEC_QP( i_qp );
  576.      ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
  577.  
  578.      if( h->mb.b_lossless )
  579. @@ -133,7 +138,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
  580.      {
  581.          h->mb.i_cbp_luma |= 1<<(idx>>2);
  582.          h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
  583. -        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
  584. +        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp_spec );
  585.          h->dctf.add4x4_idct( p_dst, dct4x4 );
  586.      }
  587.  }
  588. @@ -161,6 +166,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
  589.      int nz;
  590.      pixel *p_src = &h->mb.pic.p_fenc[0][8*x + 8*y*FENC_STRIDE];
  591.      pixel *p_dst = &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE];
  592. +    int i_qp_spec = SPEC_QP( i_qp );
  593.      ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
  594.  
  595.      if( h->mb.b_lossless )
  596. @@ -178,7 +184,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
  597.      {
  598.          h->mb.i_cbp_luma |= 1<<idx;
  599.          h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
  600. -        h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
  601. +        h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp_spec );
  602.          h->dctf.add8x8_idct8( p_dst, dct8x8 );
  603.          STORE_8x8_NNZ( s8, 1 );
  604.      }
  605. @@ -196,6 +202,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  606.  
  607.      int nz;
  608.      int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
  609. +    int i_qp_spec = SPEC_QP( i_qp );
  610.  
  611.      if( h->mb.b_lossless )
  612.      {
  613. @@ -218,6 +225,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  614.      for( int i = 0; i < 16; i++ )
  615.      {
  616.          /* copy dc coeff */
  617. +        if( h->mb.b_noise_reduction )
  618. +            h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[0], h->nr_offset[0], 16 );
  619.          dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0];
  620.          dct4x4[i][0] = 0;
  621.  
  622. @@ -227,7 +236,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  623.          if( nz )
  624.          {
  625.              h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
  626. -            h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
  627. +            h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp_spec );
  628.              if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
  629.              h->mb.i_cbp_luma = 0xf;
  630.          }
  631. @@ -254,7 +263,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
  632.  
  633.          /* output samples to fdec */
  634.          h->dctf.idct4x4dc( dct_dc4x4 );
  635. -        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed */
  636. +        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp_spec );  /* XXX not inversed */
  637.          if( h->mb.i_cbp_luma )
  638.              for( int i = 0; i < 16; i++ )
  639.                  dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]];
  640. @@ -326,13 +335,15 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  641.  {
  642.      int nz, nz_dc;
  643.      int b_decimate = b_inter && h->mb.b_dct_decimate;
  644. +    int i_qp_spec = SPEC_QP( i_qp );
  645.      ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
  646.      h->mb.i_cbp_chroma = 0;
  647. +    h->nr_count[2] += h->mb.b_noise_reduction * 4;
  648.  
  649.      /* Early termination: check variance of chroma residual before encoding.
  650.       * Don't bother trying early termination at low QPs.
  651.       * Values are experimentally derived. */
  652. -    if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) )
  653. +    if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
  654.      {
  655.          int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
  656.          int ssd[2];
  657. @@ -363,11 +374,11 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  658.  
  659.                      if( nz_dc )
  660.                      {
  661. -                        if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp, dct2x2 ) )
  662. +                        if( !x264_mb_optimize_chroma_dc( h, b_inter, i_qp_spec, dct2x2 ) )
  663.                              continue;
  664.                          h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 1;
  665.                          zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
  666. -                        idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
  667. +                        idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
  668.                          h->dctf.add8x8_idct_dc( h->mb.pic.p_fdec[1+ch], dct2x2 );
  669.                          h->mb.i_cbp_chroma = 1;
  670.                      }
  671. @@ -401,6 +412,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  672.          }
  673.  
  674.          h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
  675. +        if( h->mb.b_noise_reduction )
  676. +            for( int i = 0; i < 4; i++ )
  677. +                h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );
  678.          dct2x2dc( dct2x2, dct4x4 );
  679.          /* calculate dct coeffs */
  680.          for( int i = 0; i < 4; i++ )
  681. @@ -414,7 +428,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  682.              {
  683.                  nz_ac = 1;
  684.                  h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
  685. -                h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
  686. +                h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
  687.                  if( b_decimate )
  688.                      i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
  689.              }
  690. @@ -443,7 +457,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  691.              }
  692.              /* DC-only */
  693.              zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
  694. -            idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
  695. +            idct_dequant_2x2_dconly( dct2x2, dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
  696.              h->dctf.add8x8_idct_dc( p_dst, dct2x2 );
  697.          }
  698.          else
  699. @@ -452,7 +466,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
  700.              if( nz_dc )
  701.              {
  702.                  zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
  703. -                idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
  704. +                idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp_spec );
  705.              }
  706.              h->dctf.add8x8_idct( p_dst, dct4x4 );
  707.          }
  708. @@ -587,6 +601,7 @@ void x264_macroblock_encode( x264_t *h )
  709.      int b_decimate = h->mb.b_dct_decimate;
  710.      int b_force_no_skip = 0;
  711.      int nz;
  712. +    int i_qp_spec = SPEC_QP( i_qp );
  713.      h->mb.i_cbp_luma = 0;
  714.      h->mb.cache.non_zero_count[x264_scan8[24]] = 0;
  715.  
  716. @@ -748,8 +763,6 @@ void x264_macroblock_encode( x264_t *h )
  717.  
  718.              for( int idx = 0; idx < 4; idx++ )
  719.              {
  720. -                if( h->mb.b_noise_reduction )
  721. -                    h->quantf.denoise_dct( dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
  722.                  nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
  723.  
  724.                  if( nz )
  725. @@ -782,7 +795,7 @@ void x264_macroblock_encode( x264_t *h )
  726.  
  727.                      if( h->mb.i_cbp_luma&(1<<idx) )
  728.                      {
  729. -                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
  730. +                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp_spec );
  731.                          h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][8*x + 8*y*FDEC_STRIDE], dct8x8[idx] );
  732.                          STORE_8x8_NNZ( s8, 1 );
  733.                      }
  734. @@ -807,15 +820,13 @@ void x264_macroblock_encode( x264_t *h )
  735.                  {
  736.                      int idx = i8x8 * 4 + i4x4;
  737.  
  738. -                    if( h->mb.b_noise_reduction )
  739. -                        h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
  740.                      nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
  741.                      h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
  742.  
  743.                      if( nz )
  744.                      {
  745.                          h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
  746. -                        h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
  747. +                        h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp_spec );
  748.                          if( b_decimate && i_decimate_8x8 < 6 )
  749.                              i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
  750.                          cbp = 1;
  751. @@ -1019,12 +1030,16 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
  752.  
  753.  void x264_noise_reduction_update( x264_t *h )
  754.  {
  755. -    for( int cat = 0; cat < 2; cat++ )
  756. +    h->nr_offset = h->nr_offset_denoise;
  757. +    h->nr_residual_sum = h->nr_residual_sum_buf[0];
  758. +    h->nr_count = h->nr_count_buf[0];
  759. +    for( int cat = 0; cat < 3; cat++ )
  760.      {
  761. -        int size = cat ? 64 : 16;
  762. -        const uint16_t *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
  763. +        int dct8x8 = cat == 1;
  764. +        int size = dct8x8 ? 64 : 16;
  765. +        const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
  766.  
  767. -        if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
  768. +        if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
  769.          {
  770.              for( int i = 0; i < size; i++ )
  771.                  h->nr_residual_sum[cat][i] >>= 1;
  772. @@ -1036,6 +1051,9 @@ void x264_noise_reduction_update( x264_t *h )
  773.                  ((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
  774.                   + h->nr_residual_sum[cat][i]/2)
  775.                / ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
  776. +
  777. +        /* Don't denoise DC coefficients */
  778. +        h->nr_offset[cat][0] = 0;
  779.      }
  780.  }
  781.  
  782. @@ -1054,6 +1072,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  783.      int b_decimate = h->mb.b_dct_decimate;
  784.      int nnz8x8 = 0;
  785.      int nz;
  786. +    int i_qp_spec = SPEC_QP( i_qp );
  787.  
  788.      if( !h->mb.b_skip_mc )
  789.          x264_mb_mc_8x8( h, i8 );
  790. @@ -1101,7 +1120,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  791.  
  792.                  if( nnz8x8 )
  793.                  {
  794. -                    h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp );
  795. +                    h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8PY], i_qp_spec );
  796.                      h->dctf.add8x8_idct8( p_fdec, dct8x8 );
  797.                      STORE_8x8_NNZ( s8, 1 );
  798.                  }
  799. @@ -1123,7 +1142,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  800.                  if( nz )
  801.                  {
  802.                      h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
  803. -                    h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp );
  804. +                    h->quantf.dequant_4x4( dct4x4[i4], h->dequant4_mf[CQM_4PY], i_qp_spec );
  805.                      if( b_decimate )
  806.                          i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[i8*4+i4] );
  807.                      nnz8x8 = 1;
  808. @@ -1146,8 +1165,9 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  809.              ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
  810.              p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
  811.              p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
  812. -
  813.              h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
  814. +            if( h->mb.b_noise_reduction )
  815. +                h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
  816.              dct4x4[0] = 0;
  817.  
  818.              if( h->mb.b_trellis )
  819. @@ -1159,7 +1179,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
  820.              if( nz )
  821.              {
  822.                  h->zigzagf.scan_4x4( h->dct.luma4x4[16+i8+ch*4], dct4x4 );
  823. -                h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp );
  824. +                h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4PC], i_qp_spec );
  825.                  h->dctf.add4x4_idct( p_fdec, dct4x4 );
  826.              }
  827.          }
  828. diff --git a/encoder/me.c b/encoder/me.c
  829. index 90f7dfd..9ca04d0 100644
  830. --- a/encoder/me.c
  831. +++ b/encoder/me.c
  832. @@ -597,7 +597,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
  833.              int delta = x264_pixel_size[sad_size].w;
  834.              int16_t *xs = h->scratch_buffer;
  835.              int xn;
  836. -            uint16_t *cost_fpel_mvx = h->cost_mv_fpel[x264_lambda_tab[h->mb.i_qp]][-m->mvp[0]&3] + (-m->mvp[0]>>2);
  837. +            uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
  838.  
  839.              h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
  840.                  p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
  841. diff --git a/encoder/me.h b/encoder/me.h
  842. index fd99a5d..58cd3e0 100644
  843. --- a/encoder/me.h
  844. +++ b/encoder/me.h
  845. @@ -71,7 +71,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
  846.  void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
  847.  uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
  848.  
  849. -extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
  850. +extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
  851.  
  852.  #define COPY1_IF_LT(x,y)\
  853.  if((y)<(x))\
  854. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  855. index 8f7ef9a..e727ab0 100644
  856. --- a/encoder/ratecontrol.c
  857. +++ b/encoder/ratecontrol.c
  858. @@ -1441,8 +1441,14 @@ int x264_ratecontrol_mb_qp( x264_t *h )
  859.      x264_emms();
  860.      float qp = h->rc->qpm;
  861.      if( h->param.rc.i_aq_mode )
  862. -        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
  863. -        qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
  864. +    {
  865. +        /* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
  866. +        float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
  867. +        /* Scale AQ's effect towards zero in emergency mode: factor is 1 at QP_MAX_SPEC, 0 at QP_MAX. */
  868. +        if( qp > QP_MAX_SPEC )
  869. +            qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
  870. +        qp += qp_offset;
  871. +    }
  872.      return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
  873.  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement