Dark Shikari

diff --git a/common/common.c b/common/common.c
index 44d9113..5b9b2c4 100644
--- a/common/common.c
+++ b/common/common.c
@@ -123,6 +123,9 @@ void    x264_param_default( x264_param_t *param )
     param->analyse.i_chroma_qp_offset = 0;
     param->analyse.b_fast_pskip = 1;
     param->analyse.b_dct_decimate = 1;
+    param->analyse.f_aq_strength = 1.0;
+    param->analyse.i_aq_mode = 2;
+    param->analyse.i_aq_metric = 3;
     param->analyse.i_luma_deadzone[0] = 21;
     param->analyse.i_luma_deadzone[1] = 11;
     param->analyse.b_psnr = 1;
@@ -455,6 +458,12 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
         p->analyse.b_fast_pskip = atobool(value);
     OPT("dct-decimate")
         p->analyse.b_dct_decimate = atobool(value);
+    OPT("aq-strength")
+        p->analyse.f_aq_strength = atof(value);
+    OPT("aq-mode")
+        p->analyse.i_aq_mode = atoi(value);
+    OPT("aq-metric")
+        p->analyse.i_aq_metric = atoi(value);
     OPT("deadzone-inter")
         p->analyse.i_luma_deadzone[0] = atoi(value);
     OPT("deadzone-intra")
@@ -883,6 +892,10 @@ char *x264_param2string( x264_param_t *p, int b_res )
         s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
         if( p->i_bframe )
             s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
+        if( p->analyse.i_aq_mode )
+            s += sprintf( s, " aq=%d:%.1f", p->analyse.i_aq_mode, p->analyse.f_aq_strength );
+        else
+            s += sprintf( s, " aq=0" );
         if( p->rc.psz_zones )
             s += sprintf( s, " zones=%s", p->rc.psz_zones );
         else if( p->rc.i_zones )
diff --git a/common/pixel.c b/common/pixel.c
index 1d5567b..a86932a 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -95,6 +95,39 @@ PIXEL_SSD_C( x264_pixel_ssd_8x4,    8,  4 )
 PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
 PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )

+/* PIXEL_NSSD_C( name, lx, ly ): defines static int name(), an lx-by-ly metric returning
+ * sum|pix1 - pix2| + 8 * |sum( |g(pix1)| - |g(pix2)| )|, where g is the 2x2 cross
+ * difference p[x] - p[x+stride] - p[x+1] + p[x+1+stride] (not taken on the last row/column). */
+#define PIXEL_NSSD_C( name, lx, ly ) \
+static int name( uint8_t *pix1, int i_stride_pix1,  \
+                 uint8_t *pix2, int i_stride_pix2 ) \
+{\
+    int i_abs_err = 0, i_grad_diff = 0, row, col;\
+    for( row = 0; row < ly; row++, pix1 += i_stride_pix1, pix2 += i_stride_pix2 )\
+    {\
+        for( col = 0; col < lx; col++ )\
+            i_abs_err += abs( pix1[col] - pix2[col] );\
+        if( row + 1 >= ly )\
+            continue; /* last row: nothing below it to difference against */\
+        for( col = 0; col < lx - 1; col++ )\
+        {\
+            const uint8_t *below1 = pix1 + i_stride_pix1;\
+            const uint8_t *below2 = pix2 + i_stride_pix2;\
+            i_grad_diff += abs( pix1[col] - below1[col] - pix1[col+1] + below1[col+1] )\
+                         - abs( pix2[col] - below2[col] - pix2[col+1] + below2[col+1] );\
+        }\
+    }\
+    return i_abs_err + abs( i_grad_diff ) * 8;\
+}
+
+PIXEL_NSSD_C( x264_pixel_nssd_16x16, 16, 16 )
+PIXEL_NSSD_C( x264_pixel_nssd_16x8,  16,  8 )
+PIXEL_NSSD_C( x264_pixel_nssd_8x16,   8, 16 )
+PIXEL_NSSD_C( x264_pixel_nssd_8x8,    8,  8 )
+PIXEL_NSSD_C( x264_pixel_nssd_8x4,    8,  4 )
+PIXEL_NSSD_C( x264_pixel_nssd_4x8,    4,  8 )
+PIXEL_NSSD_C( x264_pixel_nssd_4x4,    4,  4 )
+
 int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
 {
     int64_t i_ssd = 0;
@@ -530,6 +563,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
     INIT7( satd, );
     INIT7( satd_x3, );
     INIT7( satd_x4, );
+    INIT7( nssd, );
     INIT4( sa8d, );
     INIT_ADS( );

diff --git a/common/pixel.h b/common/pixel.h
index d533620..5a5ddec 100644
--- a/common/pixel.h
+++ b/common/pixel.h
@@ -74,6 +74,7 @@ typedef struct
     x264_pixel_cmp_t fpelcmp[7]; /* either satd or sad for fullpel motion search */
     x264_pixel_cmp_x3_t fpelcmp_x3[7];
     x264_pixel_cmp_x4_t fpelcmp_x4[7];
+    x264_pixel_cmp_t nssd[7];

     void (*ssim_4x4x2_core)( const uint8_t *pix1, int stride1,
                              const uint8_t *pix2, int stride2, int sums[2][4] );
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 0264621..b89724d 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -218,7 +218,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
     h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
                         && h->mb.i_subpel_refine >= 5;
-    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->b_mbrd;
+    h->mb.b_trellis = (h->param.analyse.i_trellis > 1 && a->b_mbrd);
     h->mb.b_transform_8x8 = 0;
     h->mb.b_noise_reduction = 0;

@@ -2064,8 +2064,13 @@ void x264_macroblock_analyse( x264_t *h )
     int i_cost = COST_MAX;
     int i;

-    /* init analysis */
-    x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
+    h->mb.i_qp = x264_ratecontrol_qp( h );
+
+    if( h->param.analyse.i_aq_mode )
+        x264_adaptive_quant( h );
+
+     /* init analysis */
+    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );

     /*--------------------------- Do the analysis ---------------------------*/
     if( h->sh.i_type == SLICE_TYPE_I )
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 3dadb02..73f289c 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -401,6 +401,7 @@ static int x264_validate_parameters( x264_t *h )
         h->param.analyse.b_fast_pskip = 0;
         h->param.analyse.i_noise_reduction = 0;
         h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
+        h->param.analyse.i_aq_mode = 0;
     }
     if( h->param.rc.i_rc_method == X264_RC_CQP )
     {
@@ -475,6 +476,17 @@ static int x264_validate_parameters( x264_t *h )
     if( !h->param.b_cabac )
         h->param.analyse.i_trellis = 0;
     h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
+    h->param.analyse.i_aq_mode = x264_clip3(h->param.analyse.i_aq_mode, 0, 2);
+    if(h->param.analyse.f_aq_strength <= 0) h->param.analyse.i_aq_mode = 0;
+    /* VAQ in mode 2 effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
+    if(h->param.analyse.i_aq_mode == 2)
+        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1);
+    h->param.analyse.i_aq_metric = x264_clip3(h->param.analyse.i_aq_metric, 0, 3);
+    if(h->param.analyse.i_aq_metric > 0)
+    {
+        x264_log( h, X264_LOG_WARNING, "AQ METRIC %d IS AN EXPERIMENTAL ADAPTIVE QUANTIZATION MODE.\n", h->param.analyse.i_aq_metric );
+        x264_log( h, X264_LOG_WARNING, "USE IT AT YOUR OWN RISK!\n" );
+    }
     h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );

     {
diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
index 0c8a6d7..ea9aafc 100644
--- a/encoder/ratecontrol.c
+++ b/encoder/ratecontrol.c
@@ -127,6 +127,10 @@ struct x264_ratecontrol_t
     predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
     int bframes;                /* # consecutive B-frames before this P-frame */
     int bframe_bits;            /* total cost of those frames */
+
+    /* AQ stuff */
+    float aq_threshold;
+    int *ac_energy;

     int i_zones;
     x264_zone_t *zones;
@@ -169,6 +173,247 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
            + rce->misc_bits;
 }

+static const int window_weights[7][7] = { /* symmetric 7x7 taps, 256 at the centre, sum = 49*128 */
+    {  41,  68,  94, 104,  94,  68,  41 },
+    {  68, 115, 155, 171, 155, 115,  68 },
+    {  94, 155, 209, 230, 209, 155,  94 },
+    { 104, 171, 230, 256, 230, 171, 104 },
+    {  94, 155, 209, 230, 209, 155,  94 },
+    {  68, 115, 155, 171, 155, 115,  68 },
+    {  41,  68,  94, 104,  94,  68,  41 } };
+
+/* Average weighted SSD (x256) of the window_x*window_y windows centred every `step` pixels
+ * inside one blocksize-square block; taps are weighted by window_weights (a 7x7 table). */
+static inline int windowed_variance( x264_t *h, uint8_t *plane, int stride, int window_x, int window_y, int blocksize, int mb_x, int mb_y, int step )
+{
+    const int half_w = (window_x - 1) / 2, half_h = (window_y - 1) / 2;
+    const int taps = window_x * window_y;
+    int first_x = half_w, last_x = blocksize - half_w;
+    int first_y = half_h, last_y = blocksize - half_h;
+    int i, j, px, py, count = 0;
+    uint64_t acc = 0;
+    uint8_t *row;
+    /* Macroblocks on a frame edge skip one more half-window on that side. */
+    if( mb_x == 0 )                       first_x += half_w;
+    if( mb_y == 0 )                       first_y += half_h;
+    if( mb_x == h->sps->i_mb_width - 1 )  last_x  -= half_w;
+    if( mb_y == h->sps->i_mb_height - 1 ) last_y  -= half_h;
+    row = plane - (half_w + half_h * stride) + first_y * stride;
+    for( py = first_y; py < last_y; py += step, row += stride * step )
+        for( px = first_x; px < last_x; px += step )
+        {
+            uint8_t *win = row + px; /* top-left tap of the window centred at (px,py) */
+            int mean = 0, ssd = 0;
+            for( j = 0; j < window_y; j++ )
+                for( i = 0; i < window_x; i++ )
+                    mean += window_weights[j][i] * win[i + j * stride];
+            mean = (((mean + 64) >> 7) + (taps + 1) / 2) / taps;
+            for( j = 0; j < window_y; j++ )
+                for( i = 0; i < window_x; i++ )
+                {
+                    int diff = win[i + j * stride] - mean;
+                    ssd += (window_weights[j][i] * diff * diff + 64) >> 7;
+                }
+            acc += ssd;
+            count++;
+        }
+    return (acc * 256) / count;
+}
+
+/* Mean of ssd - sad^2/64 (each taken against an all-zero 8x8 block) over the 8x8 windows
+ * sampled every `step` pixels in one block, scaled by 196; returns 0 if no window fits. */
+static inline int fast_windowed_variance( x264_t *h, uint8_t *plane, int stride, int blocksize, int mb_x, int mb_y, int step )
+{
+    DECLARE_ALIGNED( static uint8_t, zero[8], 16 ) = {0,0,0,0,0,0,0,0};
+    int locx,locy,n=0;
+    uint64_t total = 0;
+    int shiftx = 4; int shifty = 4;
+    int startx = shiftx; int starty = shifty;
+    int endx = blocksize - shiftx; int endy = blocksize - shifty;
+    if(mb_x == 0) startx += shiftx;
+    if(mb_y == 0) starty += shifty;
+    if(mb_x == h->sps->i_mb_width - 1) endx -= shiftx;
+    if(mb_y == h->sps->i_mb_height - 1) endy -= shifty;
+    plane += (starty - shifty) * stride - shiftx;
+    for(locy = starty; locy < endy; locy+=step, plane += stride*step)
+        for(locx = startx; locx < endx; locx+=step)
+        {
+            /* the window's top-left pixel sits shiftx/shifty before (locx,locy) */
+            int sad = h->pixf.sad[PIXEL_8x8](plane+locx,stride,zero,0);
+            int ssd = h->pixf.ssd[PIXEL_8x8](plane+locx,stride,zero,0);
+            total += ssd - ((sad * sad) >> 6);
+            n++;
+        }
+    /* A frame one macroblock wide or tall trims both ends and leaves n == 0. */
+    return n ? (total * 196) / n : 0;
+}
+
+/* AQ metric 3: 7x7 weighted-window variance of the macroblock, one window per pixel position
+ * (step 1); the loop below visits plane 0 only.  Returns the variance rounded and divided by 32.
+ * A non-NULL satd gets the block's SATD vs. zero added while the accumulated variance is nonzero. */
+static int aq_metric_3( x264_t *h, int mb_x, int mb_y, int *satd )
+{
+    DECLARE_ALIGNED( static uint8_t, zero[16], 16 ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+    unsigned int energy = 0, p;
+    for( p = 0; p < 1; p++ )
+    {
+        const int size = p ? 8 : 16, cmp = p ? PIXEL_8x8 : PIXEL_16x16;
+        const int field = h->mb.b_interlaced, pitch = h->fenc->i_stride[p];
+        int offset = size * (mb_x + (field ? (mb_y&~1) : mb_y) * pitch);
+        if( field )
+            offset += (mb_y&1) * pitch;
+        energy += windowed_variance( h, h->fenc->plane[p]+offset, pitch << field, 7, 7, size, mb_x, mb_y, 1 );
+        if( energy && satd )
+            *satd += h->pixf.satd[cmp]( zero, 0, h->fenc->plane[p]+offset, pitch << field );
+    }
+    return (energy + 16) >> 5;
+}
+
+/* AQ metric 2: 7x7 weighted-window variance of the macroblock, windows taken every 2nd pixel
+ * (plane 0 only).  A non-NULL satd gets the block's SATD vs. zero added while variance is nonzero. */
+static int aq_metric_2( x264_t *h, int mb_x, int mb_y, int *satd )
+{
+    DECLARE_ALIGNED( static uint8_t, zero[16], 16 ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+    unsigned int energy = 0, p;
+    for( p = 0; p < 1; p++ )
+    {
+        const int size = p ? 8 : 16, cmp = p ? PIXEL_8x8 : PIXEL_16x16;
+        const int field = h->mb.b_interlaced, pitch = h->fenc->i_stride[p];
+        int offset = size * (mb_x + (field ? (mb_y&~1) : mb_y) * pitch);
+        if( field )
+            offset += (mb_y&1) * pitch;
+        energy += windowed_variance( h, h->fenc->plane[p]+offset, pitch << field, 7, 7, size, mb_x, mb_y, 2 );
+        if( energy && satd )
+            *satd += h->pixf.satd[cmp]( zero, 0, h->fenc->plane[p]+offset, pitch << field );
+    }
+    return (energy + 16) >> 5;
+}
+
+/* AQ metric 1: variance of overlapping 8x8 windows taken every 2nd pixel of the macroblock
+ * (plane 0 only).  A non-NULL satd gets the block's SATD vs. zero added while variance is nonzero. */
+static int aq_metric_1( x264_t *h, int mb_x, int mb_y, int *satd )
+{
+    DECLARE_ALIGNED( static uint8_t, zero[16], 16 ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+    unsigned int energy = 0, p;
+    for( p = 0; p < 1; p++ )
+    {
+        const int size = p ? 8 : 16, cmp = p ? PIXEL_8x8 : PIXEL_16x16;
+        const int field = h->mb.b_interlaced, pitch = h->fenc->i_stride[p];
+        int offset = size * (mb_x + (field ? (mb_y&~1) : mb_y) * pitch);
+        if( field )
+            offset += (mb_y&1) * pitch;
+        energy += fast_windowed_variance( h, h->fenc->plane[p]+offset, pitch << field, size, mb_x, mb_y, 2 );
+        if( energy && satd )
+            *satd += h->pixf.satd[cmp]( zero, 0, h->fenc->plane[p]+offset, pitch << field );
+    }
+    return (energy + 16) >> 5;
+}
+
+/* AQ metric 0: whole-macroblock energy summed over planes 0..2, each plane contributing
+ * ssd - sad^2/N measured against a flat block of 128 (N = 256 for plane 0, 64 otherwise).
+ * A non-NULL satd accumulates SATD minus sad/2 per plane while the running energy is nonzero. */
+static int aq_metric_0( x264_t *h, int mb_x, int mb_y, int *satd )
+{
+    DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
+    unsigned int energy = 0, abs_sum, sq_sum, p;
+    for( p = 0; p < 3; p++ )
+    {
+        const int size = p ? 8 : 16, cmp = p ? PIXEL_8x8 : PIXEL_16x16;
+        const int field = h->mb.b_interlaced, pitch = h->fenc->i_stride[p];
+        int offset = size * (mb_x + (field ? (mb_y&~1) : mb_y) * pitch);
+        if( field )
+            offset += (mb_y&1) * pitch;
+        abs_sum = h->pixf.sad[cmp]( flat, 0, h->fenc->plane[p]+offset, pitch << field );
+        sq_sum  = h->pixf.ssd[cmp]( flat, 0, h->fenc->plane[p]+offset, pitch << field );
+        energy += sq_sum - (abs_sum * abs_sum >> (p ? 6 : 8));
+        /* SATD stands in for the intra bit cost; sad/2 is subtracted to leave out the DC term. */
+        if( energy && satd )
+            *satd += h->pixf.satd[cmp]( flat, 0, h->fenc->plane[p]+offset, pitch << field ) - abs_sum/2;
+    }
+    return energy;
+}
+
+/* Fill rc->ac_energy[] with the AQ energy of every macroblock in the frame and set
+ * rc->aq_threshold to the SATD-weighted mean of log(energy), or 15 if nothing was counted. */
+void x264_autosense_aq( x264_t *h )
+{
+    double total = 0, n = 0;
+    int mb_x, mb_y;
+    /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?).  Can we reuse them? */
+    /* FIXME: Is chroma SATD necessary? */
+    for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
+        for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
+        {
+            int energy, satd=0;
+            /* Measure the macroblock being iterated: h->mb.i_mb_x/y do not change inside
+             * this loop, so passing them would store one value in every ac_energy[] slot. */
+            if(h->param.analyse.i_aq_metric == 0)
+                energy = aq_metric_0( h, mb_x, mb_y, &satd );
+            else if(h->param.analyse.i_aq_metric == 1)
+                energy = aq_metric_1( h, mb_x, mb_y, &satd );
+            else if(h->param.analyse.i_aq_metric == 2)
+                energy = aq_metric_2( h, mb_x, mb_y, &satd );
+            else
+                energy = aq_metric_3( h, mb_x, mb_y, &satd );
+            h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
+            /* Weight log(energy) by the block's SATD so complex blocks count for more and large
+             * runs of simple blocks (e.g. letterbox bars) do not dominate the threshold. */
+            if( energy )
+            {
+                x264_cpu_restore(h->param.cpu);
+                total += logf(energy) * satd;
+                n += satd;
+            }
+        }
+    x264_cpu_restore(h->param.cpu);
+    h->rc->aq_threshold = n ? total/n : 15;
+}
+
+/*****************************************************************************
+ * x264_adaptive_quant:
+ * Shift the current macroblock's QP by 1.5 * (log(energy) - aq_threshold), scaled by
+ * aq-strength: energy above the threshold raises QP, energy below it lowers QP.
+ * Mode 2 measures the energy on the spot; otherwise rc->ac_energy[] is consulted.
+ * Also refreshes h->mb.i_chroma_qp to match the chosen luma QP.
+ *****************************************************************************/
+void x264_adaptive_quant( x264_t *h )
+{
+    const int base_qp = h->mb.i_qp;
+    const int mode = h->param.analyse.i_aq_mode;
+    int energy;
+    if( mode != 2 )
+        energy = h->rc->ac_energy[h->mb.i_mb_xy];
+    else
+        switch( h->param.analyse.i_aq_metric )
+        {
+            case 0:
+                energy = aq_metric_0( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+                break;
+            case 1:
+                energy = aq_metric_1( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+                break;
+            case 2:
+                energy = aq_metric_2( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+                break;
+            default:
+                energy = aq_metric_3( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
+                break;
+        }
+    if( energy != 0 )
+    {
+        float qp_adj;
+        /* NOTE(review): presumably resets SIMD state before the float math below -- confirm. */
+        x264_cpu_restore(h->param.cpu);
+        qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
+        if( mode == 1 ) /* only mode 1 bounds the swing */
+            qp_adj = x264_clip3f(qp_adj, -5, 5);
+        /* The +.5 rounds before the result is clamped to [i_qp_min, i_qp_max]. */
+        h->mb.i_qp = x264_clip3(base_qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
+    }
+    else /* zero energy: reuse h->mb.i_last_qp */
+        h->mb.i_qp = h->mb.i_last_qp;
+    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
+}

 int x264_ratecontrol_new( x264_t *h )
 {
@@ -244,7 +489,7 @@ int x264_ratecontrol_new( x264_t *h )
         rc->rate_tolerance = 0.01;
     }

-    h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass;
+    h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.i_aq_mode;

     if( rc->b_abr )
     {
@@ -458,10 +703,13 @@ int x264_ratecontrol_new( x264_t *h )
         x264_free( p );
     }

-    for( i=1; i<h->param.i_threads; i++ )
+    for( i=0; i<h->param.i_threads; i++ )
     {
         h->thread[i]->rc = rc+i;
-        rc[i] = rc[0];
+        if( i )
+            rc[i] = rc[0];
+        if( h->param.analyse.i_aq_mode == 1 )
+            rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
     }

     return 0;
@@ -623,6 +871,8 @@ void x264_ratecontrol_delete( x264_t *h )
                     x264_free( rc->zones[i].param );
         x264_free( rc->zones );
     }
+    for( i=0; i<h->param.i_threads; i++ )
+        x264_free( rc[i].ac_energy );
     x264_free( rc );
 }

@@ -729,6 +979,12 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp )

     if( h->sh.i_type != SLICE_TYPE_B )
         rc->last_non_b_pict_type = h->sh.i_type;
+
+    /* Adaptive AQ thresholding algorithm. */
+    if( h->param.analyse.i_aq_mode == 2 )
+        h->rc->aq_threshold = logf(5000.0); /* Arbitrary value for "center" of AQ curve. */
+    else if( h->param.analyse.i_aq_mode == 1 )
+        x264_autosense_aq(h);
 }

 double predict_row_size( x264_t *h, int y, int qp )
diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
index d4af2c0..e8b2ea1 100644
--- a/encoder/ratecontrol.h
+++ b/encoder/ratecontrol.h
@@ -34,6 +34,7 @@ void x264_ratecontrol_mb( x264_t *, int bits );
 int  x264_ratecontrol_qp( x264_t * );
 void x264_ratecontrol_end( x264_t *, int bits );
 void x264_ratecontrol_summary( x264_t * );
+void x264_adaptive_quant    ( x264_t * );

 #endif

diff --git a/x264.c b/x264.c
index f68755d..37618fe 100644
--- a/x264.c
+++ b/x264.c
@@ -244,6 +244,19 @@ static void Help( x264_param_t *defaults, int b_longhelp )
         "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
     H0( "      --no-fast-pskip         Disables early SKIP detection on P-frames\n" );
     H0( "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n" );
+    H0( "      --aq-strength <float>   Reduces blocking and blurring in flat and\n"
+        "                              textured areas. [%.1f]\n"
+        "                                  - 0.2: weak AQ\n"
+        "                                  - 1.0: very strong AQ\n", defaults->analyse.f_aq_strength );
+    H0( "      --aq-mode <integer>     How AQ distributes bits [%d]\n"
+        "                                  - 0: Disabled\n"
+        "                                  - 1: Avoid moving bits between frames\n"
+        "                                  - 2: Move bits between frames\n", defaults->analyse.i_aq_mode );
+    H0( "      --aq-metric <integer>   The metric used for AQ [%d]\n"
+        "                                  - 0: Whole-macroblock variance (fastest)\n"
+        "                                  - 1: Partial overlapped block variance\n"
+        "                                  - 2: Partial overlapped gaussian variance\n"
+        "                                  - 3: Full overlapped gaussian variance (slowest)\n", defaults->analyse.i_aq_metric );
     H0( "      --nr <integer>          Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
     H1( "\n" );
     H1( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
@@ -407,6 +420,9 @@ static int  Parse( int argc, char **argv,
             { "trellis", required_argument, NULL, 't' },
             { "no-fast-pskip", no_argument, NULL, 0 },
             { "no-dct-decimate", no_argument, NULL, 0 },
+            { "aq-strength", required_argument, NULL, 0 },
+            { "aq-mode", required_argument, NULL, 0 },
+            { "aq-metric", required_argument, NULL, 0 },
             { "deadzone-inter", required_argument, NULL, '0' },
             { "deadzone-intra", required_argument, NULL, '0' },
             { "level",   required_argument, NULL, 0 },
diff --git a/x264.h b/x264.h
index 70c9eaf..63712d4 100644
--- a/x264.h
+++ b/x264.h
@@ -232,6 +232,10 @@ typedef struct x264_param_t
         int          i_trellis;  /* trellis RD quantization */
         int          b_fast_pskip; /* early SKIP detection on P-frames */
         int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
+        float        f_aq_strength; /* psy adaptive QP */
+        int          i_aq_mode; /* 0 = off, 1 = auto, 2 = static sensitivity */
+        int          i_aq_metric; /* 0 = macroblock variance, 1 = partial overlapped 8x8 block variance */
+        /* 2 = partial overlapped 7x7 gaussian window variance, 3 = full overlapped 7x7 gaussian window variance. */
         int          i_noise_reduction; /* adaptive pseudo-deadzone */

         /* the deadzone size that will be used in luma quantization */