Advertisement
Guest User

Untitled

a guest
May 23rd, 2017
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. diff --git a/common/common.h b/common/common.h
  2. index b8c6dfd..615c045 100644
  3. --- a/common/common.h
  4. +++ b/common/common.h
  5. @@ -750,7 +750,7 @@ struct x264_t
  6.          int     i_direct_score[2];
  7.          int     i_direct_frames[2];
  8.          /* num p-frames weighted */
  9. -        int     i_wpred[3];
  10. +        int     i_wpred[2];
  11.  
  12.      } stat;
  13.  
  14. diff --git a/common/frame.h b/common/frame.h
  15. index 419da08..7ba014f 100644
  16. --- a/common/frame.h
  17. +++ b/common/frame.h
  18. @@ -112,8 +112,8 @@ typedef struct x264_frame
  19.      uint16_t *i_inv_qscale_factor;
  20.      int     b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
  21.      float   f_weighted_cost_delta[X264_BFRAME_MAX+2];
  22. -    uint32_t i_pixel_sum;
  23. -    uint64_t i_pixel_ssd;
  24. +    uint32_t i_pixel_sum[3];
  25. +    uint64_t i_pixel_ssd[3];
  26.  
  27.      /* hrd */
  28.      x264_hrd_t hrd_timing;
  29. diff --git a/encoder/encoder.c b/encoder/encoder.c
  30. index 300041e..805f451 100644
  31. --- a/encoder/encoder.c
  32. +++ b/encoder/encoder.c
  33. @@ -1395,49 +1395,66 @@ static void x264_weighted_pred_init( x264_t *h )
  34.  
  35.      int i_padv = PADV << h->param.b_interlaced;
  36.      int denom = -1;
  37. -    int weightluma = 0;
  38. +    int weightplane[2] = { 0, 0 };
  39.      int buffer_next = 0;
  40. -    //FIXME: when chroma support is added, move this into loop
  41. -    h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
  42. -    h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
  43. -    for( int j = 0; j < h->i_ref0; j++ )
  44. +
  45. +    for( int i = 0; i < 3; i++ )
  46.      {
  47. -        if( h->fenc->weight[j][0].weightfn )
  48. +        for( int j = 0; j < h->i_ref0; j++ )
  49.          {
  50. -            h->sh.weight[j][0] = h->fenc->weight[j][0];
  51. -            // if weight is useless, don't write it to stream
  52. -            if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
  53. -                h->sh.weight[j][0].weightfn = NULL;
  54. -            else
  55. +            if( h->fenc->weight[j][i].weightfn )
  56.              {
  57. -                if( !weightluma )
  58. +                h->sh.weight[j][i] = h->fenc->weight[j][i];
  59. +                // if weight is useless, don't write it to stream
  60. +                if( h->sh.weight[j][i].i_scale == 1<<h->sh.weight[j][i].i_denom && h->sh.weight[j][i].i_offset == 0 )
  61. +                    h->sh.weight[j][i].weightfn = NULL;
  62. +                else
  63.                  {
  64. -                    weightluma = 1;
  65. -                    h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
  66. -                    assert( x264_clip3( denom, 0, 7 ) == denom );
  67. +                    if( !weightplane[!!i] )
  68. +                    {
  69. +                        weightplane[!!i] = 1;
  70. +                        h->sh.weight[0][!!i].i_denom = denom = h->sh.weight[j][i].i_denom;
  71. +                        assert( x264_clip3( denom, 0, 7 ) == denom );
  72. +                    }
  73. +
  74. +                    assert( h->sh.weight[j][i].i_denom == denom );
  75. +                    if( !i )
  76. +                    {
  77. +                        h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
  78. +                        //scale full resolution frame
  79. +                        if( h->param.i_threads == 1 )
  80. +                        {
  81. +                            uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
  82. +                            uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
  83. +                            int stride = h->fenc->i_stride[0];
  84. +                            int width = h->fenc->i_width[0] + PADH*2;
  85. +                            int height = h->fenc->i_lines[0] + i_padv*2;
  86. +                            x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
  87. +                            h->fenc->i_lines_weighted = height;
  88. +                        }
  89. +                    }
  90.                  }
  91. -                assert( h->sh.weight[j][0].i_denom == denom );
  92. -                assert( x264_clip3( h->sh.weight[j][0].i_scale, 0, 127 ) == h->sh.weight[j][0].i_scale );
  93. -                assert( x264_clip3( h->sh.weight[j][0].i_offset, -128, 127 ) == h->sh.weight[j][0].i_offset );
  94. -                h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
  95. -                    h->fenc->i_stride[0] * i_padv + PADH;
  96.              }
  97.          }
  98. -
  99. -        //scale full resolution frame
  100. -        if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
  101. -        {
  102. -            uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
  103. -            uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
  104. -            int stride = h->fenc->i_stride[0];
  105. -            int width = h->fenc->i_width[0] + PADH*2;
  106. -            int height = h->fenc->i_lines[0] + i_padv*2;
  107. -            x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
  108. -            h->fenc->i_lines_weighted = height;
  109. -        }
  110.      }
  111. -    if( !weightluma )
  112. +
  113. +    if( weightplane[1] )
  114. +        for( int i = 0; i < h->i_ref0; i++ )
  115. +            if( h->sh.weight[i][1].weightfn && !h->sh.weight[i][2].weightfn )
  116. +            {
  117. +                h->sh.weight[i][2].i_scale = 1 << h->sh.weight[0][1].i_denom;
  118. +                h->sh.weight[i][2].i_offset = 0;
  119. +            }
  120. +            else if( h->sh.weight[i][2].weightfn && !h->sh.weight[i][1].weightfn )
  121. +            {
  122. +                h->sh.weight[i][1].i_scale = 1 << h->sh.weight[0][1].i_denom;
  123. +                h->sh.weight[i][1].i_offset = 0;
  124. +            }
  125. +
  126. +    if( !weightplane[0] )
  127.          h->sh.weight[0][0].i_denom = 0;
  128. +    if( !weightplane[1] )
  129. +        h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
  130.  }
  131.  
  132.  static inline void x264_reference_build_list( x264_t *h, int i_poc )
  133. @@ -2639,13 +2656,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  134.      {
  135.          h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
  136.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  137. -            for( int i = 0; i < 3; i++ )
  138. -                for( int j = 0; j < h->i_ref0; j++ )
  139. -                    if( h->sh.weight[0][i].i_denom != 0 )
  140. -                    {
  141. -                        h->stat.i_wpred[i]++;
  142. -                        break;
  143. -                    }
  144. +            for( int i = 0; i < 2; i++ )
  145. +                h->stat.i_wpred[i] += !!h->sh.weight[0][i].i_denom;
  146.      }
  147.      if( h->sh.i_type == SLICE_TYPE_B )
  148.      {
  149. @@ -2987,8 +2999,9 @@ void    x264_encoder_close  ( x264_t *h )
  150.          }
  151.  
  152.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  153. -            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
  154. -                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  155. +            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
  156. +                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
  157. +                      h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  158.  
  159.          for( int i_list = 0; i_list < 2; i_list++ )
  160.              for( int i_slice = 0; i_slice < 2; i_slice++ )
  161. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  162. index b51dbf7..dc771db 100644
  163. --- a/encoder/ratecontrol.c
  164. +++ b/encoder/ratecontrol.c
  165. @@ -51,8 +51,8 @@ typedef struct
  166.      int s_count;
  167.      float blurred_complexity;
  168.      char direct_mode;
  169. -    int16_t weight[2];
  170. -    int16_t i_weight_denom;
  171. +    int16_t weight[3][2];
  172. +    int16_t i_weight_denom[2];
  173.      int refcount[16];
  174.      int refs;
  175.      int i_duration;
  176. @@ -777,11 +777,19 @@ int x264_ratecontrol_new( x264_t *h )
  177.              rce->refs = ref;
  178.  
  179.              /* find weights */
  180. -            rce->i_weight_denom = -1;
  181. +            rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
  182.              char *w = strchr( p, 'w' );
  183.              if( w )
  184. -                if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
  185. -                    rce->i_weight_denom = -1;
  186. +            {
  187. +                int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
  188. +                                    &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
  189. +                                    &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
  190. +                                    &rce->weight[2][0], &rce->weight[2][1] );
  191. +                if( count == 3 )
  192. +                    rce->i_weight_denom[1] = -1;
  193. +                else if ( count != 8 )
  194. +                    rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
  195. +            }
  196.  
  197.              if( pict_type != 'b' )
  198.                  rce->kept_as_ref = 1;
  199. @@ -1391,8 +1399,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
  200.      ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
  201.      if( h->param.analyse.i_weighted_pred <= 0 )
  202.          return;
  203. -    if( rce->i_weight_denom >= 0 )
  204. -        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
  205. +
  206. +    if( rce->i_weight_denom[0] >= 0 )
  207. +        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
  208. +
  209. +    if( rce->i_weight_denom[1] >= 0 )
  210. +    {
  211. +        SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
  212. +        SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
  213. +    }
  214.  }
  215.  
  216.  /* After encoding one frame, save stats and update ratecontrol state */
  217. @@ -1449,9 +1464,19 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  218.                  goto fail;
  219.          }
  220.  
  221. -        if( h->sh.weight[0][0].weightfn )
  222. +        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
  223.          {
  224. -            if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  225. +            if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32,
  226. +                         h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  227. +                goto fail;
  228. +            if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
  229. +            {
  230. +                if( fprintf( rc->p_stat_file_out, ",%"PRId32",%"PRId32",%"PRId32",%"PRId32",%"PRId32"\n",
  231. +                             h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
  232. +                             h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
  233. +                    goto fail;
  234. +            }
  235. +            else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
  236.                  goto fail;
  237.          }
  238.  
  239. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  240. index 120a38a..3aedf77 100644
  241. --- a/encoder/slicetype.c
  242. +++ b/encoder/slicetype.c
  243. @@ -70,21 +70,42 @@ static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_
  244.  
  245.  void x264_weight_plane_analyse( x264_t *h, x264_frame_t *frame )
  246.  {
  247. -    uint32_t sad = 0;
  248. -    uint64_t ssd = 0;
  249. -    uint8_t *p = frame->plane[0];
  250. -    int stride = frame->i_stride[0];
  251. -    int width = frame->i_width[0];
  252. -    int height = frame->i_lines[0];
  253. -    for( int y = 0; y < height>>4; y++, p += stride*16 )
  254. -        for( int x = 0; x < width; x += 16 )
  255. +    for( int plane = 0; plane < 3; plane++ )
  256. +    {
  257. +        uint32_t sad = 0;
  258. +        uint64_t ssd = 0;
  259. +        uint8_t *p = frame->plane[plane];
  260. +        int stride = frame->i_stride[!!plane];
  261. +        int width = frame->i_width[!!plane];
  262. +        int height = frame->i_lines[!!plane];
  263. +        if( !plane )
  264.          {
  265. -            uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
  266. -            sad += (uint32_t)res;
  267. -            ssd += res >> 32;
  268. +            for( int y = 0; y < height; y+=16, p += stride*16 )
  269. +                for( int x = 0; x < width; x += 16 )
  270. +                {
  271. +                    uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
  272. +                    sad += (uint32_t)res;
  273. +                    ssd += res >> 32;
  274. +                }
  275. +            frame->i_pixel_sum[plane] = sad;
  276. +            frame->i_pixel_ssd[plane] = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
  277.          }
  278. -    frame->i_pixel_sum = sad;
  279. -    frame->i_pixel_ssd = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
  280. +        else
  281. +        {
  282. +            ALIGNED_16( uint8_t flat[8]) = {0};
  283. +            for( int y = 0; y < height; y+=8, p += stride*8 )
  284. +                for( int x = 0; x < width; x += 8 )
  285. +                {
  286. +                    uint32_t res = h->pixf.sad[PIXEL_8x8]( p + x, stride, flat, 0 );
  287. +                    sad += res;
  288. +                    ssd += res*res;
  289. +                }
  290. +            int mean = ( sad * 64 + width * height / 2 ) / ( width * height );
  291. +            ssd = (ssd + width * height / 128) / (width*height / 64);
  292. +            frame->i_pixel_sum[plane] = sad;
  293. +            frame->i_pixel_ssd[plane] = ssd - mean*mean;
  294. +        }
  295. +    }
  296.  }
  297.  
  298.  static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest )
  299. @@ -115,7 +136,35 @@ static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fe
  300.      return ref->lowres[0];
  301.  }
  302.  
  303. -static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
  304. +static NOINLINE uint8_t *x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int i )
  305. +{
  306. +    int ref0_distance = fenc->i_frame - ref->i_frame - 1;
  307. +    if( fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF )
  308. +    {
  309. +        int i_stride = fenc->i_stride[1];
  310. +        int i_lines = fenc->i_lines[1];
  311. +        int i_width = fenc->i_width[1];
  312. +        int i_mb_xy = 0;
  313. +        int x,y;
  314. +        int i_pel_offset = 0;
  315. +
  316. +        for( y = 0; y < i_lines; y += 8, i_pel_offset = y*i_stride )
  317. +            for( x = 0; x < i_width; x += 8, i_mb_xy++, i_pel_offset += 8 )
  318. +            {
  319. +                uint8_t *pix = &dest[ i_pel_offset ];
  320. +                int mvx = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][0];
  321. +                int mvy = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][1];
  322. +                h->mc.mc_chroma( pix, i_stride, ref->plane[i], i_stride,
  323. +                               mvx, mvy, 8, 8 );
  324. +            }
  325. +        x264_emms();
  326. +        return dest;
  327. +    }
  328. +    x264_emms();
  329. +    return ref->plane[i];
  330. +}
  331. +
  332. +static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
  333.  {
  334.      unsigned int cost = 0;
  335.      int i_stride = fenc->i_stride_lowres;
  336. @@ -155,6 +204,35 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui
  337.      return cost;
  338.  }
  339.  
  340. +static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int i )
  341. +{
  342. +    int x, y;
  343. +    unsigned int cost = 0;
  344. +    int i_stride = fenc->i_stride[i];
  345. +    int i_lines = fenc->i_lines[i];
  346. +    int i_width = fenc->i_width[i];
  347. +    uint8_t *fenc_plane = fenc->plane[i];
  348. +    ALIGNED_ARRAY_16( uint8_t, buf, [8*8] );
  349. +    int pixoff = 0;
  350. +    int i_mb = 0;
  351. +    ALIGNED_16( uint8_t flat[8]) = {0};
  352. +    if( w )
  353. +        for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
  354. +            for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8)
  355. +            {
  356. +                w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
  357. +                cost += abs( h->pixf.sad[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) -
  358. +                             h->pixf.sad[PIXEL_8x8]( buf, 8, flat, 0 ) );
  359. +            }
  360. +    else
  361. +        for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
  362. +            for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
  363. +                cost += abs( h->pixf.sad[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) -
  364. +                             h->pixf.sad[PIXEL_8x8]( &fenc_plane[pixoff], i_stride, flat, 0 ) );
  365. +    x264_emms();
  366. +    return cost;
  367. +}
  368. +
  369.  void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
  370.  {
  371.      float fenc_mean, ref_mean, fenc_var, ref_var;
  372. @@ -167,66 +245,106 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  373.      float guess_scale;
  374.      int found;
  375.      x264_weight_t *weights = fenc->weight[0];
  376. +    SET_WEIGHT(weights[1], 0, 1, 0, 0 );
  377. +    SET_WEIGHT(weights[2], 0, 1, 0, 0 );
  378. +    for( int plane = 0; plane <= 2  && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
  379. +    {
  380. +        fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
  381. +        ref_var  = round( sqrt(  ref->i_pixel_ssd[plane] ) );
  382. +        fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  383. +        ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  384.  
  385. -    fenc_var = round( sqrt( fenc->i_pixel_ssd ) );
  386. -    ref_var  = round( sqrt(  ref->i_pixel_ssd ) );
  387. -    fenc_mean = (float)fenc->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
  388. -    ref_mean  = (float) ref->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
  389. +        //early termination
  390. +        if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
  391. +        {
  392. +            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  393. +            continue;
  394. +        }
  395.  
  396. -    //early termination
  397. -    if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
  398. -    {
  399. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  400. -        return;
  401. -    }
  402. +        guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
  403.  
  404. -    guess_scale = ref_var ? fenc_var/ref_var : 0;
  405. -    x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[0] );
  406. +        if( plane )
  407. +        {
  408. +            weights[plane].i_denom = 6;
  409. +            weights[plane].i_scale = x264_clip_uint8( round(guess_scale * 64.0) );
  410. +            if( weights[plane].i_scale > 127 )
  411. +            {
  412. +                weights[1].weightfn = weights[2].weightfn = 0;
  413. +                break;
  414. +            }
  415. +        }
  416. +        else
  417. +            x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
  418.  
  419. -    found = 0;
  420. -    mindenom = weights[0].i_denom;
  421. -    minscale = weights[0].i_scale;
  422. -    minoff = 0;
  423. -    offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  424. +        found = 0;
  425. +        mindenom = weights[plane].i_denom;
  426. +        minscale = weights[plane].i_scale;
  427. +        minoff = 0;
  428.  
  429. -    if( !fenc->b_intra_calculated )
  430. -    {
  431. -        x264_mb_analysis_t a;
  432. -        x264_lowres_context_init( h, &a );
  433. -        x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  434. -    }
  435. -    uint8_t *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  436. -    origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
  437. +        if( !plane && !fenc->b_intra_calculated )
  438. +        {
  439. +            x264_mb_analysis_t a;
  440. +            x264_lowres_context_init( h, &a );
  441. +            x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  442. +        }
  443.  
  444. -    if( !minscore )
  445. -    {
  446. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  447. -        return;
  448. -    }
  449. +        uint8_t *mcbuf;
  450. +        if( !plane )
  451. +        {
  452. +            mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  453. +            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
  454. +        }
  455. +        else
  456. +        {
  457. +            mcbuf = x264_weight_cost_init_chroma( h, fenc, ref, h->mb.p_weight_buf[0], plane );
  458. +            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0, plane );
  459. +        }
  460.  
  461. -    // This gives a slight improvement due to rounding errors but only tests
  462. -    // one offset on lookahead.
  463. -    // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  464. -    for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  465. -    {
  466. -        SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
  467. -        unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0] );
  468. -        COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
  469. +        if( !minscore )
  470. +            continue;
  471. +
  472. +        // This gives a slight improvement due to rounding errors but only tests
  473. +        // one offset on lookahead.
  474. +        // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  475. +
  476. +        offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  477. +        for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  478. +        {
  479. +            SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
  480. +            unsigned int s;
  481. +            if( plane )
  482. +                s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane], plane );
  483. +            else
  484. +                s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
  485. +            COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
  486. +        }
  487. +        x264_emms();
  488. +
  489. +        /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  490. +        /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  491. +        if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  492. +        {
  493. +            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  494. +            continue;
  495. +        }
  496. +        else
  497. +            SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );
  498. +
  499. +        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
  500. +            fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
  501.      }
  502. -    x264_emms();
  503.  
  504. -    /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  505. -    /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  506. -    if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  507. +    //FIXME: both chroma planes are weighted but with different denoms; what is the correct way to reconcile them?
  508. +    if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
  509.      {
  510. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  511. -        return;
  512. +        int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
  513. +        int i;
  514. +        for( i = 1; i <= 2; i++ )
  515. +        {
  516. +            weights[i].i_scale = x264_clip_uint8( weights[i].i_scale >> ( weights[i].i_denom - denom ) );
  517. +            weights[i].i_denom = denom;
  518. +        }
  519.      }
  520. -    else
  521. -        SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
  522. -
  523. -    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
  524. -        fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
  525.  
  526.      if( weights[0].weightfn && b_lookahead )
  527.      {
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement