Advertisement
Guest User

Untitled

a guest
May 23rd, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 24.21 KB | None | 0 0
  1. diff --git a/common/common.h b/common/common.h
  2. index b8c6dfd..615c045 100644
  3. --- a/common/common.h
  4. +++ b/common/common.h
  5. @@ -750,7 +750,7 @@ struct x264_t
  6.          int     i_direct_score[2];
  7.          int     i_direct_frames[2];
  8.          /* num p-frames weighted */
  9. -        int     i_wpred[3];
  10. +        int     i_wpred[2];
  11.  
  12.      } stat;
  13.  
  14. diff --git a/common/frame.h b/common/frame.h
  15. index 419da08..7ba014f 100644
  16. --- a/common/frame.h
  17. +++ b/common/frame.h
  18. @@ -112,8 +112,8 @@ typedef struct x264_frame
  19.      uint16_t *i_inv_qscale_factor;
  20.      int     b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
  21.      float   f_weighted_cost_delta[X264_BFRAME_MAX+2];
  22. -    uint32_t i_pixel_sum;
  23. -    uint64_t i_pixel_ssd;
  24. +    uint32_t i_pixel_sum[3];
  25. +    uint64_t i_pixel_ssd[3];
  26.  
  27.      /* hrd */
  28.      x264_hrd_t hrd_timing;
  29. diff --git a/encoder/analyse.c b/encoder/analyse.c
  30. index 2ece9dc..74672d1 100644
  31. --- a/encoder/analyse.c
  32. +++ b/encoder/analyse.c
  33. @@ -1480,7 +1480,7 @@ static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a,
  34.          weight[1].weightfn[width>>2]( &pix1[x+y*16], 16, &pix1[x+y*16], 16, &weight[1], height ); \
  35.      h->mc.mc_chroma( &pix2[x+y*16], 16, &p_fref[5][or+x+y*i_stride], i_stride, (me).mv[0], (me).mv[1]+mvy_offset, width, height ); \
  36.      if( weight[2].weightfn ) \
  37. -        weight[1].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
  38. +        weight[2].weightfn[width>>2]( &pix2[x+y*16], 16, &pix2[x+y*16], 16, &weight[2], height );
  39.  
  40.  
  41.      if( pixel == PIXEL_4x4 )
  42. diff --git a/encoder/encoder.c b/encoder/encoder.c
  43. index 300041e..9e119ca 100644
  44. --- a/encoder/encoder.c
  45. +++ b/encoder/encoder.c
  46. @@ -1395,49 +1395,66 @@ static void x264_weighted_pred_init( x264_t *h )
  47.  
  48.      int i_padv = PADV << h->param.b_interlaced;
  49.      int denom = -1;
  50. -    int weightluma = 0;
  51. +    int weightplane[2] = { 0, 0 };
  52.      int buffer_next = 0;
  53. -    //FIXME: when chroma support is added, move this into loop
  54. -    h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
  55. -    h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
  56. -    for( int j = 0; j < h->i_ref0; j++ )
  57. +
  58. +    for( int i = 0; i < 3; i++ )
  59.      {
  60. -        if( h->fenc->weight[j][0].weightfn )
  61. +        for( int j = 0; j < h->i_ref0; j++ )
  62.          {
  63. -            h->sh.weight[j][0] = h->fenc->weight[j][0];
  64. -            // if weight is useless, don't write it to stream
  65. -            if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
  66. -                h->sh.weight[j][0].weightfn = NULL;
  67. -            else
  68. +            if( h->fenc->weight[j][i].weightfn )
  69.              {
  70. -                if( !weightluma )
  71. +                h->sh.weight[j][i] = h->fenc->weight[j][i];
  72. +                // if weight is useless, don't write it to stream
  73. +                if( h->sh.weight[j][i].i_scale == 1<<h->sh.weight[j][i].i_denom && h->sh.weight[j][i].i_offset == 0 )
  74. +                    h->sh.weight[j][i].weightfn = NULL;
  75. +                else
  76.                  {
  77. -                    weightluma = 1;
  78. -                    h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
  79. -                    assert( x264_clip3( denom, 0, 7 ) == denom );
  80. +                    if( !weightplane[!!i] )
  81. +                    {
  82. +                        weightplane[!!i] = 1;
  83. +                        h->sh.weight[0][!!i].i_denom = denom = h->sh.weight[j][i].i_denom;
  84. +                        assert( x264_clip3( denom, 0, 7 ) == denom );
  85. +                    }
  86. +
  87. +                    assert( h->sh.weight[j][i].i_denom == denom );
  88. +                    if( !i )
  89. +                    {
  90. +                        h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
  91. +                        //scale full resolution frame
  92. +                        if( h->param.i_threads == 1 )
  93. +                        {
  94. +                            uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
  95. +                            uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
  96. +                            int stride = h->fenc->i_stride[0];
  97. +                            int width = h->fenc->i_width[0] + PADH*2;
  98. +                            int height = h->fenc->i_lines[0] + i_padv*2;
  99. +                            x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
  100. +                            h->fenc->i_lines_weighted = height;
  101. +                        }
  102. +                    }
  103.                  }
  104. -                assert( h->sh.weight[j][0].i_denom == denom );
  105. -                assert( x264_clip3( h->sh.weight[j][0].i_scale, 0, 127 ) == h->sh.weight[j][0].i_scale );
  106. -                assert( x264_clip3( h->sh.weight[j][0].i_offset, -128, 127 ) == h->sh.weight[j][0].i_offset );
  107. -                h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
  108. -                    h->fenc->i_stride[0] * i_padv + PADH;
  109.              }
  110.          }
  111. -
  112. -        //scale full resolution frame
  113. -        if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
  114. -        {
  115. -            uint8_t *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
  116. -            uint8_t *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
  117. -            int stride = h->fenc->i_stride[0];
  118. -            int width = h->fenc->i_width[0] + PADH*2;
  119. -            int height = h->fenc->i_lines[0] + i_padv*2;
  120. -            x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
  121. -            h->fenc->i_lines_weighted = height;
  122. -        }
  123.      }
  124. -    if( !weightluma )
  125. +
  126. +    if( weightplane[1] )
  127. +        for( int i = 0; i < h->i_ref0; i++ )
  128. +            if( h->sh.weight[i][1].weightfn && !h->sh.weight[i][2].weightfn )
  129. +            {
  130. +                h->sh.weight[i][2].i_scale = 1 << h->sh.weight[0][1].i_denom;
  131. +                h->sh.weight[i][2].i_offset = 0;
  132. +            }
  133. +            else if( h->sh.weight[i][2].weightfn && !h->sh.weight[i][1].weightfn )
  134. +            {
  135. +                h->sh.weight[i][1].i_scale = 1 << h->sh.weight[0][1].i_denom;
  136. +                h->sh.weight[i][1].i_offset = 0;
  137. +            }
  138. +
  139. +    if( !weightplane[0] )
  140.          h->sh.weight[0][0].i_denom = 0;
  141. +    if( !weightplane[1] )
  142. +        h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
  143.  }
  144.  
  145.  static inline void x264_reference_build_list( x264_t *h, int i_poc )
  146. @@ -1530,6 +1547,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  147.                  {
  148.                      w[0] = h->fenc->weight[0][0];
  149.                      w[0].i_offset--;
  150. +
  151.                      h->mc.weight_cache( h, &w[0] );
  152.                      idx = x264_weighted_reference_duplicate( h, 0, w );
  153.                  }
  154. @@ -2639,13 +2657,8 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  155.      {
  156.          h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
  157.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  158. -            for( int i = 0; i < 3; i++ )
  159. -                for( int j = 0; j < h->i_ref0; j++ )
  160. -                    if( h->sh.weight[0][i].i_denom != 0 )
  161. -                    {
  162. -                        h->stat.i_wpred[i]++;
  163. -                        break;
  164. -                    }
  165. +            for( int i = 0; i < 2; i++ )
  166. +                h->stat.i_wpred[i] += !!h->sh.weight[0][i].i_denom;
  167.      }
  168.      if( h->sh.i_type == SLICE_TYPE_B )
  169.      {
  170. @@ -2987,8 +3000,9 @@ void    x264_encoder_close  ( x264_t *h )
  171.          }
  172.  
  173.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  174. -            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
  175. -                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  176. +            x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
  177. +                      h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
  178. +                      h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  179.  
  180.          for( int i_list = 0; i_list < 2; i_list++ )
  181.              for( int i_slice = 0; i_slice < 2; i_slice++ )
  182. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  183. index b51dbf7..dc771db 100644
  184. --- a/encoder/ratecontrol.c
  185. +++ b/encoder/ratecontrol.c
  186. @@ -51,8 +51,8 @@ typedef struct
  187.      int s_count;
  188.      float blurred_complexity;
  189.      char direct_mode;
  190. -    int16_t weight[2];
  191. -    int16_t i_weight_denom;
  192. +    int16_t weight[3][2];
  193. +    int16_t i_weight_denom[2];
  194.      int refcount[16];
  195.      int refs;
  196.      int i_duration;
  197. @@ -777,11 +777,19 @@ int x264_ratecontrol_new( x264_t *h )
  198.              rce->refs = ref;
  199.  
  200.              /* find weights */
  201. -            rce->i_weight_denom = -1;
  202. +            rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
  203.              char *w = strchr( p, 'w' );
  204.              if( w )
  205. -                if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
  206. -                    rce->i_weight_denom = -1;
  207. +            {
  208. +                int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
  209. +                                    &rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
  210. +                                    &rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
  211. +                                    &rce->weight[2][0], &rce->weight[2][1] );
  212. +                if( count == 3 )
  213. +                    rce->i_weight_denom[1] = -1;
  214. +                else if ( count != 8 )
  215. +                    rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
  216. +            }
  217.  
  218.              if( pict_type != 'b' )
  219.                  rce->kept_as_ref = 1;
  220. @@ -1391,8 +1399,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
  221.      ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
  222.      if( h->param.analyse.i_weighted_pred <= 0 )
  223.          return;
  224. -    if( rce->i_weight_denom >= 0 )
  225. -        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
  226. +
  227. +    if( rce->i_weight_denom[0] >= 0 )
  228. +        SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
  229. +
  230. +    if( rce->i_weight_denom[1] >= 0 )
  231. +    {
  232. +        SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
  233. +        SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
  234. +    }
  235.  }
  236.  
  237.  /* After encoding one frame, save stats and update ratecontrol state */
  238. @@ -1449,9 +1464,19 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  239.                  goto fail;
  240.          }
  241.  
  242. -        if( h->sh.weight[0][0].weightfn )
  243. +        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
  244.          {
  245. -            if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  246. +            if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32,
  247. +                         h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  248. +                goto fail;
  249. +            if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
  250. +            {
  251. +                if( fprintf( rc->p_stat_file_out, ",%"PRId32",%"PRId32",%"PRId32",%"PRId32",%"PRId32"\n",
  252. +                             h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
  253. +                             h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
  254. +                    goto fail;
  255. +            }
  256. +            else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
  257.                  goto fail;
  258.          }
  259.  
  260. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  261. index 120a38a..63d1a26 100644
  262. --- a/encoder/slicetype.c
  263. +++ b/encoder/slicetype.c
  264. @@ -70,21 +70,42 @@ static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_
  265.  
  266.  void x264_weight_plane_analyse( x264_t *h, x264_frame_t *frame )
  267.  {
  268. -    uint32_t sad = 0;
  269. -    uint64_t ssd = 0;
  270. -    uint8_t *p = frame->plane[0];
  271. -    int stride = frame->i_stride[0];
  272. -    int width = frame->i_width[0];
  273. -    int height = frame->i_lines[0];
  274. -    for( int y = 0; y < height>>4; y++, p += stride*16 )
  275. -        for( int x = 0; x < width; x += 16 )
  276. +    for( int plane = 0; plane < 3; plane++ )
  277. +    {
  278. +        uint32_t sad = 0;
  279. +        uint64_t ssd = 0;
  280. +        uint8_t *p = frame->plane[plane];
  281. +        int stride = frame->i_stride[!!plane];
  282. +        int width = frame->i_width[!!plane];
  283. +        int height = frame->i_lines[!!plane];
  284. +        if( !plane )
  285.          {
  286. -            uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
  287. -            sad += (uint32_t)res;
  288. -            ssd += res >> 32;
  289. +            for( int y = 0; y < height; y+=16, p += stride*16 )
  290. +                for( int x = 0; x < width; x += 16 )
  291. +                {
  292. +                    uint64_t res = h->pixf.var[PIXEL_16x16]( p + x, stride );
  293. +                    sad += (uint32_t)res;
  294. +                    ssd += res >> 32;
  295. +                }
  296. +            frame->i_pixel_sum[plane] = sad;
  297. +            frame->i_pixel_ssd[plane] = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
  298.          }
  299. -    frame->i_pixel_sum = sad;
  300. -    frame->i_pixel_ssd = ssd - ((uint64_t)sad * sad + width * height / 2) / (width * height);
  301. +        else
  302. +        {
  303. +            ALIGNED_16( uint8_t flat[8] ) = {0};
  304. +            for( int y = 0; y < height; y+=8, p += stride*8 )
  305. +                for( int x = 0; x < width; x += 8 )
  306. +                {
  307. +                    uint32_t res = h->pixf.sad_aligned[PIXEL_8x8]( p + x, stride, flat, 0 );
  308. +                    sad += res;
  309. +                    ssd += res*res;
  310. +                }
  311. +            int mean = ( sad * 64 + width * height / 2 ) / ( width * height );
  312. +            ssd = (ssd + width * height / 128) / (width*height / 64);
  313. +            frame->i_pixel_sum[plane] = sad;
  314. +            frame->i_pixel_ssd[plane] = ssd - mean*mean;
  315. +        }
  316. +    }
  317.  }
  318.  
  319.  static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest )
  320. @@ -115,7 +136,35 @@ static NOINLINE uint8_t *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fe
  321.      return ref->lowres[0];
  322.  }
  323.  
  324. -static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
  325. +static NOINLINE uint8_t *x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, uint8_t *dest, int i )
  326. +{
  327. +    int ref0_distance = fenc->i_frame - ref->i_frame - 1;
  328. +    if( fenc->lowres_mvs[0][ref0_distance][0][0] != 0x7FFF )
  329. +    {
  330. +        int i_stride = fenc->i_stride[1];
  331. +        int i_lines = fenc->i_lines[1];
  332. +        int i_width = fenc->i_width[1];
  333. +        int i_mb_xy = 0;
  334. +        int x,y;
  335. +        int i_pel_offset = 0;
  336. +
  337. +        for( y = 0; y < i_lines; y += 8, i_pel_offset = y*i_stride )
  338. +            for( x = 0; x < i_width; x += 8, i_mb_xy++, i_pel_offset += 8 )
  339. +            {
  340. +                uint8_t *pix = &dest[ i_pel_offset ];
  341. +                int mvx = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][0];
  342. +                int mvy = fenc->lowres_mvs[0][ref0_distance][i_mb_xy][1];
  343. +                h->mc.mc_chroma( pix, i_stride, ref->plane[i], i_stride,
  344. +                               mvx, mvy, 8, 8 );
  345. +            }
  346. +        x264_emms();
  347. +        return dest;
  348. +    }
  349. +    x264_emms();
  350. +    return ref->plane[i];
  351. +}
  352. +
  353. +static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w )
  354.  {
  355.      unsigned int cost = 0;
  356.      int i_stride = fenc->i_stride_lowres;
  357. @@ -155,6 +204,35 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui
  358.      return cost;
  359.  }
  360.  
  361. +static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, uint8_t *src, x264_weight_t *w, int i )
  362. +{
  363. +    int x, y;
  364. +    unsigned int cost = 0;
  365. +    int i_stride = fenc->i_stride[i];
  366. +    int i_lines = fenc->i_lines[i];
  367. +    int i_width = fenc->i_width[i];
  368. +    uint8_t *fenc_plane = fenc->plane[i];
  369. +    ALIGNED_ARRAY_16( uint8_t, buf, [8*8] );
  370. +    int pixoff = 0;
  371. +    int i_mb = 0;
  372. +    ALIGNED_16( uint8_t flat[8]) = {0};
  373. +    if( w )
  374. +        for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
  375. +            for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
  376. +            {
  377. +                w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
  378. +                cost += abs( h->pixf.sad_aligned[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) -
  379. +                             h->pixf.sad_aligned[PIXEL_8x8]( buf, 8, flat, 0 ) );
  380. +            }
  381. +    else
  382. +        for( y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
  383. +            for( x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 )
  384. +                cost += abs( h->pixf.sad_aligned[PIXEL_8x8]( &src[pixoff], i_stride, flat, 0 ) -
  385. +                             h->pixf.sad_aligned[PIXEL_8x8]( &fenc_plane[pixoff], i_stride, flat, 0 ) );
  386. +    x264_emms();
  387. +    return cost;
  388. +}
  389. +
  390.  void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
  391.  {
  392.      float fenc_mean, ref_mean, fenc_var, ref_var;
  393. @@ -167,66 +245,106 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  394.      float guess_scale;
  395.      int found;
  396.      x264_weight_t *weights = fenc->weight[0];
  397. +    SET_WEIGHT(weights[1], 0, 1, 0, 0 );
  398. +    SET_WEIGHT(weights[2], 0, 1, 0, 0 );
  399. +    for( int plane = 0; plane <= 2  && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
  400. +    {
  401. +        fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
  402. +        ref_var  = round( sqrt(  ref->i_pixel_ssd[plane] ) );
  403. +        fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  404. +        ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  405.  
  406. -    fenc_var = round( sqrt( fenc->i_pixel_ssd ) );
  407. -    ref_var  = round( sqrt(  ref->i_pixel_ssd ) );
  408. -    fenc_mean = (float)fenc->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
  409. -    ref_mean  = (float) ref->i_pixel_sum / (fenc->i_lines[0] * fenc->i_width[0]);
  410. +        //early termination
  411. +        if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
  412. +        {
  413. +            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  414. +            continue;
  415. +        }
  416.  
  417. -    //early termination
  418. -    if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
  419. -    {
  420. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  421. -        return;
  422. -    }
  423. +        guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
  424.  
  425. -    guess_scale = ref_var ? fenc_var/ref_var : 0;
  426. -    x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[0] );
  427. +        if( plane )
  428. +        {
  429. +            weights[plane].i_denom = 6;
  430. +            weights[plane].i_scale = x264_clip_uint8( round(guess_scale * 64.0) );
  431. +            if( weights[plane].i_scale > 127 )
  432. +            {
  433. +                weights[1].weightfn = weights[2].weightfn = 0;
  434. +                break;
  435. +            }
  436. +        }
  437. +        else
  438. +            x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
  439.  
  440. -    found = 0;
  441. -    mindenom = weights[0].i_denom;
  442. -    minscale = weights[0].i_scale;
  443. -    minoff = 0;
  444. -    offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  445. +        found = 0;
  446. +        mindenom = weights[plane].i_denom;
  447. +        minscale = weights[plane].i_scale;
  448. +        minoff = 0;
  449.  
  450. -    if( !fenc->b_intra_calculated )
  451. -    {
  452. -        x264_mb_analysis_t a;
  453. -        x264_lowres_context_init( h, &a );
  454. -        x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  455. -    }
  456. -    uint8_t *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  457. -    origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
  458. +        if( !plane && !fenc->b_intra_calculated )
  459. +        {
  460. +            x264_mb_analysis_t a;
  461. +            x264_lowres_context_init( h, &a );
  462. +            x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  463. +        }
  464.  
  465. -    if( !minscore )
  466. -    {
  467. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  468. -        return;
  469. -    }
  470. +        uint8_t *mcbuf;
  471. +        if( !plane )
  472. +        {
  473. +            mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  474. +            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
  475. +        }
  476. +        else
  477. +        {
  478. +            mcbuf = x264_weight_cost_init_chroma( h, fenc, ref, h->mb.p_weight_buf[0], plane );
  479. +            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0, plane );
  480. +        }
  481.  
  482. -    // This gives a slight improvement due to rounding errors but only tests
  483. -    // one offset on lookahead.
  484. -    // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  485. -    for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  486. -    {
  487. -        SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
  488. -        unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0] );
  489. -        COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
  490. +        if( !minscore )
  491. +            continue;
  492. +
  493. +        // This gives a slight improvement due to rounding errors but only tests
  494. +        // one offset on lookahead.
  495. +        // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  496. +
  497. +        offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  498. +        for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  499. +        {
  500. +            SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
  501. +            unsigned int s;
  502. +            if( plane )
  503. +                s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane], plane );
  504. +            else
  505. +                s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
  506. +            COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
  507. +        }
  508. +        x264_emms();
  509. +
  510. +        /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  511. +        /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  512. +        if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  513. +        {
  514. +            SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  515. +            continue;
  516. +        }
  517. +        else
  518. +            SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );
  519. +
  520. +        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
  521. +            fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
  522.      }
  523. -    x264_emms();
  524.  
  525. -    /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  526. -    /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  527. -    if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  528. +    //FIXME: if the U and V denoms differ, both are reduced to the smaller one below; what is the correct way to deal with this?
  529. +    if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
  530.      {
  531. -        SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  532. -        return;
  533. +        int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
  534. +        int i;
  535. +        for( i = 1; i <= 2; i++ )
  536. +        {
  537. +            weights[i].i_scale = x264_clip_uint8( weights[i].i_scale >> ( weights[i].i_denom - denom ) );
  538. +            weights[i].i_denom = denom;
  539. +        }
  540.      }
  541. -    else
  542. -        SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
  543. -
  544. -    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
  545. -        fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
  546.  
  547.      if( weights[0].weightfn && b_lookahead )
  548.      {
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement