Advertisement
Guest User

techouse

a guest
Jun 8th, 2008
395
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 21.20 KB | None | 0 0
  1. diff --git a/common/common.c b/common/common.c
  2. index ff8ce77..8a3a593 100644
  3. --- a/common/common.c
  4. +++ b/common/common.c
  5. @@ -95,6 +95,7 @@ void x264_param_default( x264_param_t *param )
  6. param->rc.f_pb_factor = 1.3;
  7. param->rc.i_aq_mode = X264_AQ_GLOBAL;
  8. param->rc.f_aq_strength = 1.0;
  9. + param->analyse.i_fgo = 0;
  10.  
  11. param->rc.b_stat_write = 0;
  12. param->rc.psz_stat_out = "x264_2pass.log";
  13. @@ -519,6 +520,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  14. p->rc.i_aq_mode = atoi(value);
  15. OPT("aq-strength")
  16. p->rc.f_aq_strength = atof(value);
  17. + OPT("fgo")
  18. + p->analyse.i_fgo = atoi(value);
  19. OPT("pass")
  20. {
  21. int i = x264_clip3( atoi(value), 0, 3 );
  22. @@ -867,6 +870,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  23. s += sprintf( s, " nr=%d", p->analyse.i_noise_reduction );
  24. s += sprintf( s, " decimate=%d", p->analyse.b_dct_decimate );
  25. s += sprintf( s, " mbaff=%d", p->b_interlaced );
  26. + s += sprintf( s, " fgo=%d", p->analyse.i_fgo );
  27.  
  28. s += sprintf( s, " bframes=%d", p->i_bframe );
  29. if( p->i_bframe )
  30. diff --git a/common/pixel.c b/common/pixel.c
  31. index 1d5567b..71fc811 100644
  32. --- a/common/pixel.c
  33. +++ b/common/pixel.c
  34. @@ -70,7 +70,7 @@ PIXEL_SAD_C( x264_pixel_sad_4x4, 4, 4 )
  35. ****************************************************************************/
  36. #define PIXEL_SSD_C( name, lx, ly ) \
  37. static int name( uint8_t *pix1, int i_stride_pix1, \
  38. - uint8_t *pix2, int i_stride_pix2 ) \
  39. + uint8_t *pix2, int i_stride_pix2, int weight ) \
  40. { \
  41. int i_sum = 0; \
  42. int x, y; \
  43. @@ -95,6 +95,61 @@ PIXEL_SSD_C( x264_pixel_ssd_8x4, 8, 4 )
  44. PIXEL_SSD_C( x264_pixel_ssd_4x8, 4, 8 )
  45. PIXEL_SSD_C( x264_pixel_ssd_4x4, 4, 4 )
  46.  
  47. +#define PIXEL_NOISE_C( lx, ly) \
  48. +static int x264_pixel_noise_##lx##x##ly( uint8_t *pix, int i_stride ) \
  49. +{ /* Noise score of an lx-by-ly block: sum of |2x2 cross difference| over the block. */\
  50. + int score = 0;\
  51. + int x,y;\
  52. + for(y=0; y<ly; y++){\
  53. + if(y+1<ly){ /* skip the last row: it has no row below to difference against */\
  54. + for(x=0; x<lx-1; x++) /* likewise stop before the last column (needs pix[x+1]) */\
  55. + score += abs( pix[x] - pix[x+i_stride]\
  56. + - pix[x+1] + pix[x+1+i_stride]);\
  57. + }\
  58. + pix += i_stride;\
  59. + }\
  60. + return score;\
  61. +}
  62. +
  63. +PIXEL_NOISE_C( 16, 16 )
  64. +PIXEL_NOISE_C( 16, 8 )
  65. +PIXEL_NOISE_C( 8, 16 )
  66. +PIXEL_NOISE_C( 8, 8 )
  67. +PIXEL_NOISE_C( 8, 4 )
  68. +PIXEL_NOISE_C( 4, 8 )
  69. +PIXEL_NOISE_C( 4, 4 )
  70. +
  71. +#define PIXEL_NSSD( nssdname, lx, ly, ssdname, noisename)\
  72. +static int x264_pixel_nssd_##lx##x##ly##nssdname( uint8_t *pix1,\
  73. +int i_stride_pix1, uint8_t *pix2, int i_stride_pix2, int weight ) \
  74. +{ /* nssdname/ssdname/noisename are the name suffixes of this function and of the ssd/noise helpers it calls. */\
  75. + int ssd = x264_pixel_ssd_##lx##x##ly##ssdname( pix1, i_stride_pix1, \
  76. + pix2, i_stride_pix2, weight );\
  77. + int noise1 = x264_pixel_noise_##lx##x##ly##noisename( pix1, i_stride_pix1 );\
  78. + int noise2 = x264_pixel_noise_##lx##x##ly##noisename( pix2, i_stride_pix2 );\
  79. + return ssd + abs(noise1 - noise2) * weight; /* SSD plus weight times the gap between the two blocks' noise scores */\
  80. +}
  81. +
  82. +PIXEL_NSSD( , 16, 16, , )
  83. +PIXEL_NSSD( , 16, 8, , )
  84. +PIXEL_NSSD( , 8, 16, , )
  85. +PIXEL_NSSD( , 8, 8, , )
  86. +PIXEL_NSSD( , 8, 4, , )
  87. +PIXEL_NSSD( , 4, 8, , )
  88. +PIXEL_NSSD( , 4, 4, , )
  89. +PIXEL_NSSD( _mmxext, 16, 16, _mmx, _mmxext)
  90. +PIXEL_NSSD( _mmxext, 16, 8, _mmx, _mmxext)
  91. +PIXEL_NSSD( _mmxext, 8, 16, _mmx, _mmxext)
  92. +PIXEL_NSSD( _mmxext, 8, 8, _mmx, _mmxext)
  93. +PIXEL_NSSD( _mmxext, 8, 4, _mmx, _mmxext)
  94. +PIXEL_NSSD( _mmxext, 4, 8, _mmx, )
  95. +PIXEL_NSSD( _mmxext, 4, 4, _mmx, )
  96. +PIXEL_NSSD(_sse2, 16, 16, _sse2, _mmxext)
  97. +PIXEL_NSSD(_sse2, 16, 8, _sse2, _mmxext)
  98. +PIXEL_NSSD(_sse2, 8, 16, _sse2, _mmxext)
  99. +PIXEL_NSSD(_sse2, 8, 8, _sse2, _mmxext)
  100. +PIXEL_NSSD(_sse2, 8, 4, _sse2, _mmxext)
  101. +
  102. int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
  103. {
  104. int64_t i_ssd = 0;
  105. @@ -102,7 +157,7 @@ int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1
  106. int align = !(((long)pix1 | (long)pix2 | i_pix1 | i_pix2) & 15);
  107.  
  108. #define SSD(size) i_ssd += pf->ssd[size]( pix1 + y*i_pix1 + x, i_pix1, \
  109. - pix2 + y*i_pix2 + x, i_pix2 );
  110. + pix2 + y*i_pix2 + x, i_pix2, 0 );
  111. for( y = 0; y < i_height-15; y += 16 )
  112. {
  113. x = 0;
  114. @@ -527,6 +582,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  115. INIT7( sad_x3, );
  116. INIT7( sad_x4, );
  117. INIT7( ssd, );
  118. + INIT7( nssd, );
  119. INIT7( satd, );
  120. INIT7( satd_x3, );
  121. INIT7( satd_x4, );
  122. @@ -550,6 +606,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  123. INIT7( satd, _mmxext );
  124. INIT7( satd_x3, _mmxext );
  125. INIT7( satd_x4, _mmxext );
  126. + INIT7( nssd, _mmxext );
  127. INIT_ADS( _mmxext );
  128.  
  129. #ifdef ARCH_X86
  130. @@ -605,6 +662,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  131. if( cpu&X264_CPU_SSE2 )
  132. {
  133. INIT5( ssd, _sse2 );
  134. + INIT5( nssd, _sse2);
  135. INIT5( satd, _sse2 );
  136. INIT5( satd_x3, _sse2 );
  137. INIT5( satd_x4, _sse2 );
  138. diff --git a/common/pixel.h b/common/pixel.h
  139. index c95a304..c22de07 100644
  140. --- a/common/pixel.h
  141. +++ b/common/pixel.h
  142. @@ -27,6 +27,7 @@
  143. // SSD assumes all args aligned
  144. // other cmp functions assume first arg aligned
  145. typedef int (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
  146. +typedef int (*x264_pixel_cmp_weight_t) ( uint8_t *, int, uint8_t *, int, int );
  147. typedef void (*x264_pixel_cmp_x3_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[3] );
  148. typedef void (*x264_pixel_cmp_x4_t) ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int[4] );
  149.  
  150. @@ -66,10 +67,12 @@ static const uint8_t x264_size2pixel[5][5] = {
  151. typedef struct
  152. {
  153. x264_pixel_cmp_t sad[7];
  154. - x264_pixel_cmp_t ssd[7];
  155. + x264_pixel_cmp_weight_t ssd[7];
  156. + x264_pixel_cmp_weight_t nssd[7];
  157. x264_pixel_cmp_t satd[7];
  158. x264_pixel_cmp_t ssim[7];
  159. x264_pixel_cmp_t sa8d[4];
  160. + x264_pixel_cmp_weight_t rdcmp[7]; /* either ssd or nssd for mode decision */
  161. x264_pixel_cmp_t mbcmp[7]; /* either satd or sad for subpel refine and mode decision */
  162. x264_pixel_cmp_t fpelcmp[7]; /* either satd or sad for fullpel motion search */
  163. x264_pixel_cmp_x3_t fpelcmp_x3[7];
  164. diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
  165. index b4d0656..4efcd04 100644
  166. --- a/common/x86/pixel-a.asm
  167. +++ b/common/x86/pixel-a.asm
  168. @@ -1285,6 +1285,92 @@ SATD_W4 ssse3 ; mmx, but uses pabsw from ssse3.
  169.  
  170.  
  171. ;=============================================================================
  172. +; NSSD
  173. +;=============================================================================
  174. +
  175. +%macro NOISE_CORE_LOAD_FIRST 3
  176. + mova %1, [r0+%3]
  177. + movu %2, [r0+%3+1]
  178. +%endmacro
  179. +
  180. +%macro NOISE_CORE_LOAD_LAST 3
  181. + mova %1, [r0+%3] ; %1 = one register of pixels at row offset %3
  182. + mova %2, %1
  183. + psllq %1, 8 ; with the psrlq on %1 below: clears the top byte of each qword of %1
  184. + psrlq %2, 8 ; %2 = right-hand neighbours (each byte moves down one; top byte becomes 0)
  185. + psrlq %1, 8 ; top byte is now 0 in both %1 and %2, so the last column's %1-%2 difference is 0
  186. +%endmacro
  187. +
  188. +%macro NOISE_CORE_START 5
  189. + NOISE_CORE_LOAD %1, %2, %5
  190. + mova %3, %1
  191. + mova %4, %2
  192. + punpcklbw %1, m7
  193. + punpcklbw %2, m7
  194. + punpckhbw %3, m7
  195. + punpckhbw %4, m7
  196. + psubw %1, %2
  197. + psubw %3, %4
  198. +%endmacro
  199. +
  200. +%macro NOISE_CORE 7
  201. + NOISE_CORE_START %1, %2, %3, %4, %7
  202. + psubw %5, %1
  203. + psubw %6, %3
  204. + ABS2 %5, %6, %4, %2
  205. + paddw %6, %5
  206. + paddw m6, %6
  207. +%endmacro
  208. +
  209. +;arguments: src, stride
  210. +;macro arguments: width, height, name
  211. +%macro NOISE 3
  212. +%if %1 == 16
  213. +cglobal x264_pixel_noise_%1x%2_%3, 2,3
  214. + mov r2, r0
  215. +%else
  216. +cglobal x264_pixel_noise_%1x%2_%3, 2,2
  217. +x264_pixel_noise_%1x%2_%3 %+ .skip_prologue
  218. +%endif
  219. + pxor m7, m7
  220. + pxor m6, m6
  221. + NOISE_CORE_START m0, m1, m2, m3, 0
  222. + NOISE_CORE m4, m1, m5, m3, m0, m2, r1
  223. + lea r0, [r0+r1*2]
  224. +%rep (%2 - 2) / 2
  225. + NOISE_CORE m0, m1, m2, m3, m4, m5, 0
  226. + NOISE_CORE m4, m1, m5, m3, m0, m2, r1
  227. + lea r0, [r0+r1*2]
  228. +%endrep
  229. + mova m0, m6
  230. + punpcklwd m0, m7
  231. + punpckhwd m6, m7
  232. + paddd m6, m0
  233. + mova m0, m6
  234. + psrlq m6, 32
  235. + paddd m0, m6
  236. +%if %1 == 16
  237. + lea r0, [r2+8]
  238. + movd r2d, m0
  239. + call x264_pixel_noise_8x%2_%3 %+ .skip_prologue
  240. + add eax, r2d
  241. +%else
  242. + movd eax, m0
  243. +%endif
  244. + RET
  245. +%endmacro
  246. +
  247. +INIT_MMX
  248. +%define ABS2 ABS2_MMX
  249. +%define NOISE_CORE_LOAD NOISE_CORE_LOAD_LAST
  250. +NOISE 8, 16, mmxext
  251. +NOISE 8, 8, mmxext
  252. +NOISE 8, 4, mmxext
  253. +%define NOISE_CORE_LOAD NOISE_CORE_LOAD_FIRST
  254. +NOISE 16, 16, mmxext
  255. +NOISE 16, 8, mmxext
  256. +
  257. +;=============================================================================
  258. ; SSIM
  259. ;=============================================================================
  260.  
  261. diff --git a/common/x86/pixel.h b/common/x86/pixel.h
  262. index fcacaf2..2f80e82 100644
  263. --- a/common/x86/pixel.h
  264. +++ b/common/x86/pixel.h
  265. @@ -46,8 +46,9 @@ DECL_X1( sad, sse3 )
  266. DECL_X4( sad, mmxext )
  267. DECL_X4( sad, sse2 )
  268. DECL_X4( sad, sse3 )
  269. -DECL_X1( ssd, mmx )
  270. -DECL_X1( ssd, sse2 )
  271. +DECL_PIXELS( int, ssd, mmx, ( uint8_t *, int, uint8_t *, int, int ) )
  272. +DECL_PIXELS( int, ssd, sse2, ( uint8_t *, int, uint8_t *, int, int ) )
  273. +DECL_PIXELS( int, noise, mmxext, ( uint8_t *, int ) )
  274. DECL_X1( satd, mmxext )
  275. DECL_X1( satd, sse2 )
  276. DECL_X1( satd, ssse3 )
  277. diff --git a/encoder/analyse.c b/encoder/analyse.c
  278. index de3cf57..fd7e478 100644
  279. --- a/encoder/analyse.c
  280. +++ b/encoder/analyse.c
  281. @@ -765,7 +765,8 @@ static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
  282. else
  283. a->i_satd_i4x4 = COST_MAX;
  284.  
  285. - if( a->i_satd_i8x8 <= i_satd_thresh && a->i_satd_i8x8 < COST_MAX )
  286. + if( (a->i_satd_i8x8 <= i_satd_thresh || h->param.analyse.i_fgo)
  287. + && a->i_satd_i8x8 < COST_MAX )
  288. {
  289. h->mb.i_type = I_8x8;
  290. x264_analyse_update_cache( h, a );
  291. @@ -1928,7 +1929,7 @@ static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_i
  292. //FIXME not all the update_cache calls are needed
  293. h->mb.i_partition = D_16x16;
  294. /* L0 */
  295. - if( a->l0.me16x16.cost <= thresh && a->l0.i_rd16x16 == COST_MAX )
  296. + if( (a->l0.me16x16.cost <= thresh || h->param.analyse.i_fgo) && a->l0.i_rd16x16 == COST_MAX )
  297. {
  298. h->mb.i_type = B_L0_L0;
  299. x264_analyse_update_cache( h, a );
  300. @@ -1936,7 +1937,7 @@ static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_i
  301. }
  302.  
  303. /* L1 */
  304. - if( a->l1.me16x16.cost <= thresh && a->l1.i_rd16x16 == COST_MAX )
  305. + if( (a->l1.me16x16.cost <= thresh || h->param.analyse.i_fgo) && a->l1.i_rd16x16 == COST_MAX )
  306. {
  307. h->mb.i_type = B_L1_L1;
  308. x264_analyse_update_cache( h, a );
  309. @@ -2277,7 +2278,8 @@ void x264_macroblock_analyse( x264_t *h )
  310.  
  311. if( analysis.b_mbrd )
  312. {
  313. - x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
  314. + x264_mb_analyse_p_rd( h, &analysis, h->param.analyse.i_fgo ?
  315. + i_satd_inter : X264_MIN(i_satd_inter, i_satd_intra) );
  316. i_type = P_L0;
  317. i_partition = D_16x16;
  318. i_cost = analysis.l0.me16x16.cost;
  319. diff --git a/encoder/encoder.c b/encoder/encoder.c
  320. index 2b81e64..83f0141 100644
  321. --- a/encoder/encoder.c
  322. +++ b/encoder/encoder.c
  323. @@ -402,6 +402,7 @@ static int x264_validate_parameters( x264_t *h )
  324. h->param.analyse.i_noise_reduction = 0;
  325. h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
  326. h->param.rc.i_aq_mode = 0;
  327. + h->param.analyse.i_fgo = 0;
  328. }
  329. if( h->param.rc.i_rc_method == X264_RC_CQP )
  330. {
  331. @@ -500,6 +501,29 @@ static int x264_validate_parameters( x264_t *h )
  332. if( h->param.analyse.i_direct_8x8_inference < 0 )
  333. h->param.analyse.i_direct_8x8_inference = l->direct8x8;
  334. }
  335. +
  336. + /* Arbitrary clipping, done before the test so a negative value disables fgo instead of reaching pow() as 0. */
  337. + h->param.analyse.i_fgo = x264_clip3(h->param.analyse.i_fgo, 0, 50);
  338. + if( h->param.analyse.i_fgo )
  339. + {
  340. + if(h->param.analyse.i_subpel_refine < 6 ||
  341. + (!h->param.analyse.b_bframe_rdo && h->param.i_bframe) )
  342. + {
  343. + if(h->param.i_bframe)
  344. + x264_log( h, X264_LOG_WARNING, "fgo requires b-rdo and subme >= 6\n" );
  345. + else
  346. + x264_log( h, X264_LOG_WARNING, "fgo requires subme >= 6\n" );
  347. + h->param.analyse.i_fgo = 0;
  348. + }
  349. + else
  350. + {
  351. + /* P-skip's threshold isn't necessarily accurate when using NSSD/FGO */
  352. + h->param.analyse.b_fast_pskip = 0;
  353. + /* B-frame QPs need to be lower to retain grain */
  354. + /* Arbitrary formula to scale pbratio based on fgo strength; i_fgo is in [1,50] here, so pow() is never 0. */
  355. + h->param.rc.f_pb_factor = 1 + (h->param.rc.f_pb_factor - 1) / pow(h->param.analyse.i_fgo,0.3);
  356. + }
  357. + }
  358.  
  359. if( h->param.i_threads > 1 )
  360. {
  361. @@ -562,6 +586,7 @@ static void mbcmp_init( x264_t *h )
  362. memcpy( h->pixf.fpelcmp, satd ? h->pixf.satd : h->pixf.sad, sizeof(h->pixf.fpelcmp) );
  363. memcpy( h->pixf.fpelcmp_x3, satd ? h->pixf.satd_x3 : h->pixf.sad_x3, sizeof(h->pixf.fpelcmp_x3) );
  364. memcpy( h->pixf.fpelcmp_x4, satd ? h->pixf.satd_x4 : h->pixf.sad_x4, sizeof(h->pixf.fpelcmp_x4) );
  365. + memcpy( h->pixf.rdcmp, h->param.analyse.i_fgo ? h->pixf.nssd : h->pixf.ssd, sizeof(h->pixf.rdcmp) );
  366. }
  367.  
  368. /****************************************************************************
  369. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  370. index 11790ea..52b436f 100644
  371. --- a/encoder/ratecontrol.c
  372. +++ b/encoder/ratecontrol.c
  373. @@ -194,7 +194,7 @@ static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
  374. int pix = i ? PIXEL_8x8 : PIXEL_16x16;
  375. stride <<= h->mb.b_interlaced;
  376. sad = h->pixf.sad[pix]( flat, 0, h->fenc->plane[i]+offset, stride );
  377. - ssd = h->pixf.ssd[pix]( flat, 0, h->fenc->plane[i]+offset, stride );
  378. + ssd = h->pixf.ssd[pix]( flat, 0, h->fenc->plane[i]+offset, stride, 0 );
  379. var += ssd - (sad * sad >> (i?6:8));
  380. // SATD to represent the block's overall complexity (bit cost) for intra encoding.
  381. // exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
  382. @@ -269,7 +269,7 @@ int x264_ratecontrol_new( x264_t *h )
  383.  
  384. rc->b_abr = h->param.rc.i_rc_method != X264_RC_CQP && !h->param.rc.b_stat_read;
  385. rc->b_2pass = h->param.rc.i_rc_method == X264_RC_ABR && h->param.rc.b_stat_read;
  386. -
  387. +
  388. /* FIXME: use integers */
  389. if(h->param.i_fps_num > 0 && h->param.i_fps_den > 0)
  390. rc->fps = (float) h->param.i_fps_num / h->param.i_fps_den;
  391. @@ -679,7 +679,7 @@ void x264_ratecontrol_summary( x264_t *h )
  392. if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
  393. {
  394. double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
  395. - x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
  396. + x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
  397. qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
  398. * rc->cplxr_sum / rc->wanted_bits_window ) );
  399. }
  400. @@ -838,7 +838,7 @@ double predict_row_size( x264_t *h, int y, int qp )
  401. x264_ratecontrol_t *rc = h->rc;
  402. double pred_s = predict_size( rc->row_pred, qp2qscale(qp), h->fdec->i_row_satd[y] );
  403. double pred_t = 0;
  404. - if( h->sh.i_type != SLICE_TYPE_I
  405. + if( h->sh.i_type != SLICE_TYPE_I
  406. && h->fref0[0]->i_type == h->fdec->i_type
  407. && h->fref0[0]->i_row_satd[y] > 0 )
  408. {
  409. @@ -1007,7 +1007,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
  410. int dir_frame = h->stat.frame.i_direct_score[1] - h->stat.frame.i_direct_score[0];
  411. int dir_avg = h->stat.i_direct_score[1] - h->stat.i_direct_score[0];
  412. char c_direct = h->mb.b_direct_auto_write ?
  413. - ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
  414. + ( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
  415. dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
  416. : '-';
  417. fprintf( rc->p_stat_file_out,
  418. diff --git a/encoder/rdo.c b/encoder/rdo.c
  419. index 8607e07..ec956fe 100644
  420. --- a/encoder/rdo.c
  421. +++ b/encoder/rdo.c
  422. @@ -55,18 +55,22 @@ static uint16_t cabac_prefix_size[15][128];
  423.  
  424. static int ssd_mb( x264_t *h )
  425. {
  426. - return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
  427. - h->mb.pic.p_fdec[0], FDEC_STRIDE )
  428. - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE,
  429. - h->mb.pic.p_fdec[1], FDEC_STRIDE )
  430. - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE,
  431. - h->mb.pic.p_fdec[2], FDEC_STRIDE );
  432. + return h->pixf.rdcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
  433. + h->mb.pic.p_fdec[0], FDEC_STRIDE,
  434. + h->param.analyse.i_fgo )
  435. + + h->pixf.rdcmp[PIXEL_8x8] ( h->mb.pic.p_fenc[1], FENC_STRIDE,
  436. + h->mb.pic.p_fdec[1], FDEC_STRIDE,
  437. + h->param.analyse.i_fgo )
  438. + + h->pixf.rdcmp[PIXEL_8x8] ( h->mb.pic.p_fenc[2], FENC_STRIDE,
  439. + h->mb.pic.p_fdec[2], FDEC_STRIDE,
  440. + h->param.analyse.i_fgo );
  441. }
  442.  
  443. static int ssd_plane( x264_t *h, int size, int p, int x, int y )
  444. {
  445. - return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,
  446. - h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE );
  447. + return h->pixf.rdcmp[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,
  448. + h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE,
  449. + h->param.analyse.i_fgo );
  450. }
  451.  
  452. static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
  453. diff --git a/tools/checkasm.c b/tools/checkasm.c
  454. index 73faf12..ecfb540 100644
  455. --- a/tools/checkasm.c
  456. +++ b/tools/checkasm.c
  457. @@ -69,9 +69,32 @@ static int check_pixel( int cpu_ref, int cpu_new )
  458. } \
  459. } \
  460. report( "pixel " #name " :" );
  461. +
  462. +#define TEST_PIXEL_WEIGHT( name, align ) \
  463. + for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \
  464. + { \
  465. + int res_c, res_asm; \
  466. + if( pixel_asm.name[i] != pixel_ref.name[i] ) \
  467. + { \
  468. + for( j=0; j<64; j++ ) \
  469. + { \
  470. + used_asm = 1; \
  471. + res_c = call_c( pixel_c.name[i], buf1, 32, buf2+j*!align, 16, j ); \
  472. + res_asm = call_a( pixel_asm.name[i], buf1, 32, buf2+j*!align, 16, j ); \
  473. + if( res_c != res_asm ) \
  474. + { \
  475. + ok = 0; \
  476. + fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \
  477. + break; \
  478. + } \
  479. + } \
  480. + } \
  481. + } \
  482. + report( "pixel " #name " :" );
  483.  
  484. TEST_PIXEL( sad, 0 );
  485. - TEST_PIXEL( ssd, 1 );
  486. + TEST_PIXEL_WEIGHT( ssd, 1 );
  487. + TEST_PIXEL_WEIGHT( nssd, 1 );
  488. TEST_PIXEL( satd, 0 );
  489. TEST_PIXEL( sa8d, 0 );
  490.  
  491. diff --git a/x264.c b/x264.c
  492. index 70adb71..88c8c78 100644
  493. --- a/x264.c
  494. +++ b/x264.c
  495. @@ -196,6 +196,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )
  496. " textured areas. [%.1f]\n"
  497. " - 0.5: weak AQ\n"
  498. " - 1.5: strong AQ\n", defaults->rc.f_aq_strength );
  499. + H0( " --fgo <int> Activates Film Grain Optimization.[%d]\n"
  500. + " - 5: weak FGO\n"
  501. + " - 15: strong FGO\n", defaults->analyse.i_fgo);
  502. H0( "\n" );
  503. H0( " -p, --pass <1|2|3> Enable multipass ratecontrol\n"
  504. " - 1: First pass, creates stats file\n"
  505. @@ -420,6 +423,7 @@ static int Parse( int argc, char **argv,
  506. { "no-dct-decimate", no_argument, NULL, 0 },
  507. { "aq-strength", required_argument, NULL, 0 },
  508. { "aq-mode", required_argument, NULL, 0 },
  509. + { "fgo", required_argument, NULL, 0 },
  510. { "deadzone-inter", required_argument, NULL, '0' },
  511. { "deadzone-intra", required_argument, NULL, '0' },
  512. { "level", required_argument, NULL, 0 },
  513. diff --git a/x264.h b/x264.h
  514. index d2c6510..c0156ea 100644
  515. --- a/x264.h
  516. +++ b/x264.h
  517. @@ -236,6 +236,7 @@ typedef struct x264_param_t
  518. int b_fast_pskip; /* early SKIP detection on P-frames */
  519. int b_dct_decimate; /* transform coefficient thresholding on P-frames */
  520. int i_noise_reduction; /* adaptive pseudo-deadzone */
  521. + int i_fgo; /* psy film grain optimization */
  522.  
  523. /* the deadzone size that will be used in luma quantization */
  524. int i_luma_deadzone[2]; /* {inter, intra} */
  525.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement