Guest User

Untitled

a guest
Jul 30th, 2018
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 65.45 KB | None | 0 0
  1. From 09b5a943c62212447a0151dfd5324f8e36715272 Mon Sep 17 00:00:00 2001
  2. From: Anton Mitrofanov <BugMaster@narod.ru>
  3. Date: Thu, 6 May 2010 10:03:31 -0700
  4. Subject: [PATCH 1/9] More cosmetics
  5.  
  6. ---
  7. common/cpu.c | 4 +-
  8. common/macroblock.c | 6 +++-
  9. common/mc.c | 4 +-
  10. common/mvpred.c | 12 ++++----
  11. common/ppc/dct.c | 2 +-
  12. common/ppc/mc.c | 12 ++++----
  13. common/ppc/ppccommon.h | 8 +++---
  14. common/ppc/quant.c | 6 ++--
  15. common/predict.c | 2 +-
  16. common/x86/const-a.asm | 2 +-
  17. common/x86/mc-c.c | 2 +-
  18. common/x86/predict-c.c | 2 +-
  19. encoder/cabac.c | 8 +++---
  20. encoder/me.c | 18 ++++++------
  21. input/avs.c | 2 +-
  22. tools/checkasm.c | 66 ++++++++++++++++++++++++------------------------
  23. 16 files changed, 79 insertions(+), 77 deletions(-)
  24.  
  25. diff --git a/common/cpu.c b/common/cpu.c
  26. index 904eedc..933a754 100644
  27. --- a/common/cpu.c
  28. +++ b/common/cpu.c
  29. @@ -87,8 +87,8 @@ static void sigill_handler( int sig )
  30. #endif
  31.  
  32. #ifdef HAVE_MMX
  33. -extern int x264_cpu_cpuid_test( void );
  34. -extern uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
  35. +int x264_cpu_cpuid_test( void );
  36. +uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
  37.  
  38. uint32_t x264_cpu_detect( void )
  39. {
  40. diff --git a/common/macroblock.c b/common/macroblock.c
  41. index f402588..110c3a5 100644
  42. --- a/common/macroblock.c
  43. +++ b/common/macroblock.c
  44. @@ -295,7 +295,8 @@ int x264_macroblock_cache_allocate( x264_t *h )
  45. }
  46.  
  47. return 0;
  48. -fail: return -1;
  49. +fail:
  50. + return -1;
  51. }
  52. void x264_macroblock_cache_free( x264_t *h )
  53. {
  54. @@ -348,7 +349,8 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  55. CHECKED_MALLOC( h->scratch_buffer, scratch_size );
  56.  
  57. return 0;
  58. -fail: return -1;
  59. +fail:
  60. + return -1;
  61. }
  62.  
  63. void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
  64. diff --git a/common/mc.c b/common/mc.c
  65. index ad7fe79..ada8bdc 100644
  66. --- a/common/mc.c
  67. +++ b/common/mc.c
  68. @@ -97,9 +97,9 @@ static void name( uint8_t *pix1, int i_stride_pix1, \
  69. uint8_t *pix2, int i_stride_pix2, \
  70. uint8_t *pix3, int i_stride_pix3, int weight ) \
  71. { \
  72. - if( weight == 32 )\
  73. + if( weight == 32 ) \
  74. pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
  75. - else\
  76. + else \
  77. pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
  78. }
  79. PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
  80. diff --git a/common/mvpred.c b/common/mvpred.c
  81. index de91826..54b4d5a 100755
  82. --- a/common/mvpred.c
  83. +++ b/common/mvpred.c
  84. @@ -394,7 +394,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
  85. int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
  86. int i = 0;
  87.  
  88. -#define SET_MVP(mvp)\
  89. +#define SET_MVP(mvp) \
  90. { \
  91. CP32( mvc[i], mvp ); \
  92. i++; \
  93. @@ -445,13 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
  94. if( h->sh.b_mbaff && field^(i_ref&1) )
  95. refpoc += h->sh.i_delta_poc_bottom;
  96.  
  97. -#define SET_TMVP( dx, dy )\
  98. +#define SET_TMVP( dx, dy ) \
  99. { \
  100. int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
  101. - int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field];\
  102. - mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8;\
  103. - mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8;\
  104. - i++;\
  105. + int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
  106. + mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
  107. + mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
  108. + i++; \
  109. }
  110.  
  111. SET_TMVP(0,0);
  112. diff --git a/common/ppc/dct.c b/common/ppc/dct.c
  113. index fdadf53..eb223ae 100644
  114. --- a/common/ppc/dct.c
  115. +++ b/common/ppc/dct.c
  116. @@ -205,7 +205,7 @@ void x264_sub8x8_dct8_altivec( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
  117. vec_st( dct_tr1v, 16, (signed short *)dct );
  118. vec_st( dct_tr2v, 32, (signed short *)dct );
  119. vec_st( dct_tr3v, 48, (signed short *)dct );
  120. -
  121. +
  122. vec_st( dct_tr4v, 64, (signed short *)dct );
  123. vec_st( dct_tr5v, 80, (signed short *)dct );
  124. vec_st( dct_tr6v, 96, (signed short *)dct );
  125. diff --git a/common/ppc/mc.c b/common/ppc/mc.c
  126. index dfe250a..26b81f8 100644
  127. --- a/common/ppc/mc.c
  128. +++ b/common/ppc/mc.c
  129. @@ -291,8 +291,8 @@ static void mc_chroma_2xh( uint8_t *dst, int i_dst_stride,
  130. }
  131.  
  132.  
  133. -#define DO_PROCESS_W4( a ) \
  134. - dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
  135. +#define DO_PROCESS_W4( a ) \
  136. + dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
  137. dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
  138.  
  139. static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
  140. @@ -369,10 +369,10 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
  141. }
  142. }
  143.  
  144. -#define DO_PROCESS_W8( a ) \
  145. - src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \
  146. - src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \
  147. - dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
  148. +#define DO_PROCESS_W8( a ) \
  149. + src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \
  150. + src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \
  151. + dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
  152. dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
  153.  
  154. static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
  155. diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
  156. index 510ab26..e61afaa 100644
  157. --- a/common/ppc/ppccommon.h
  158. +++ b/common/ppc/ppccommon.h
  159. @@ -113,13 +113,13 @@ typedef union {
  160. vec_u8_t _hv, _lv
  161.  
  162. #define PREP_LOAD_SRC( src ) \
  163. - vec_u8_t _##src##_ = vec_lvsl(0, src)
  164. + vec_u8_t _##src##_ = vec_lvsl(0, src)
  165.  
  166. #define VEC_LOAD_G( p, v, n, t ) \
  167. _hv = vec_ld( 0, p ); \
  168. v = (t) vec_lvsl( 0, p ); \
  169. _lv = vec_ld( n - 1, p ); \
  170. - v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
  171. + v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
  172.  
  173. #define VEC_LOAD( p, v, n, t, g ) \
  174. _hv = vec_ld( 0, p ); \
  175. @@ -134,7 +134,7 @@ typedef union {
  176. #define VEC_LOAD_PARTIAL( p, v, n, t, g) \
  177. _hv = vec_ld( 0, p); \
  178. v = (t) vec_perm( _hv, _hv, (vec_u8_t) _##g##_ )
  179. -
  180. +
  181.  
  182. /***********************************************************************
  183. * PREP_STORE##n: declares required vectors to store n bytes to a
  184. @@ -155,7 +155,7 @@ typedef union {
  185. _lv = vec_perm( (vec_u8_t) v, _tmp1v, _##o##r_ ); \
  186. vec_st( _lv, 15, (uint8_t *) p ); \
  187. _hv = vec_perm( _tmp1v, (vec_u8_t) v, _##o##r_ ); \
  188. - vec_st( _hv, 0, (uint8_t *) p )
  189. + vec_st( _hv, 0, (uint8_t *) p )
  190.  
  191.  
  192. #define PREP_STORE8 \
  193. diff --git a/common/ppc/quant.c b/common/ppc/quant.c
  194. index 4b2825c..6f41a06 100644
  195. --- a/common/ppc/quant.c
  196. +++ b/common/ppc/quant.c
  197. @@ -20,7 +20,7 @@
  198.  
  199. #include "common/common.h"
  200. #include "ppccommon.h"
  201. -#include "quant.h"
  202. +#include "quant.h"
  203.  
  204. // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
  205. #define QUANT_16_U( idx0, idx1 ) \
  206. @@ -55,7 +55,7 @@
  207. nz = vec_or(nz, vec_or(temp1v, temp2v)); \
  208. vec_st(temp2v, (idx1), (int16_t*)dct); \
  209. }
  210. -
  211. +
  212. int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
  213. {
  214. LOAD_ZERO;
  215. @@ -220,7 +220,7 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64
  216. vec_u16_t biasvB;
  217.  
  218. vec_s16_t temp1v, temp2v;
  219. -
  220. +
  221. vec_u32_u qbits_u;
  222. qbits_u.s[0]=16;
  223. i_qbitsv = vec_splat(qbits_u.v, 0);
  224. diff --git a/common/predict.c b/common/predict.c
  225. index 783cc9b..f120ca7 100644
  226. --- a/common/predict.c
  227. +++ b/common/predict.c
  228. @@ -41,7 +41,7 @@
  229. * 16x16 prediction for intra luma block
  230. ****************************************************************************/
  231.  
  232. -#define PREDICT_16x16_DC(v) \
  233. +#define PREDICT_16x16_DC(v)\
  234. for( int i = 0; i < 16; i++ )\
  235. {\
  236. M32( src+ 0 ) = v;\
  237. diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
  238. index 79bbf1b..99a34be 100755
  239. --- a/common/x86/const-a.asm
  240. +++ b/common/x86/const-a.asm
  241. @@ -43,7 +43,7 @@ const pw_64, times 8 dw 64
  242. const pw_32_0, times 4 dw 32,
  243. times 4 dw 0
  244. const pw_8000, times 8 dw 0x8000
  245. -const pw_3fff, times 8 dw 0x3fff
  246. +const pw_3fff, times 8 dw 0x3fff
  247.  
  248. const pd_1, times 4 dd 1
  249. const pd_128, times 4 dd 128
  250. diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
  251. index fb73562..6d386f6 100644
  252. --- a/common/x86/mc-c.c
  253. +++ b/common/x86/mc-c.c
  254. @@ -103,7 +103,7 @@ void x264_integral_init8v_sse2( uint16_t *sum8, int stride );
  255. void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
  256. void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
  257. uint16_t *inter_costs, uint16_t *inv_qscales, int len );
  258. -#define LOWRES(cpu) \
  259. +#define LOWRES(cpu)\
  260. void x264_frame_init_lowres_core_##cpu( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,\
  261. int src_stride, int dst_stride, int width, int height );
  262. LOWRES(mmxext)
  263. diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
  264. index 6fa7e3b..0e3e1c7 100644
  265. --- a/common/x86/predict-c.c
  266. +++ b/common/x86/predict-c.c
  267. @@ -326,7 +326,7 @@ static void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] )
  268. t=e; e+=f; f-=t;\
  269. t=g; g+=h; h-=t;
  270.  
  271. -#define INTRA_SA8D_X3(cpu) \
  272. +#define INTRA_SA8D_X3(cpu)\
  273. void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
  274. {\
  275. PREDICT_8x8_LOAD_TOP\
  276. diff --git a/encoder/cabac.c b/encoder/cabac.c
  277. index 1086447..bc76fc8 100644
  278. --- a/encoder/cabac.c
  279. +++ b/encoder/cabac.c
  280. @@ -736,13 +736,13 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
  281. }
  282. #endif
  283.  
  284. -#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \
  285. -{ \
  286. - int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \
  287. +#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra )\
  288. +{\
  289. + int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra );\
  290. if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
  291. {\
  292. x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
  293. - block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \
  294. + block_residual_write_cabac( h, cb, i_ctxBlockCat, l );\
  295. }\
  296. else\
  297. x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
  298. diff --git a/encoder/me.c b/encoder/me.c
  299. index d7b2928..5e113f0 100644
  300. --- a/encoder/me.c
  301. +++ b/encoder/me.c
  302. @@ -914,14 +914,14 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  303. m->cost_mv = p_cost_mvx[bmx] + p_cost_mvy[bmy];
  304. }
  305.  
  306. -#define BIME_CACHE( dx, dy, list ) \
  307. -{ \
  308. +#define BIME_CACHE( dx, dy, list )\
  309. +{\
  310. x264_me_t *m = m##list;\
  311. - int i = 4 + 3*dx + dy; \
  312. + int i = 4 + 3*dx + dy;\
  313. int mvx = bm##list##x+dx;\
  314. int mvy = bm##list##y+dy;\
  315. stride[list][i] = bw;\
  316. - src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none ); \
  317. + src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none );\
  318. if( rd )\
  319. {\
  320. h->mc.mc_chroma( pixu_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
  321. @@ -1107,11 +1107,11 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
  322. { \
  323. uint64_t cost; \
  324. M32( cache_mv ) = pack16to32_mask(mx,my); \
  325. - if( m->i_pixel <= PIXEL_8x8 )\
  326. - {\
  327. - h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\
  328. - h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\
  329. - }\
  330. + if( m->i_pixel <= PIXEL_8x8 ) \
  331. + { \
  332. + h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
  333. + h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
  334. + } \
  335. cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
  336. COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
  337. } \
  338. diff --git a/input/avs.c b/input/avs.c
  339. index 9e3aa55..5489a5e 100644
  340. --- a/input/avs.c
  341. +++ b/input/avs.c
  342. @@ -45,7 +45,7 @@
  343. /* maximum size of the sequence of filters to try on non script files */
  344. #define AVS_MAX_SEQUENCE 5
  345.  
  346. -#define LOAD_AVS_FUNC(name, continue_on_fail) \
  347. +#define LOAD_AVS_FUNC(name, continue_on_fail)\
  348. {\
  349. h->func.name = (void*)GetProcAddress( h->library, #name );\
  350. if( !continue_on_fail && !h->func.name )\
  351. diff --git a/tools/checkasm.c b/tools/checkasm.c
  352. index 228b75f..2008d2f 100644
  353. --- a/tools/checkasm.c
  354. +++ b/tools/checkasm.c
  355. @@ -265,7 +265,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
  356. buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
  357.  
  358. #define TEST_PIXEL( name, align ) \
  359. - ok = 1, used_asm = 0;\
  360. + ok = 1, used_asm = 0; \
  361. for( int i = 0; i < 7; i++ ) \
  362. { \
  363. int res_c, res_asm; \
  364. @@ -305,7 +305,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
  365. TEST_PIXEL( sa8d, 1 );
  366.  
  367. #define TEST_PIXEL_X( N ) \
  368. - ok = 1; used_asm = 0;\
  369. + ok = 1; used_asm = 0; \
  370. for( int i = 0; i < 7; i++ ) \
  371. { \
  372. int res_c[4]={0}, res_asm[4]={0}; \
  373. @@ -350,7 +350,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
  374. { \
  375. set_func_name( "%s_%s", "var", pixel_names[i] ); \
  376. used_asm = 1; \
  377. - /* abi-check wrapper can't return uint64_t, so separate it from return value check */\
  378. + /* abi-check wrapper can't return uint64_t, so separate it from return value check */ \
  379. call_c1( pixel_c.var[i], buf1, 16 ); \
  380. call_a1( pixel_asm.var[i], buf1, 16 ); \
  381. uint64_t res_c = pixel_c.var[i]( buf1, 16 ); \
  382. @@ -415,7 +415,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
  383. if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
  384. { \
  385. int res_c[3], res_asm[3]; \
  386. - set_func_name( #name );\
  387. + set_func_name( #name ); \
  388. used_asm = 1; \
  389. memcpy( buf3, buf2, 1024 ); \
  390. for( int i = 0; i < 3; i++ ) \
  391. @@ -538,7 +538,7 @@ static int check_dct( int cpu_ref, int cpu_new )
  392. #define TEST_DCT( name, t1, t2, size ) \
  393. if( dct_asm.name != dct_ref.name ) \
  394. { \
  395. - set_func_name( #name );\
  396. + set_func_name( #name ); \
  397. used_asm = 1; \
  398. call_c( dct_c.name, t1, buf1, buf2 ); \
  399. call_a( dct_asm.name, t2, buf1, buf2 ); \
  400. @@ -579,7 +579,7 @@ static int check_dct( int cpu_ref, int cpu_new )
  401. #define TEST_IDCT( name, src ) \
  402. if( dct_asm.name != dct_ref.name ) \
  403. { \
  404. - set_func_name( #name );\
  405. + set_func_name( #name ); \
  406. used_asm = 1; \
  407. memcpy( buf3, buf1, 32*32 ); \
  408. memcpy( buf4, buf1, 32*32 ); \
  409. @@ -644,12 +644,12 @@ static int check_dct( int cpu_ref, int cpu_new )
  410. ALIGNED_16( int16_t level1[64] );
  411. ALIGNED_16( int16_t level2[64] );
  412.  
  413. -#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
  414. +#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
  415. if( zigzag_asm.name != zigzag_ref.name ) \
  416. { \
  417. - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
  418. + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
  419. used_asm = 1; \
  420. - memcpy(dct, buf1, size*sizeof(int16_t));\
  421. + memcpy(dct, buf1, size*sizeof(int16_t)); \
  422. call_c( zigzag_c.name, t1, dct ); \
  423. call_a( zigzag_asm.name, t2, dct ); \
  424. if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \
  425. @@ -663,18 +663,18 @@ static int check_dct( int cpu_ref, int cpu_new )
  426. if( zigzag_asm.name != zigzag_ref.name ) \
  427. { \
  428. int nz_a, nz_c; \
  429. - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
  430. + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
  431. used_asm = 1; \
  432. memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
  433. memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
  434. - nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
  435. + nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
  436. nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4 ); \
  437. - if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
  438. + if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
  439. { \
  440. ok = 0; \
  441. fprintf( stderr, #name " [FAILED]\n" ); \
  442. } \
  443. - call_c2( zigzag_c.name, t1, buf2, buf3 ); \
  444. + call_c2( zigzag_c.name, t1, buf2, buf3 ); \
  445. call_a2( zigzag_asm.name, t2, buf2, buf4 ); \
  446. }
  447.  
  448. @@ -683,7 +683,7 @@ static int check_dct( int cpu_ref, int cpu_new )
  449. { \
  450. int nz_a, nz_c; \
  451. int16_t dc_a, dc_c; \
  452. - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
  453. + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
  454. used_asm = 1; \
  455. for( int i = 0; i < 2; i++ ) \
  456. { \
  457. @@ -694,27 +694,27 @@ static int check_dct( int cpu_ref, int cpu_new )
  458. memcpy( buf3 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
  459. memcpy( buf4 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
  460. } \
  461. - nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
  462. + nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
  463. nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
  464. - if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
  465. + if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
  466. { \
  467. ok = 0; \
  468. fprintf( stderr, #name " [FAILED]\n" ); \
  469. break; \
  470. } \
  471. } \
  472. - call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
  473. + call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
  474. call_a2( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
  475. }
  476.  
  477. -#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
  478. +#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
  479. if( zigzag_asm.name != zigzag_ref.name ) \
  480. { \
  481. for( int j = 0; j < 100; j++ ) \
  482. { \
  483. - set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\
  484. + set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
  485. used_asm = 1; \
  486. - memcpy(dct, buf1, size*sizeof(int16_t));\
  487. + memcpy(dct, buf1, size*sizeof(int16_t)); \
  488. for( int i = 0; i < size; i++ ) \
  489. dct[i] = rand()&0x1F ? 0 : dct[i]; \
  490. memcpy(buf3, buf4, 10*sizeof(uint8_t)); \
  491. @@ -784,7 +784,7 @@ static int check_mc( int cpu_ref, int cpu_new )
  492. if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \
  493. { \
  494. const x264_weight_t *weight = weight_none; \
  495. - set_func_name( "mc_luma_%dx%d", w, h );\
  496. + set_func_name( "mc_luma_%dx%d", w, h ); \
  497. used_asm = 1; \
  498. memset( buf3, 0xCD, 1024 ); \
  499. memset( buf4, 0xCD, 1024 ); \
  500. @@ -801,7 +801,7 @@ static int check_mc( int cpu_ref, int cpu_new )
  501. uint8_t *ref = dst2; \
  502. int ref_stride = 32; \
  503. const x264_weight_t *weight = weight_none; \
  504. - set_func_name( "get_ref_%dx%d", w, h );\
  505. + set_func_name( "get_ref_%dx%d", w, h ); \
  506. used_asm = 1; \
  507. memset( buf3, 0xCD, 1024 ); \
  508. memset( buf4, 0xCD, 1024 ); \
  509. @@ -819,13 +819,13 @@ static int check_mc( int cpu_ref, int cpu_new )
  510. #define MC_TEST_CHROMA( w, h ) \
  511. if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
  512. { \
  513. - set_func_name( "mc_chroma_%dx%d", w, h );\
  514. + set_func_name( "mc_chroma_%dx%d", w, h ); \
  515. used_asm = 1; \
  516. memset( buf3, 0xCD, 1024 ); \
  517. memset( buf4, 0xCD, 1024 ); \
  518. call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
  519. call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
  520. - /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\
  521. + /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
  522. for( int j = 0; j < h; j++ ) \
  523. for( int i = w; i < 4; i++ ) \
  524. dst2[i+j*16] = dst1[i+j*16]; \
  525. @@ -878,7 +878,7 @@ static int check_mc( int cpu_ref, int cpu_new )
  526. memcpy( buf4, buf1+320, 320 ); \
  527. if( mc_a.name[i] != mc_ref.name[i] ) \
  528. { \
  529. - set_func_name( "%s_%s", #name, pixel_names[i] );\
  530. + set_func_name( "%s_%s", #name, pixel_names[i] ); \
  531. used_asm = 1; \
  532. call_c1( mc_c.name[i], buf3, 16, buf2+1, 16, buf1+18, 16, weight ); \
  533. call_a1( mc_a.name[i], buf4, 16, buf2+1, 16, buf1+18, 16, weight ); \
  534. @@ -899,7 +899,7 @@ static int check_mc( int cpu_ref, int cpu_new )
  535.  
  536. #define MC_TEST_WEIGHT( name, weight, aligned ) \
  537. int align_off = (aligned ? 0 : rand()%16); \
  538. - ok = 1, used_asm = 0;\
  539. + ok = 1, used_asm = 0; \
  540. for( int i = 1; i <= 5; i++ ) \
  541. { \
  542. ALIGNED_16( uint8_t buffC[640] ); \
  543. @@ -1115,14 +1115,14 @@ static int check_deblock( int cpu_ref, int cpu_new )
  544. #define TEST_DEBLOCK( name, align, ... ) \
  545. for( int i = 0; i < 36; i++ ) \
  546. { \
  547. - int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */\
  548. + int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
  549. for( int j = 0; j < 1024; j++ ) \
  550. - /* two distributions of random to excersize different failure modes */\
  551. + /* two distributions of random to exercise different failure modes */ \
  552. buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
  553. memcpy( buf4, buf3, 1024 ); \
  554. if( db_a.name != db_ref.name ) \
  555. { \
  556. - set_func_name( #name );\
  557. + set_func_name( #name ); \
  558. used_asm = 1; \
  559. call_c1( db_c.name, buf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
  560. call_a1( db_a.name, buf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
  561. @@ -1236,7 +1236,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  562. dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
  563. result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
  564. result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
  565. - if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
  566. + if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
  567. { \
  568. oks[0] = 0; \
  569. fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
  570. @@ -1491,11 +1491,11 @@ static int check_intra( int cpu_ref, int cpu_new )
  571.  
  572. ip_c.predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
  573.  
  574. -#define INTRA_TEST( name, dir, w, ... ) \
  575. +#define INTRA_TEST( name, dir, w, ... )\
  576. if( ip_a.name[dir] != ip_ref.name[dir] )\
  577. - { \
  578. + {\
  579. set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
  580. - used_asm = 1; \
  581. + used_asm = 1;\
  582. memcpy( buf3, buf1, 32*20 );\
  583. memcpy( buf4, buf1, 32*20 );\
  584. call_c( ip_c.name[dir], buf3+48, ##__VA_ARGS__ );\
  585. --
  586. 1.7.0.4
  587.  
  588.  
  589. From 29b379cc3499541e72007131909d45a8c472f2b5 Mon Sep 17 00:00:00 2001
  590. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  591. Date: Sat, 8 May 2010 11:58:22 -0700
  592. Subject: [PATCH 2/9] Fix intra refresh behavior with I-frames
  593. Intra refresh still allows I-frames (for scenecuts/etc).
  594. Now I-frames count as a full refresh, as opposed to instantly triggering a refresh.
  595.  
  596. ---
  597. common/frame.h | 1 +
  598. encoder/encoder.c | 28 +++++++++++++++++-----------
  599. 2 files changed, 18 insertions(+), 11 deletions(-)
  600.  
  601. diff --git a/common/frame.h b/common/frame.h
  602. index 357929e..e2766ad 100644
  603. --- a/common/frame.h
  604. +++ b/common/frame.h
  605. @@ -142,6 +142,7 @@ typedef struct x264_frame
  606. float f_pir_position;
  607. int i_pir_start_col;
  608. int i_pir_end_col;
  609. + int i_frames_since_pir;
  610. } x264_frame_t;
  611.  
  612. /* synchronized frame list */
  613. diff --git a/encoder/encoder.c b/encoder/encoder.c
  614. index 7ad4295..7c5a64f 100644
  615. --- a/encoder/encoder.c
  616. +++ b/encoder/encoder.c
  617. @@ -2375,25 +2375,31 @@ int x264_encoder_encode( x264_t *h,
  618. h->i_nal_type = i_nal_type;
  619. h->i_nal_ref_idc = i_nal_ref_idc;
  620.  
  621. - if( h->param.b_intra_refresh && h->fenc->i_type == X264_TYPE_P )
  622. + if( h->param.b_intra_refresh )
  623. {
  624. - int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
  625. - float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
  626. - int max_position = (int)(increment * h->param.i_keyint_max);
  627. - if( IS_X264_TYPE_I( h->fref0[0]->i_type ) )
  628. - h->fdec->f_pir_position = 0;
  629. - else
  630. + if( IS_X264_TYPE_I( h->fenc->i_type ) )
  631. + {
  632. + h->fdec->i_frames_since_pir = 0;
  633. + /* PIR is currently only supported with ref == 1, so any intra frame effectively refreshes
  634. + * the whole frame and counts as an intra refresh. */
  635. + h->fdec->f_pir_position = h->sps->i_mb_width;
  636. + }
  637. + else if( h->fenc->i_type == X264_TYPE_P )
  638. {
  639. + int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
  640. + float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
  641. h->fdec->f_pir_position = h->fref0[0]->f_pir_position;
  642. - if( h->fdec->f_pir_position+0.5 >= max_position )
  643. + h->fdec->i_frames_since_pir = h->fref0[0]->i_frames_since_pir + pocdiff;
  644. + if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max )
  645. {
  646. h->fdec->f_pir_position = 0;
  647. + h->fdec->i_frames_since_pir = 0;
  648. h->fenc->b_keyframe = 1;
  649. }
  650. + h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
  651. + h->fdec->f_pir_position += increment * pocdiff;
  652. + h->fdec->i_pir_end_col = h->fdec->f_pir_position+0.5;
  653. }
  654. - h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
  655. - h->fdec->f_pir_position += increment * pocdiff;
  656. - h->fdec->i_pir_end_col = h->fdec->f_pir_position+0.5;
  657. }
  658.  
  659. if( h->fenc->b_keyframe )
  660. --
  661. 1.7.0.4
  662.  
  663.  
  664. From 9d79dc5866ad8836dfd7f115ba2475e0d7c4b7c6 Mon Sep 17 00:00:00 2001
  665. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  666. Date: Sat, 8 May 2010 12:07:13 -0700
  667. Subject: [PATCH 3/9] Add API function to trigger intra refresh
  668. Useful for interactive applications where the encoder knows that packet loss has occurred on the client.
  669. Full documentation is in x264.h.
  670.  
  671. ---
  672. common/common.h | 2 ++
  673. encoder/encoder.c | 10 +++++++++-
  674. x264.h | 10 +++++++++-
  675. 3 files changed, 20 insertions(+), 2 deletions(-)
  676.  
  677. diff --git a/common/common.h b/common/common.h
  678. index 91d5030..f673648 100644
  679. --- a/common/common.h
  680. +++ b/common/common.h
  681. @@ -408,6 +408,8 @@ struct x264_t
  682. int i_coded_fields_lookahead; /* Use separate counters for lookahead */
  683. int i_cpb_delay_lookahead;
  684.  
  685. + int b_queued_intra_refresh;
  686. +
  687. /* We use only one SPS and one PPS */
  688. x264_sps_t sps_array[1];
  689. x264_sps_t *sps;
  690. diff --git a/encoder/encoder.c b/encoder/encoder.c
  691. index 7c5a64f..9ebe303 100644
  692. --- a/encoder/encoder.c
  693. +++ b/encoder/encoder.c
  694. @@ -2131,6 +2131,11 @@ static int x264_threaded_slices_write( x264_t *h )
  695. return 0;
  696. }
  697.  
  698. +void x264_encoder_intra_refresh( x264_t *h )
  699. +{
  700. + h->b_queued_intra_refresh = 1;
  701. +}
  702. +
  703. /****************************************************************************
  704. * x264_encoder_encode:
  705. * XXX: i_poc : is the poc of the current given picture
  706. @@ -2380,6 +2385,7 @@ int x264_encoder_encode( x264_t *h,
  707. if( IS_X264_TYPE_I( h->fenc->i_type ) )
  708. {
  709. h->fdec->i_frames_since_pir = 0;
  710. + h->b_queued_intra_refresh = 0;
  711. /* PIR is currently only supported with ref == 1, so any intra frame effectively refreshes
  712. * the whole frame and counts as an intra refresh. */
  713. h->fdec->f_pir_position = h->sps->i_mb_width;
  714. @@ -2390,10 +2396,12 @@ int x264_encoder_encode( x264_t *h,
  715. float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
  716. h->fdec->f_pir_position = h->fref0[0]->f_pir_position;
  717. h->fdec->i_frames_since_pir = h->fref0[0]->i_frames_since_pir + pocdiff;
  718. - if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max )
  719. + if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max ||
  720. + (h->b_queued_intra_refresh && h->fdec->f_pir_position + 0.5 >= h->sps->i_mb_width) )
  721. {
  722. h->fdec->f_pir_position = 0;
  723. h->fdec->i_frames_since_pir = 0;
  724. + h->b_queued_intra_refresh = 0;
  725. h->fenc->b_keyframe = 1;
  726. }
  727. h->fdec->i_pir_start_col = h->fdec->f_pir_position+0.5;
  728. diff --git a/x264.h b/x264.h
  729. index 83f087e..f568dc5 100644
  730. --- a/x264.h
  731. +++ b/x264.h
  732. @@ -35,7 +35,7 @@
  733.  
  734. #include <stdarg.h>
  735.  
  736. -#define X264_BUILD 94
  737. +#define X264_BUILD 95
  738.  
  739. /* x264_t:
  740. * opaque handler for encoder */
  741. @@ -639,5 +639,13 @@ void x264_encoder_close ( x264_t * );
  742. * return the number of currently delayed (buffered) frames
  743. * this should be used at the end of the stream, to know when you have all the encoded frames. */
  744. int x264_encoder_delayed_frames( x264_t * );
  745. +/* x264_encoder_intra_refresh:
  746. + * If an intra refresh is not in progress, begin one with the next P-frame.
  747. + * If an intra refresh is in progress, begin one as soon as the current one finishes.
  748. + * Requires that b_intra_refresh be set.
  749. + * Useful for interactive streaming where the client can tell the server that packet loss has
  750. + * occurred. In this case, keyint can be set to an extremely high value so that intra refreshes
  751. + * only occur when calling x264_encoder_intra_refresh. */
  752. +void x264_encoder_intra_refresh( x264_t * );
  753.  
  754. #endif
  755. --
  756. 1.7.0.4
  757.  
  758.  
  759. From 71255d6474c29358d9b30b1791642eaac43911f8 Mon Sep 17 00:00:00 2001
  760. From: Henrik Gramner <hengar-6@student.ltu.se>
  761. Date: Mon, 10 May 2010 23:27:36 +0200
  762. Subject: [PATCH 4/9] Shrink even more constant arrays
  763.  
  764. ---
  765. common/arm/mc-c.c | 4 ++--
  766. common/mc.c | 4 ++--
  767. common/ppc/mc.c | 4 ++--
  768. common/set.c | 10 +++++-----
  769. common/x86/mc-c.c | 4 ++--
  770. encoder/encoder.c | 4 ++--
  771. encoder/me.c | 2 +-
  772. encoder/set.c | 14 +++++---------
  773. 8 files changed, 21 insertions(+), 25 deletions(-)
  774.  
  775. diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
  776. index 0a7b734..d294eff 100644
  777. --- a/common/arm/mc-c.c
  778. +++ b/common/arm/mc-c.c
  779. @@ -112,8 +112,8 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int,
  780. x264_mc_copy_w16_neon,
  781. };
  782.  
  783. -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  784. -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  785. +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  786. +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  787.  
  788. static void mc_luma_neon( uint8_t *dst, int i_dst_stride,
  789. uint8_t *src[4], int i_src_stride,
  790. diff --git a/common/mc.c b/common/mc.c
  791. index ada8bdc..e0dc659 100644
  792. --- a/common/mc.c
  793. +++ b/common/mc.c
  794. @@ -203,8 +203,8 @@ static void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *s
  795. }
  796. }
  797.  
  798. -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  799. -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  800. +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  801. +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  802.  
  803. static void mc_luma( uint8_t *dst, int i_dst_stride,
  804. uint8_t *src[4], int i_src_stride,
  805. diff --git a/common/ppc/mc.c b/common/ppc/mc.c
  806. index 26b81f8..83c60b1 100644
  807. --- a/common/ppc/mc.c
  808. +++ b/common/ppc/mc.c
  809. @@ -37,8 +37,8 @@ typedef void (*pf_mc_t)( uint8_t *src, int i_src,
  810. uint8_t *dst, int i_dst, int i_height );
  811.  
  812.  
  813. -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  814. -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  815. +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  816. +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  817.  
  818.  
  819. static inline int x264_tapfilter( uint8_t *pix, int i_pix_next )
  820. diff --git a/common/set.c b/common/set.c
  821. index 50d4213..16cff8e 100644
  822. --- a/common/set.c
  823. +++ b/common/set.c
  824. @@ -23,7 +23,7 @@
  825. #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
  826. #define DIV(n,d) (((n) + ((d)>>1)) / (d))
  827.  
  828. -static const int dequant4_scale[6][3] =
  829. +static const uint8_t dequant4_scale[6][3] =
  830. {
  831. { 10, 13, 16 },
  832. { 11, 14, 18 },
  833. @@ -32,7 +32,7 @@ static const int dequant4_scale[6][3] =
  834. { 16, 20, 25 },
  835. { 18, 23, 29 }
  836. };
  837. -static const int quant4_scale[6][3] =
  838. +static const uint16_t quant4_scale[6][3] =
  839. {
  840. { 13107, 8066, 5243 },
  841. { 11916, 7490, 4660 },
  842. @@ -42,11 +42,11 @@ static const int quant4_scale[6][3] =
  843. { 7282, 4559, 2893 },
  844. };
  845.  
  846. -static const int quant8_scan[16] =
  847. +static const uint8_t quant8_scan[16] =
  848. {
  849. 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
  850. };
  851. -static const int dequant8_scale[6][6] =
  852. +static const uint8_t dequant8_scale[6][6] =
  853. {
  854. { 20, 18, 32, 19, 25, 24 },
  855. { 22, 19, 35, 21, 28, 26 },
  856. @@ -55,7 +55,7 @@ static const int dequant8_scale[6][6] =
  857. { 32, 28, 51, 30, 40, 38 },
  858. { 36, 32, 58, 34, 46, 43 },
  859. };
  860. -static const int quant8_scale[6][6] =
  861. +static const uint16_t quant8_scale[6][6] =
  862. {
  863. { 13107, 11428, 20972, 12222, 16777, 15481 },
  864. { 11916, 10826, 19174, 11058, 14980, 14290 },
  865. diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
  866. index 6d386f6..f641cff 100644
  867. --- a/common/x86/mc-c.c
  868. +++ b/common/x86/mc-c.c
  869. @@ -228,8 +228,8 @@ static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
  870. }
  871. }
  872.  
  873. -static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  874. -static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  875. +static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  876. +static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  877.  
  878. #define MC_LUMA(name,instr1,instr2)\
  879. static void mc_luma_##name( uint8_t *dst, int i_dst_stride,\
  880. diff --git a/encoder/encoder.c b/encoder/encoder.c
  881. index 9ebe303..aa7dffe 100644
  882. --- a/encoder/encoder.c
  883. +++ b/encoder/encoder.c
  884. @@ -2815,8 +2815,8 @@ void x264_encoder_close ( x264_t *h )
  885. /* Slices used and PSNR */
  886. for( int i = 0; i < 5; i++ )
  887. {
  888. - static const int slice_order[] = { SLICE_TYPE_I, SLICE_TYPE_SI, SLICE_TYPE_P, SLICE_TYPE_SP, SLICE_TYPE_B };
  889. - static const char *slice_name[] = { "P", "B", "I", "SP", "SI" };
  890. + static const uint8_t slice_order[] = { SLICE_TYPE_I, SLICE_TYPE_SI, SLICE_TYPE_P, SLICE_TYPE_SP, SLICE_TYPE_B };
  891. + static const char * const slice_name[] = { "P", "B", "I", "SP", "SI" };
  892. int i_slice = slice_order[i];
  893.  
  894. if( h->stat.i_frame_count[i_slice] > 0 )
  895. diff --git a/encoder/me.c b/encoder/me.c
  896. index 5e113f0..a35da53 100644
  897. --- a/encoder/me.c
  898. +++ b/encoder/me.c
  899. @@ -484,7 +484,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
  900. int i = 1;
  901. do
  902. {
  903. - static const int hex4[16][2] = {
  904. + static const int8_t hex4[16][2] = {
  905. { 0,-4}, { 0, 4}, {-2,-3}, { 2,-3},
  906. {-4,-2}, { 4,-2}, {-4,-1}, { 4,-1},
  907. {-4, 0}, { 4, 0}, {-4, 1}, { 4, 1},
  908. diff --git a/encoder/set.c b/encoder/set.c
  909. index e3a071c..ce52a4b 100644
  910. --- a/encoder/set.c
  911. +++ b/encoder/set.c
  912. @@ -315,26 +315,22 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
  913. if( sps->vui.b_aspect_ratio_info_present )
  914. {
  915. int i;
  916. - static const struct { int w, h; int sar; } sar[] =
  917. + static const struct { uint8_t w, h, sar; } sar[] =
  918. {
  919. { 1, 1, 1 }, { 12, 11, 2 }, { 10, 11, 3 }, { 16, 11, 4 },
  920. { 40, 33, 5 }, { 24, 11, 6 }, { 20, 11, 7 }, { 32, 11, 8 },
  921. { 80, 33, 9 }, { 18, 11, 10}, { 15, 11, 11}, { 64, 33, 12},
  922. - { 160,99, 13}, { 0, 0, -1 }
  923. + { 160,99, 13}, { 0, 0, 255 }
  924. };
  925. - for( i = 0; sar[i].sar != -1; i++ )
  926. + for( i = 0; sar[i].sar != 255; i++ )
  927. {
  928. if( sar[i].w == sps->vui.i_sar_width &&
  929. sar[i].h == sps->vui.i_sar_height )
  930. break;
  931. }
  932. - if( sar[i].sar != -1 )
  933. + bs_write( s, 8, sar[i].sar );
  934. + if( sar[i].sar == 255 ) /* aspect_ratio_idc (extended) */
  935. {
  936. - bs_write( s, 8, sar[i].sar );
  937. - }
  938. - else
  939. - {
  940. - bs_write( s, 8, 255); /* aspect_ratio_idc (extended) */
  941. bs_write( s, 16, sps->vui.i_sar_width );
  942. bs_write( s, 16, sps->vui.i_sar_height );
  943. }
  944. --
  945. 1.7.0.4
  946.  
  947.  
  948. From 4f801540bc1b8802b8689e1f7440c3f06db5b69b Mon Sep 17 00:00:00 2001
  949. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  950. Date: Mon, 10 May 2010 22:59:12 -0700
  951. Subject: [PATCH 5/9] Fix condition for printing rc=cbr in options SEI
  952. Also fix crf-max formatting.
  953.  
  954. ---
  955. common/common.c | 4 ++--
  956. 1 files changed, 2 insertions(+), 2 deletions(-)
  957.  
  958. diff --git a/common/common.c b/common/common.c
  959. index 848c6de..ad7cf98 100644
  960. --- a/common/common.c
  961. +++ b/common/common.c
  962. @@ -1237,7 +1237,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  963. s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
  964.  
  965. s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
  966. - ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size == p->rc.i_bitrate ? "cbr" : "abr" )
  967. + ( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_max_bitrate == p->rc.i_bitrate ? "cbr" : "abr" )
  968. : p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
  969. if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
  970. {
  971. @@ -1256,7 +1256,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
  972. s += sprintf( s, " vbv_maxrate=%d vbv_bufsize=%d",
  973. p->rc.i_vbv_max_bitrate, p->rc.i_vbv_buffer_size );
  974. if( p->rc.i_rc_method == X264_RC_CRF )
  975. - s += sprintf( s, " crf-max=%.1f", p->rc.f_rf_constant_max );
  976. + s += sprintf( s, " crf_max=%.1f", p->rc.f_rf_constant_max );
  977. }
  978. }
  979. else if( p->rc.i_rc_method == X264_RC_CQP )
  980. --
  981. 1.7.0.4
  982.  
  983.  
  984. From e8e064bea830af884739272e8dea150dff2e921b Mon Sep 17 00:00:00 2001
  985. From: Anton Mitrofanov <BugMaster@narod.ru>
  986. Date: Wed, 12 May 2010 01:57:38 +0400
  987. Subject: [PATCH 6/9] Fix crash with sliced-threads on Phenom
  988.  
  989. ---
  990. encoder/encoder.c | 4 ++++
  991. 1 files changed, 4 insertions(+), 0 deletions(-)
  992.  
  993. diff --git a/encoder/encoder.c b/encoder/encoder.c
  994. index aa7dffe..45fe2dd 100644
  995. --- a/encoder/encoder.c
  996. +++ b/encoder/encoder.c
  997. @@ -2066,6 +2066,10 @@ static void *x264_slices_write( x264_t *h )
  998. static int x264_threaded_slices_write( x264_t *h )
  999. {
  1000. void *ret = NULL;
  1001. +#ifdef HAVE_MMX
  1002. + if( h->param.cpu&X264_CPU_SSE_MISALIGN )
  1003. + x264_cpu_mask_misalign_sse();
  1004. +#endif
  1005. /* set first/last mb and sync contexts */
  1006. for( int i = 0; i < h->param.i_threads; i++ )
  1007. {
  1008. --
  1009. 1.7.0.4
  1010.  
  1011.  
  1012. From a0c0f1870e212ed3ba2dea3f6f61ea997b4e538d Mon Sep 17 00:00:00 2001
  1013. From: Anton Mitrofanov <BugMaster@narod.ru>
  1014. Date: Wed, 12 May 2010 22:05:34 +0400
  1015. Subject: [PATCH 7/9] Fix bitrate calculation in progress status
  1016. Was slightly incorrect due to using pts, which is out of order.
  1017.  
  1018. ---
  1019. x264.c | 34 +++++++++++++++++++++++++---------
  1020. 1 files changed, 25 insertions(+), 9 deletions(-)
  1021.  
  1022. diff --git a/x264.c b/x264.c
  1023. index 8f4e372..1a85c74 100644
  1024. --- a/x264.c
  1025. +++ b/x264.c
  1026. @@ -1312,7 +1312,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
  1027. * Encode:
  1028. *****************************************************************************/
  1029.  
  1030. -static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *last_pts )
  1031. +static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *last_dts )
  1032. {
  1033. x264_picture_t pic_out;
  1034. x264_nal_t *nal;
  1035. @@ -1330,18 +1330,22 @@ static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
  1036. if( i_frame_size )
  1037. {
  1038. i_frame_size = output.write_frame( hout, nal[0].p_payload, i_frame_size, &pic_out );
  1039. - *last_pts = pic_out.i_pts;
  1040. + *last_dts = pic_out.i_dts;
  1041. }
  1042.  
  1043. return i_frame_size;
  1044. }
  1045.  
  1046. -static void Print_status( int64_t i_start, int i_frame, int i_frame_total, int64_t i_file, x264_param_t *param, int64_t last_pts )
  1047. +static void Print_status( int64_t i_start, int i_frame, int i_frame_total, int64_t i_file, x264_param_t *param, int64_t last_ts )
  1048. {
  1049. char buf[200];
  1050. int64_t i_elapsed = x264_mdate() - i_start;
  1051. double fps = i_elapsed > 0 ? i_frame * 1000000. / i_elapsed : 0;
  1052. - double bitrate = (double) i_file * 8 / ( (double) last_pts * 1000 * param->i_timebase_num / param->i_timebase_den );
  1053. + double bitrate;
  1054. + if( last_ts )
  1055. + bitrate = (double) i_file * 8 / ( (double) last_ts * 1000 * param->i_timebase_num / param->i_timebase_den );
  1056. + else
  1057. + bitrate = (double) i_file * 8 / ( (double) 1000 * param->i_fps_den / param->i_fps_num );
  1058. if( i_frame_total )
  1059. {
  1060. int eta = i_elapsed * (i_frame_total - i_frame) / ((int64_t)i_frame * 1000000);
  1061. @@ -1369,7 +1373,9 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
  1062. int64_t i_file = 0;
  1063. int i_frame_size;
  1064. int i_update_interval;
  1065. - int64_t last_pts = 0;
  1066. + int64_t last_dts = 0;
  1067. + int64_t prev_dts = 0;
  1068. + int64_t first_dts = 0;
  1069. # define MAX_PTS_WARNING 3 /* arbitrary */
  1070. int pts_warning_cnt = 0;
  1071. int64_t largest_pts = -1;
  1072. @@ -1506,12 +1512,17 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
  1073. pic.i_qpplus1 = 0;
  1074. }
  1075.  
  1076. - i_frame_size = Encode_frame( h, opt->hout, &pic, &last_pts );
  1077. + prev_dts = last_dts;
  1078. + i_frame_size = Encode_frame( h, opt->hout, &pic, &last_dts );
  1079. if( i_frame_size < 0 )
  1080. return -1;
  1081. i_file += i_frame_size;
  1082. if( i_frame_size )
  1083. + {
  1084. i_frame_output++;
  1085. + if( i_frame_output == 1 )
  1086. + first_dts = prev_dts = last_dts;
  1087. + }
  1088.  
  1089. i_frame++;
  1090.  
  1091. @@ -1520,19 +1531,24 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
  1092.  
  1093. /* update status line (up to 1000 times per input file) */
  1094. if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
  1095. - Print_status( i_start, i_frame_output, i_frame_total, i_file, param, last_pts );
  1096. + Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
  1097. }
  1098. /* Flush delayed frames */
  1099. while( !b_ctrl_c && x264_encoder_delayed_frames( h ) )
  1100. {
  1101. - i_frame_size = Encode_frame( h, opt->hout, NULL, &last_pts );
  1102. + prev_dts = last_dts;
  1103. + i_frame_size = Encode_frame( h, opt->hout, NULL, &last_dts );
  1104. if( i_frame_size < 0 )
  1105. return -1;
  1106. i_file += i_frame_size;
  1107. if( i_frame_size )
  1108. + {
  1109. i_frame_output++;
  1110. + if( i_frame_output == 1 )
  1111. + first_dts = prev_dts = last_dts;
  1112. + }
  1113. if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
  1114. - Print_status( i_start, i_frame_output, i_frame_total, i_file, param, last_pts );
  1115. + Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
  1116. }
  1117. if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
  1118. fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
  1119. --
  1120. 1.7.0.4
  1121.  
  1122.  
  1123. From 54403196986a1c0e3924d22e002050e5b7ea0da5 Mon Sep 17 00:00:00 2001
  1124. From: Kieran Kunhya <kieran@kunhya.com>
  1125. Date: Thu, 13 May 2010 19:13:35 +0100
  1126. Subject: [PATCH 8/9] Fix typo in pulldown
  1127.  
  1128. ---
  1129. x264.c | 2 +-
  1130. 1 files changed, 1 insertions(+), 1 deletions(-)
  1131.  
  1132. diff --git a/x264.c b/x264.c
  1133. index 1a85c74..862aabb 100644
  1134. --- a/x264.c
  1135. +++ b/x264.c
  1136. @@ -120,7 +120,7 @@ enum pulldown_type_e
  1137.  
  1138. static const cli_pulldown_t pulldown_values[] =
  1139. {
  1140. - [X264_PULLDOWN_22] = {1, {TB}, 2.0},
  1141. + [X264_PULLDOWN_22] = {1, {TB}, 1.0},
  1142. [X264_PULLDOWN_32] = {4, {TBT, BT, BTB, TB}, 1.25},
  1143. [X264_PULLDOWN_64] = {2, {PIC_STRUCT_DOUBLE, PIC_STRUCT_TRIPLE}, 1.0},
  1144. [X264_PULLDOWN_DOUBLE] = {1, {PIC_STRUCT_DOUBLE}, 2.0},
  1145. --
  1146. 1.7.0.4
  1147.  
  1148.  
  1149. From bca4c46432572b4bd7d01178762a16d81f5059da Mon Sep 17 00:00:00 2001
  1150. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  1151. Date: Sat, 15 May 2010 14:48:58 -0700
  1152. Subject: [PATCH 9/9] Overhaul CABAC: faster, less cache usage
  1153. Horribly munge up the CABAC tables to allow deduplication of some data.
  1154. Saves 256 bytes of L1d cache in non-RD, 512 bytes in RD.
  1155. Add asm versions of bypass and terminal; save L1i cache by re-using putbyte code.
  1156. Further optimize encode_decision.
  1157. All 3 primary CABAC functions fit in under 256 bytes of code total on x86_64.
  1158.  
  1159. ---
  1160. common/cabac.c | 185 ++++++++++++++++++++----------------------------
  1161. common/cabac.h | 22 ++++--
  1162. common/x86/cabac-a.asm | 76 ++++++++++++++++----
  1163. common/x86/x86inc.asm | 2 +-
  1164. encoder/rdo.c | 2 +
  1165. tools/checkasm.c | 46 ++++++++++--
  1166. 6 files changed, 195 insertions(+), 138 deletions(-)
  1167.  
  1168. diff --git a/common/cabac.c b/common/cabac.c
  1169. index f50aef6..11988a1 100644
  1170. --- a/common/cabac.c
  1171. +++ b/common/cabac.c
  1172. @@ -664,75 +664,44 @@ static const int8_t x264_cabac_context_init_PB[3][460][2] =
  1173. }
  1174. };
  1175.  
  1176. -/* FIXME could avoid this duplication by reversing the order of states
  1177. - * with MPS=0, but that would uglify the other tables */
  1178. -const uint8_t x264_cabac_range_lps[128][4] =
  1179. +const uint8_t x264_cabac_range_lps[64][4] =
  1180. {
  1181. - { 2, 2, 2, 2 },
  1182. - { 6, 7, 8, 9 }, { 6, 7, 9, 10 }, { 6, 8, 9, 11 },
  1183. - { 7, 8, 10, 11 }, { 7, 9, 10, 12 }, { 7, 9, 11, 12 },
  1184. - { 8, 9, 11, 13 }, { 8, 10, 12, 14 }, { 9, 11, 12, 14 },
  1185. - { 9, 11, 13, 15 }, { 10, 12, 14, 16 }, { 10, 12, 15, 17 },
  1186. - { 11, 13, 15, 18 }, { 11, 14, 16, 19 }, { 12, 14, 17, 20 },
  1187. - { 12, 15, 18, 21 }, { 13, 16, 19, 22 }, { 14, 17, 20, 23 },
  1188. - { 14, 18, 21, 24 }, { 15, 19, 22, 25 }, { 16, 20, 23, 27 },
  1189. - { 17, 21, 25, 28 }, { 18, 22, 26, 30 }, { 19, 23, 27, 31 },
  1190. - { 20, 24, 29, 33 }, { 21, 26, 30, 35 }, { 22, 27, 32, 37 },
  1191. - { 23, 28, 33, 39 }, { 24, 30, 35, 41 }, { 26, 31, 37, 43 },
  1192. - { 27, 33, 39, 45 }, { 29, 35, 41, 48 }, { 30, 37, 43, 50 },
  1193. - { 32, 39, 46, 53 }, { 33, 41, 48, 56 }, { 35, 43, 51, 59 },
  1194. - { 37, 45, 54, 62 }, { 39, 48, 56, 65 }, { 41, 50, 59, 69 },
  1195. - { 43, 53, 63, 72 }, { 46, 56, 66, 76 }, { 48, 59, 69, 80 },
  1196. - { 51, 62, 73, 85 }, { 53, 65, 77, 89 }, { 56, 69, 81, 94 },
  1197. - { 59, 72, 86, 99 }, { 62, 76, 90, 104 }, { 66, 80, 95, 110 },
  1198. - { 69, 85, 100, 116 }, { 73, 89, 105, 122 }, { 77, 94, 111, 128 },
  1199. - { 81, 99, 117, 135 }, { 85, 104, 123, 142 }, { 90, 110, 130, 150 },
  1200. - { 95, 116, 137, 158 }, { 100, 122, 144, 166 }, { 105, 128, 152, 175 },
  1201. - { 111, 135, 160, 185 }, { 116, 142, 169, 195 }, { 123, 150, 178, 205 },
  1202. - { 128, 158, 187, 216 }, { 128, 167, 197, 227 }, { 128, 176, 208, 240 },
  1203. -
  1204. - { 128, 176, 208, 240 }, { 128, 167, 197, 227 }, { 128, 158, 187, 216 },
  1205. - { 123, 150, 178, 205 }, { 116, 142, 169, 195 }, { 111, 135, 160, 185 },
  1206. - { 105, 128, 152, 175 }, { 100, 122, 144, 166 }, { 95, 116, 137, 158 },
  1207. - { 90, 110, 130, 150 }, { 85, 104, 123, 142 }, { 81, 99, 117, 135 },
  1208. - { 77, 94, 111, 128 }, { 73, 89, 105, 122 }, { 69, 85, 100, 116 },
  1209. - { 66, 80, 95, 110 }, { 62, 76, 90, 104 }, { 59, 72, 86, 99 },
  1210. - { 56, 69, 81, 94 }, { 53, 65, 77, 89 }, { 51, 62, 73, 85 },
  1211. - { 48, 59, 69, 80 }, { 46, 56, 66, 76 }, { 43, 53, 63, 72 },
  1212. - { 41, 50, 59, 69 }, { 39, 48, 56, 65 }, { 37, 45, 54, 62 },
  1213. - { 35, 43, 51, 59 }, { 33, 41, 48, 56 }, { 32, 39, 46, 53 },
  1214. - { 30, 37, 43, 50 }, { 29, 35, 41, 48 }, { 27, 33, 39, 45 },
  1215. - { 26, 31, 37, 43 }, { 24, 30, 35, 41 }, { 23, 28, 33, 39 },
  1216. - { 22, 27, 32, 37 }, { 21, 26, 30, 35 }, { 20, 24, 29, 33 },
  1217. - { 19, 23, 27, 31 }, { 18, 22, 26, 30 }, { 17, 21, 25, 28 },
  1218. - { 16, 20, 23, 27 }, { 15, 19, 22, 25 }, { 14, 18, 21, 24 },
  1219. - { 14, 17, 20, 23 }, { 13, 16, 19, 22 }, { 12, 15, 18, 21 },
  1220. - { 12, 14, 17, 20 }, { 11, 14, 16, 19 }, { 11, 13, 15, 18 },
  1221. - { 10, 12, 15, 17 }, { 10, 12, 14, 16 }, { 9, 11, 13, 15 },
  1222. - { 9, 11, 12, 14 }, { 8, 10, 12, 14 }, { 8, 9, 11, 13 },
  1223. - { 7, 9, 11, 12 }, { 7, 9, 10, 12 }, { 7, 8, 10, 11 },
  1224. - { 6, 8, 9, 11 }, { 6, 7, 9, 10 }, { 6, 7, 8, 9 },
  1225. - { 2, 2, 2, 2 },
  1226. + { 2, 2, 2, 2}, { 6, 7, 8, 9}, { 6, 7, 9, 10}, { 6, 8, 9, 11},
  1227. + { 7, 8, 10, 11}, { 7, 9, 10, 12}, { 7, 9, 11, 12}, { 8, 9, 11, 13},
  1228. + { 8, 10, 12, 14}, { 9, 11, 12, 14}, { 9, 11, 13, 15}, { 10, 12, 14, 16},
  1229. + { 10, 12, 15, 17}, { 11, 13, 15, 18}, { 11, 14, 16, 19}, { 12, 14, 17, 20},
  1230. + { 12, 15, 18, 21}, { 13, 16, 19, 22}, { 14, 17, 20, 23}, { 14, 18, 21, 24},
  1231. + { 15, 19, 22, 25}, { 16, 20, 23, 27}, { 17, 21, 25, 28}, { 18, 22, 26, 30},
  1232. + { 19, 23, 27, 31}, { 20, 24, 29, 33}, { 21, 26, 30, 35}, { 22, 27, 32, 37},
  1233. + { 23, 28, 33, 39}, { 24, 30, 35, 41}, { 26, 31, 37, 43}, { 27, 33, 39, 45},
  1234. + { 29, 35, 41, 48}, { 30, 37, 43, 50}, { 32, 39, 46, 53}, { 33, 41, 48, 56},
  1235. + { 35, 43, 51, 59}, { 37, 45, 54, 62}, { 39, 48, 56, 65}, { 41, 50, 59, 69},
  1236. + { 43, 53, 63, 72}, { 46, 56, 66, 76}, { 48, 59, 69, 80}, { 51, 62, 73, 85},
  1237. + { 53, 65, 77, 89}, { 56, 69, 81, 94}, { 59, 72, 86, 99}, { 62, 76, 90, 104},
  1238. + { 66, 80, 95, 110}, { 69, 85, 100, 116}, { 73, 89, 105, 122}, { 77, 94, 111, 128},
  1239. + { 81, 99, 117, 135}, { 85, 104, 123, 142}, { 90, 110, 130, 150}, { 95, 116, 137, 158},
  1240. + {100, 122, 144, 166}, {105, 128, 152, 175}, {111, 135, 160, 185}, {116, 142, 169, 195},
  1241. + {123, 150, 178, 205}, {128, 158, 187, 216}, {128, 167, 197, 227}, {128, 176, 208, 240}
  1242. };
  1243.  
  1244. const uint8_t x264_cabac_transition[128][2] =
  1245. {
  1246. - { 0, 0}, { 1, 25}, { 1, 25}, { 2, 26}, { 3, 26}, { 4, 26}, { 5, 27}, { 6, 27},
  1247. - { 7, 27}, { 8, 28}, { 9, 28}, { 10, 28}, { 11, 29}, { 12, 29}, { 13, 30}, { 14, 30},
  1248. - { 15, 30}, { 16, 31}, { 17, 31}, { 18, 32}, { 19, 33}, { 20, 33}, { 21, 33}, { 22, 34},
  1249. - { 23, 34}, { 24, 35}, { 25, 36}, { 26, 36}, { 27, 37}, { 28, 37}, { 29, 38}, { 30, 39},
  1250. - { 31, 39}, { 32, 40}, { 33, 41}, { 34, 41}, { 35, 42}, { 36, 42}, { 37, 44}, { 38, 44},
  1251. - { 39, 45}, { 40, 45}, { 41, 47}, { 42, 47}, { 43, 48}, { 44, 48}, { 45, 50}, { 46, 50},
  1252. - { 47, 51}, { 48, 52}, { 49, 52}, { 50, 54}, { 51, 54}, { 52, 55}, { 53, 56}, { 54, 57},
  1253. - { 55, 58}, { 56, 59}, { 57, 59}, { 58, 61}, { 59, 61}, { 60, 62}, { 61, 63}, { 62, 64},
  1254. - { 63, 65}, { 64, 66}, { 65, 67}, { 66, 68}, { 66, 69}, { 68, 70}, { 68, 71}, { 69, 72},
  1255. - { 70, 73}, { 71, 74}, { 72, 75}, { 73, 76}, { 73, 77}, { 75, 78}, { 75, 79}, { 76, 80},
  1256. - { 77, 81}, { 77, 82}, { 79, 83}, { 79, 84}, { 80, 85}, { 80, 86}, { 82, 87}, { 82, 88},
  1257. - { 83, 89}, { 83, 90}, { 85, 91}, { 85, 92}, { 86, 93}, { 86, 94}, { 87, 95}, { 88, 96},
  1258. - { 88, 97}, { 89, 98}, { 90, 99}, { 90,100}, { 91,101}, { 91,102}, { 92,103}, { 93,104},
  1259. - { 93,105}, { 94,106}, { 94,107}, { 94,108}, { 95,109}, { 96,110}, { 96,111}, { 97,112},
  1260. - { 97,113}, { 97,114}, { 98,115}, { 98,116}, { 99,117}, { 99,118}, { 99,119}, {100,120},
  1261. - {100,121}, {100,122}, {101,123}, {101,124}, {101,125}, {102,126}, {102,126}, {127,127},
  1262. + { 0, 0}, { 1, 1}, { 2, 50}, { 51, 3}, { 2, 50}, { 51, 3}, { 4, 52}, { 53, 5},
  1263. + { 6, 52}, { 53, 7}, { 8, 52}, { 53, 9}, { 10, 54}, { 55, 11}, { 12, 54}, { 55, 13},
  1264. + { 14, 54}, { 55, 15}, { 16, 56}, { 57, 17}, { 18, 56}, { 57, 19}, { 20, 56}, { 57, 21},
  1265. + { 22, 58}, { 59, 23}, { 24, 58}, { 59, 25}, { 26, 60}, { 61, 27}, { 28, 60}, { 61, 29},
  1266. + { 30, 60}, { 61, 31}, { 32, 62}, { 63, 33}, { 34, 62}, { 63, 35}, { 36, 64}, { 65, 37},
  1267. + { 38, 66}, { 67, 39}, { 40, 66}, { 67, 41}, { 42, 66}, { 67, 43}, { 44, 68}, { 69, 45},
  1268. + { 46, 68}, { 69, 47}, { 48, 70}, { 71, 49}, { 50, 72}, { 73, 51}, { 52, 72}, { 73, 53},
  1269. + { 54, 74}, { 75, 55}, { 56, 74}, { 75, 57}, { 58, 76}, { 77, 59}, { 60, 78}, { 79, 61},
  1270. + { 62, 78}, { 79, 63}, { 64, 80}, { 81, 65}, { 66, 82}, { 83, 67}, { 68, 82}, { 83, 69},
  1271. + { 70, 84}, { 85, 71}, { 72, 84}, { 85, 73}, { 74, 88}, { 89, 75}, { 76, 88}, { 89, 77},
  1272. + { 78, 90}, { 91, 79}, { 80, 90}, { 91, 81}, { 82, 94}, { 95, 83}, { 84, 94}, { 95, 85},
  1273. + { 86, 96}, { 97, 87}, { 88, 96}, { 97, 89}, { 90, 100}, {101, 91}, { 92, 100}, {101, 93},
  1274. + { 94, 102}, {103, 95}, { 96, 104}, {105, 97}, { 98, 104}, {105, 99}, {100, 108}, {109, 101},
  1275. + {102, 108}, {109, 103}, {104, 110}, {111, 105}, {106, 112}, {113, 107}, {108, 114}, {115, 109},
  1276. + {110, 116}, {117, 111}, {112, 118}, {119, 113}, {114, 118}, {119, 115}, {116, 122}, {123, 117},
  1277. + {118, 122}, {123, 119}, {120, 124}, {125, 121}, {122, 126}, {127, 123}, {124, 127}, {126, 125}
  1278. };
  1279.  
  1280. const uint8_t x264_cabac_renorm_shift[64]= {
  1281. @@ -743,41 +712,40 @@ const uint8_t x264_cabac_renorm_shift[64]= {
  1282. };
  1283.  
  1284. /* -ln2(probability) */
  1285. -#define F(a,b) {FIX8(a),FIX8(b)}
  1286. -const uint16_t x264_cabac_entropy[128][2] =
  1287. +const uint16_t x264_cabac_entropy[128] =
  1288. {
  1289. - F(0.0273,5.7370), F(0.0288,5.6618), F(0.0303,5.5866), F(0.0320,5.5114),
  1290. - F(0.0337,5.4362), F(0.0355,5.3610), F(0.0375,5.2859), F(0.0395,5.2106),
  1291. - F(0.0416,5.1354), F(0.0439,5.0602), F(0.0463,4.9851), F(0.0488,4.9099),
  1292. - F(0.0515,4.8347), F(0.0543,4.7595), F(0.0572,4.6843), F(0.0604,4.6091),
  1293. - F(0.0637,4.5339), F(0.0671,4.4588), F(0.0708,4.3836), F(0.0747,4.3083),
  1294. - F(0.0788,4.2332), F(0.0832,4.1580), F(0.0878,4.0828), F(0.0926,4.0076),
  1295. - F(0.0977,3.9324), F(0.1032,3.8572), F(0.1089,3.7820), F(0.1149,3.7068),
  1296. - F(0.1214,3.6316), F(0.1282,3.5565), F(0.1353,3.4813), F(0.1429,3.4061),
  1297. - F(0.1510,3.3309), F(0.1596,3.2557), F(0.1686,3.1805), F(0.1782,3.1053),
  1298. - F(0.1884,3.0301), F(0.1992,2.9549), F(0.2107,2.8797), F(0.2229,2.8046),
  1299. - F(0.2358,2.7294), F(0.2496,2.6542), F(0.2642,2.5790), F(0.2798,2.5038),
  1300. - F(0.2964,2.4286), F(0.3142,2.3534), F(0.3331,2.2782), F(0.3532,2.2030),
  1301. - F(0.3748,2.1278), F(0.3979,2.0527), F(0.4226,1.9775), F(0.4491,1.9023),
  1302. - F(0.4776,1.8271), F(0.5082,1.7519), F(0.5412,1.6767), F(0.5768,1.6015),
  1303. - F(0.6152,1.5263), F(0.6568,1.4511), F(0.7020,1.3759), F(0.7513,1.3008),
  1304. - F(0.8050,1.2256), F(0.8638,1.1504), F(0.9285,1.0752), F(1.0000,1.0000),
  1305. - F(1.0000,1.0000), F(1.0752,0.9285), F(1.1504,0.8638), F(1.2256,0.8050),
  1306. - F(1.3008,0.7513), F(1.3759,0.7020), F(1.4511,0.6568), F(1.5263,0.6152),
  1307. - F(1.6015,0.5768), F(1.6767,0.5412), F(1.7519,0.5082), F(1.8271,0.4776),
  1308. - F(1.9023,0.4491), F(1.9775,0.4226), F(2.0527,0.3979), F(2.1278,0.3748),
  1309. - F(2.2030,0.3532), F(2.2782,0.3331), F(2.3534,0.3142), F(2.4286,0.2964),
  1310. - F(2.5038,0.2798), F(2.5790,0.2642), F(2.6542,0.2496), F(2.7294,0.2358),
  1311. - F(2.8046,0.2229), F(2.8797,0.2107), F(2.9549,0.1992), F(3.0301,0.1884),
  1312. - F(3.1053,0.1782), F(3.1805,0.1686), F(3.2557,0.1596), F(3.3309,0.1510),
  1313. - F(3.4061,0.1429), F(3.4813,0.1353), F(3.5565,0.1282), F(3.6316,0.1214),
  1314. - F(3.7068,0.1149), F(3.7820,0.1089), F(3.8572,0.1032), F(3.9324,0.0977),
  1315. - F(4.0076,0.0926), F(4.0828,0.0878), F(4.1580,0.0832), F(4.2332,0.0788),
  1316. - F(4.3083,0.0747), F(4.3836,0.0708), F(4.4588,0.0671), F(4.5339,0.0637),
  1317. - F(4.6091,0.0604), F(4.6843,0.0572), F(4.7595,0.0543), F(4.8347,0.0515),
  1318. - F(4.9099,0.0488), F(4.9851,0.0463), F(5.0602,0.0439), F(5.1354,0.0416),
  1319. - F(5.2106,0.0395), F(5.2859,0.0375), F(5.3610,0.0355), F(5.4362,0.0337),
  1320. - F(5.5114,0.0320), F(5.5866,0.0303), F(5.6618,0.0288), F(5.7370,0.0273),
  1321. + FIX8(0.0273), FIX8(5.7370), FIX8(0.0288), FIX8(5.6618),
  1322. + FIX8(0.0303), FIX8(5.5866), FIX8(0.0320), FIX8(5.5114),
  1323. + FIX8(0.0337), FIX8(5.4362), FIX8(0.0355), FIX8(5.3610),
  1324. + FIX8(0.0375), FIX8(5.2859), FIX8(0.0395), FIX8(5.2106),
  1325. + FIX8(0.0416), FIX8(5.1354), FIX8(0.0439), FIX8(5.0602),
  1326. + FIX8(0.0463), FIX8(4.9851), FIX8(0.0488), FIX8(4.9099),
  1327. + FIX8(0.0515), FIX8(4.8347), FIX8(0.0543), FIX8(4.7595),
  1328. + FIX8(0.0572), FIX8(4.6843), FIX8(0.0604), FIX8(4.6091),
  1329. + FIX8(0.0637), FIX8(4.5339), FIX8(0.0671), FIX8(4.4588),
  1330. + FIX8(0.0708), FIX8(4.3836), FIX8(0.0747), FIX8(4.3083),
  1331. + FIX8(0.0788), FIX8(4.2332), FIX8(0.0832), FIX8(4.1580),
  1332. + FIX8(0.0878), FIX8(4.0828), FIX8(0.0926), FIX8(4.0076),
  1333. + FIX8(0.0977), FIX8(3.9324), FIX8(0.1032), FIX8(3.8572),
  1334. + FIX8(0.1089), FIX8(3.7820), FIX8(0.1149), FIX8(3.7068),
  1335. + FIX8(0.1214), FIX8(3.6316), FIX8(0.1282), FIX8(3.5565),
  1336. + FIX8(0.1353), FIX8(3.4813), FIX8(0.1429), FIX8(3.4061),
  1337. + FIX8(0.1510), FIX8(3.3309), FIX8(0.1596), FIX8(3.2557),
  1338. + FIX8(0.1686), FIX8(3.1805), FIX8(0.1782), FIX8(3.1053),
  1339. + FIX8(0.1884), FIX8(3.0301), FIX8(0.1992), FIX8(2.9549),
  1340. + FIX8(0.2107), FIX8(2.8797), FIX8(0.2229), FIX8(2.8046),
  1341. + FIX8(0.2358), FIX8(2.7294), FIX8(0.2496), FIX8(2.6542),
  1342. + FIX8(0.2642), FIX8(2.5790), FIX8(0.2798), FIX8(2.5038),
  1343. + FIX8(0.2964), FIX8(2.4286), FIX8(0.3142), FIX8(2.3534),
  1344. + FIX8(0.3331), FIX8(2.2782), FIX8(0.3532), FIX8(2.2030),
  1345. + FIX8(0.3748), FIX8(2.1278), FIX8(0.3979), FIX8(2.0527),
  1346. + FIX8(0.4226), FIX8(1.9775), FIX8(0.4491), FIX8(1.9023),
  1347. + FIX8(0.4776), FIX8(1.8271), FIX8(0.5082), FIX8(1.7519),
  1348. + FIX8(0.5412), FIX8(1.6767), FIX8(0.5768), FIX8(1.6015),
  1349. + FIX8(0.6152), FIX8(1.5263), FIX8(0.6568), FIX8(1.4511),
  1350. + FIX8(0.7020), FIX8(1.3759), FIX8(0.7513), FIX8(1.3008),
  1351. + FIX8(0.8050), FIX8(1.2256), FIX8(0.8638), FIX8(1.1504),
  1352. + FIX8(0.9285), FIX8(1.0752), FIX8(1.0000), FIX8(1.0000)
  1353. };
  1354.  
  1355.  
  1356. @@ -794,14 +762,17 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
  1357. cabac_context_init = &x264_cabac_context_init_PB[i_model];
  1358.  
  1359. for( int i = 0; i < 460; i++ )
  1360. - cb->state[i] = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
  1361. + {
  1362. + int state = x264_clip3( (((*cabac_context_init)[i][0] * i_qp) >> 4) + (*cabac_context_init)[i][1], 1, 126 );
  1363. + cb->state[i] = (X264_MIN( state, 127-state ) << 1) | (state >> 6);
  1364. + }
  1365. }
  1366.  
  1367. void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
  1368. {
  1369. cb->i_low = 0;
  1370. cb->i_range = 0x01FE;
  1371. - cb->i_queue = -1; // the first bit will be shifted away and not written
  1372. + cb->i_queue = -9; // the first bit will be shifted away and not written
  1373. cb->i_bytes_outstanding = 0;
  1374. cb->p_start = p_data;
  1375. cb->p = p_data;
  1376. @@ -810,10 +781,10 @@ void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
  1377.  
  1378. static inline void x264_cabac_putbyte( x264_cabac_t *cb )
  1379. {
  1380. - if( cb->i_queue >= 8 )
  1381. + if( cb->i_queue >= 0 )
  1382. {
  1383. - int out = cb->i_low >> (cb->i_queue+2);
  1384. - cb->i_low &= (4<<cb->i_queue)-1;
  1385. + int out = cb->i_low >> (cb->i_queue+10);
  1386. + cb->i_low &= (0x400<<cb->i_queue)-1;
  1387. cb->i_queue -= 8;
  1388.  
  1389. if( (out & 0xff) == 0xff )
  1390. @@ -855,9 +826,9 @@ static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
  1391. void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
  1392. {
  1393. int i_state = cb->state[i_ctx];
  1394. - int i_range_lps = x264_cabac_range_lps[i_state][(cb->i_range>>6)-4];
  1395. + int i_range_lps = x264_cabac_range_lps[i_state>>1][(cb->i_range>>6)-4];
  1396. cb->i_range -= i_range_lps;
  1397. - if( b != (i_state >> 6) )
  1398. + if( b != (i_state & 1) )
  1399. {
  1400. cb->i_low += cb->i_range;
  1401. cb->i_range = i_range_lps;
  1402. @@ -866,7 +837,7 @@ void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b )
  1403. x264_cabac_encode_renorm( cb );
  1404. }
  1405.  
  1406. -void x264_cabac_encode_bypass( x264_cabac_t *cb, int b )
  1407. +void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b )
  1408. {
  1409. cb->i_low <<= 1;
  1410. cb->i_low += -b & cb->i_range;
  1411. @@ -892,7 +863,7 @@ void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
  1412. } while( k > 0 );
  1413. }
  1414.  
  1415. -void x264_cabac_encode_terminal( x264_cabac_t *cb )
  1416. +void x264_cabac_encode_terminal_c( x264_cabac_t *cb )
  1417. {
  1418. cb->i_range -= 2;
  1419. x264_cabac_encode_renorm( cb );
  1420. diff --git a/common/cabac.h b/common/cabac.h
  1421. index ef68fe6..9fc3007 100644
  1422. --- a/common/cabac.h
  1423. +++ b/common/cabac.h
  1424. @@ -31,7 +31,7 @@ typedef struct
  1425. int i_range;
  1426.  
  1427. /* bit stream */
  1428. - int i_queue;
  1429. + int i_queue; // stored with an offset of -8 (holds the real queue count minus 8, so a full byte is pending when i_queue >= 0) for faster asm
  1430. int i_bytes_outstanding;
  1431.  
  1432. uint8_t *p_start;
  1433. @@ -46,7 +46,7 @@ typedef struct
  1434. } x264_cabac_t;
  1435.  
  1436. extern const uint8_t x264_cabac_transition[128][2];
  1437. -extern const uint16_t x264_cabac_entropy[128][2];
  1438. +extern const uint16_t x264_cabac_entropy[128];
  1439.  
  1440. /* init the contexts given i_slice_type, the quantif and the model */
  1441. void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
  1442. @@ -55,15 +55,21 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
  1443. void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
  1444. void x264_cabac_encode_decision_c( x264_cabac_t *cb, int i_ctx, int b );
  1445. void x264_cabac_encode_decision_asm( x264_cabac_t *cb, int i_ctx, int b );
  1446. -void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
  1447. +void x264_cabac_encode_bypass_c( x264_cabac_t *cb, int b );
  1448. +void x264_cabac_encode_bypass_asm( x264_cabac_t *cb, int b );
  1449. +void x264_cabac_encode_terminal_c( x264_cabac_t *cb );
  1450. +void x264_cabac_encode_terminal_asm( x264_cabac_t *cb );
  1451. void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val );
  1452. -void x264_cabac_encode_terminal( x264_cabac_t *cb );
  1453. void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
  1454.  
  1455. #ifdef HAVE_MMX
  1456. #define x264_cabac_encode_decision x264_cabac_encode_decision_asm
  1457. +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
  1458. +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
  1459. #else
  1460. #define x264_cabac_encode_decision x264_cabac_encode_decision_c
  1461. +#define x264_cabac_encode_bypass x264_cabac_encode_bypass_c
  1462. +#define x264_cabac_encode_terminal x264_cabac_encode_terminal_c
  1463. #endif
  1464. #define x264_cabac_encode_decision_noup x264_cabac_encode_decision
  1465.  
  1466. @@ -78,25 +84,25 @@ static ALWAYS_INLINE void x264_cabac_size_decision( x264_cabac_t *cb, long i_ctx
  1467. {
  1468. int i_state = cb->state[i_ctx];
  1469. cb->state[i_ctx] = x264_cabac_transition[i_state][b];
  1470. - cb->f8_bits_encoded += x264_cabac_entropy[i_state][b];
  1471. + cb->f8_bits_encoded += x264_cabac_entropy[i_state^b];
  1472. }
  1473.  
  1474. static ALWAYS_INLINE int x264_cabac_size_decision2( uint8_t *state, long b )
  1475. {
  1476. int i_state = *state;
  1477. *state = x264_cabac_transition[i_state][b];
  1478. - return x264_cabac_entropy[i_state][b];
  1479. + return x264_cabac_entropy[i_state^b];
  1480. }
  1481.  
  1482. static ALWAYS_INLINE void x264_ca
Add Comment
Please, Sign In to add comment