Advertisement
Guest User

Untitled

a guest
Jun 3rd, 2017
591
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 213.70 KB | None | 0 0
  1. From aa1a8435000228c4d9e74da0f9fd3d16e85a3e80 Mon Sep 17 00:00:00 2001
  2. From: Loren Merritt <pengvado@akuvian.org>
  3. Date: Sat, 26 Jun 2010 20:55:59 -0700
  4. Subject: [PATCH 1/7] Simplify pixel_ads
  5.  
  6. ---
  7. common/macroblock.c    |    2 +-
  8.  common/x86/pixel-a.asm |  175 +++++++++++++++++------------------------------
  9.  encoder/me.c           |    2 +-
  10.  3 files changed, 65 insertions(+), 114 deletions(-)
  11.  
  12. diff --git a/common/macroblock.c b/common/macroblock.c
  13. index 8e9b06d..4561d8a 100644
  14. --- a/common/macroblock.c
  15. +++ b/common/macroblock.c
  16. @@ -341,7 +341,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  17.          int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
  18.          int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
  19.          int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
  20. -            ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
  21. +            ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
  22.          scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
  23.      }
  24.      int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
  25. diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
  26. index 78ca4c7..1756f86 100644
  27. --- a/common/x86/pixel-a.asm
  28. +++ b/common/x86/pixel-a.asm
  29. @@ -2142,34 +2142,24 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
  30.  ; Successive Elimination ADS
  31.  ;=============================================================================
  32.  
  33. -%macro ADS_START 1 ; unroll_size
  34. -%ifdef ARCH_X86_64
  35. -    %define t0 r6
  36. +%macro ADS_START 0
  37.  %ifdef WIN64
  38. -    mov     r4,  r4mp
  39. -    movsxd  r5,  dword r5m
  40. +    movsxd  r5,  r5d
  41.  %endif
  42. -    mov     r10, rsp
  43. -%else
  44. -    %define t0 r4
  45. -    mov     rbp, rsp
  46. -%endif
  47. -    mov     r0d, r5m
  48. -    sub     rsp, r0
  49. -    sub     rsp, %1*4-1
  50. -    and     rsp, ~15
  51. -    mov     t0,  rsp
  52. +    mov     r0d, r5d
  53. +    lea     r6,  [r4+r5+15]
  54. +    and     r6,  ~15;
  55.      shl     r2d,  1
  56.  %endmacro
  57.  
  58. -%macro ADS_END 1
  59. +%macro ADS_END 1 ; unroll_size
  60.      add     r1, 8*%1
  61.      add     r3, 8*%1
  62. -    add     t0, 4*%1
  63. +    add     r6, 4*%1
  64.      sub     r0d, 4*%1
  65.      jg .loop
  66.  %ifdef WIN64
  67. -    RESTORE_XMM r10
  68. +    RESTORE_XMM rsp
  69.  %endif
  70.      jmp ads_mvs
  71.  %endmacro
  72. @@ -2180,14 +2170,14 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
  73.  ; int pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
  74.  ;                 uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
  75.  ;-----------------------------------------------------------------------------
  76. -cglobal pixel_ads4_mmxext, 4,7
  77. +cglobal pixel_ads4_mmxext, 6,7
  78.      movq    mm6, [r0]
  79.      movq    mm4, [r0+8]
  80.      pshufw  mm7, mm6, 0
  81.      pshufw  mm6, mm6, 0xAA
  82.      pshufw  mm5, mm4, 0
  83.      pshufw  mm4, mm4, 0xAA
  84. -    ADS_START 1
  85. +    ADS_START
  86.  .loop:
  87.      movq    mm0, [r1]
  88.      movq    mm1, [r1+16]
  89. @@ -2204,25 +2194,19 @@ cglobal pixel_ads4_mmxext, 4,7
  90.      ABS1    mm3, mm1
  91.      paddw   mm0, mm2
  92.      paddw   mm0, mm3
  93. -%ifdef WIN64
  94. -    pshufw  mm1, [r10+stack_offset+56], 0
  95. -%elifdef ARCH_X86_64
  96. -    pshufw  mm1, [r10+8], 0
  97. -%else
  98. -    pshufw  mm1, [ebp+stack_offset+28], 0
  99. -%endif
  100. +    pshufw  mm1, r6m, 0
  101.      paddusw mm0, [r3]
  102.      psubusw mm1, mm0
  103.      packsswb mm1, mm1
  104. -    movd    [t0], mm1
  105. +    movd    [r6], mm1
  106.      ADS_END 1
  107.  
  108. -cglobal pixel_ads2_mmxext, 4,7
  109. +cglobal pixel_ads2_mmxext, 6,7
  110.      movq    mm6, [r0]
  111.      pshufw  mm5, r6m, 0
  112.      pshufw  mm7, mm6, 0
  113.      pshufw  mm6, mm6, 0xAA
  114. -    ADS_START 1
  115. +    ADS_START
  116.  .loop:
  117.      movq    mm0, [r1]
  118.      movq    mm1, [r1+r2]
  119. @@ -2235,13 +2219,13 @@ cglobal pixel_ads2_mmxext, 4,7
  120.      movq    mm4, mm5
  121.      psubusw mm4, mm0
  122.      packsswb mm4, mm4
  123. -    movd    [t0], mm4
  124. +    movd    [r6], mm4
  125.      ADS_END 1
  126.  
  127. -cglobal pixel_ads1_mmxext, 4,7
  128. +cglobal pixel_ads1_mmxext, 6,7
  129.      pshufw  mm7, [r0], 0
  130.      pshufw  mm6, r6m, 0
  131. -    ADS_START 2
  132. +    ADS_START
  133.  .loop:
  134.      movq    mm0, [r1]
  135.      movq    mm1, [r1+8]
  136. @@ -2256,11 +2240,11 @@ cglobal pixel_ads1_mmxext, 4,7
  137.      psubusw mm4, mm0
  138.      psubusw mm5, mm1
  139.      packsswb mm4, mm5
  140. -    movq    [t0], mm4
  141. +    movq    [r6], mm4
  142.      ADS_END 2
  143.  
  144.  %macro ADS_SSE2 1
  145. -cglobal pixel_ads4_%1, 4,7,12
  146. +cglobal pixel_ads4_%1, 6,7,12
  147.      movdqa  xmm4, [r0]
  148.      pshuflw xmm7, xmm4, 0
  149.      pshuflw xmm6, xmm4, 0xAA
  150. @@ -2273,7 +2257,7 @@ cglobal pixel_ads4_%1, 4,7,12
  151.  %ifdef ARCH_X86_64
  152.      pshuflw xmm8, r6m, 0
  153.      punpcklqdq xmm8, xmm8
  154. -    ADS_START 2
  155. +    ADS_START
  156.      movdqu  xmm10, [r1]
  157.      movdqu  xmm11, [r1+r2]
  158.  .loop:
  159. @@ -2299,9 +2283,9 @@ cglobal pixel_ads4_%1, 4,7,12
  160.      movdqa  xmm1, xmm8
  161.      psubusw xmm1, xmm0
  162.      packsswb xmm1, xmm1
  163. -    movq    [t0], xmm1
  164. +    movq    [r6], xmm1
  165.  %else
  166. -    ADS_START 2
  167. +    ADS_START
  168.  .loop:
  169.      movdqu  xmm0, [r1]
  170.      movdqu  xmm1, [r1+16]
  171. @@ -2318,18 +2302,18 @@ cglobal pixel_ads4_%1, 4,7,12
  172.      ABS1    xmm3, xmm1
  173.      paddw   xmm0, xmm2
  174.      paddw   xmm0, xmm3
  175. -    movd    xmm1, [ebp+stack_offset+28]
  176. +    movd    xmm1, r6m
  177.      movdqu  xmm2, [r3]
  178.      pshuflw xmm1, xmm1, 0
  179.      punpcklqdq xmm1, xmm1
  180.      paddusw xmm0, xmm2
  181.      psubusw xmm1, xmm0
  182.      packsswb xmm1, xmm1
  183. -    movq    [t0], xmm1
  184. +    movq    [r6], xmm1
  185.  %endif ; ARCH
  186.      ADS_END 2
  187.  
  188. -cglobal pixel_ads2_%1, 4,7,8
  189. +cglobal pixel_ads2_%1, 6,7,8
  190.      movq    xmm6, [r0]
  191.      movd    xmm5, r6m
  192.      pshuflw xmm7, xmm6, 0
  193. @@ -2338,7 +2322,7 @@ cglobal pixel_ads2_%1, 4,7,8
  194.      punpcklqdq xmm7, xmm7
  195.      punpcklqdq xmm6, xmm6
  196.      punpcklqdq xmm5, xmm5
  197. -    ADS_START 2
  198. +    ADS_START
  199.  .loop:
  200.      movdqu  xmm0, [r1]
  201.      movdqu  xmm1, [r1+r2]
  202. @@ -2352,17 +2336,17 @@ cglobal pixel_ads2_%1, 4,7,8
  203.      movdqa  xmm1, xmm5
  204.      psubusw xmm1, xmm0
  205.      packsswb xmm1, xmm1
  206. -    movq    [t0], xmm1
  207. +    movq    [r6], xmm1
  208.      ADS_END 2
  209.  
  210. -cglobal pixel_ads1_%1, 4,7,8
  211. +cglobal pixel_ads1_%1, 6,7,8
  212.      movd    xmm7, [r0]
  213.      movd    xmm6, r6m
  214.      pshuflw xmm7, xmm7, 0
  215.      pshuflw xmm6, xmm6, 0
  216.      punpcklqdq xmm7, xmm7
  217.      punpcklqdq xmm6, xmm6
  218. -    ADS_START 4
  219. +    ADS_START
  220.  .loop:
  221.      movdqu  xmm0, [r1]
  222.      movdqu  xmm1, [r1+16]
  223. @@ -2379,7 +2363,7 @@ cglobal pixel_ads1_%1, 4,7,8
  224.      psubusw xmm4, xmm0
  225.      psubusw xmm5, xmm1
  226.      packsswb xmm4, xmm5
  227. -    movdqa  [t0], xmm4
  228. +    movdqa  [r6], xmm4
  229.      ADS_END 4
  230.  %endmacro
  231.  
  232. @@ -2401,90 +2385,57 @@ ADS_SSE2 ssse3
  233.  ;     }
  234.  ;     return nmv;
  235.  ; }
  236. +
  237. +%macro TEST 1
  238. +    mov     [r4+r0*2], r1w
  239. +    test    r2d, 0xff<<(%1*8)
  240. +    setne   r3b
  241. +    add     r0d, r3d
  242. +    inc     r1d
  243. +%endmacro
  244. +
  245.  cglobal pixel_ads_mvs, 0,7,0
  246.  ads_mvs:
  247. -%ifdef ARCH_X86_64
  248. +    lea     r6,  [r4+r5+15]
  249. +    and     r6,  ~15;
  250.      ; mvs = r4
  251. -    ; masks = rsp
  252. +    ; masks = r6
  253.      ; width = r5
  254.      ; clear last block in case width isn't divisible by 8. (assume divisible by 4, so clearing 4 bytes is enough.)
  255. -%ifdef WIN64
  256. -    mov     r8, r4
  257. -    mov     r9, r5
  258. -%endif
  259. -    xor     eax, eax
  260. -    xor     esi, esi
  261. -    mov     dword [rsp+r9], 0
  262. +    xor     r0d, r0d
  263. +    xor     r1d, r1d
  264. +    mov     [r6+r5], r0d
  265.      jmp .loopi
  266. +ALIGN 16
  267.  .loopi0:
  268. -    add     esi, 8
  269. -    cmp     esi, r9d
  270. +    add     r1d, 8
  271. +    cmp     r1d, r5d
  272.      jge .end
  273.  .loopi:
  274. -    mov     rdi, [rsp+rsi]
  275. -    test    rdi, rdi
  276. +    mov     r2,  [r6+r1]
  277. +%ifdef ARCH_X86_64
  278. +    test    r2,  r2
  279. +%else
  280. +    mov     r3,  r2
  281. +    or      r3d, [r6+r1+4]
  282. +%endif
  283.      jz .loopi0
  284. -    xor     ecx, ecx
  285. -%macro TEST 1
  286. -    mov     [r8+rax*2], si
  287. -    test    edi, 0xff<<(%1*8)
  288. -    setne   cl
  289. -    add     eax, ecx
  290. -    inc     esi
  291. -%endmacro
  292. +    xor     r3d, r3d
  293.      TEST 0
  294.      TEST 1
  295.      TEST 2
  296.      TEST 3
  297. -    shr     rdi, 32
  298. +%ifdef ARCH_X86_64
  299. +    shr     r2,  32
  300. +%else
  301. +    mov     r2d, [r6+r1]
  302. +%endif
  303.      TEST 0
  304.      TEST 1
  305.      TEST 2
  306.      TEST 3
  307. -    cmp     esi, r9d
  308. -    jl .loopi
  309. -.end:
  310. -    mov     rsp, r10
  311. -    RET
  312. -
  313. -%else
  314. -    xor     eax, eax
  315. -    xor     esi, esi
  316. -    mov     ebx, [ebp+stack_offset+20] ; mvs
  317. -    mov     edi, [ebp+stack_offset+24] ; width
  318. -    mov     dword [esp+edi], 0
  319. -    push    ebp
  320. -    jmp .loopi
  321. -.loopi0:
  322. -    add     esi, 8
  323. -    cmp     esi, edi
  324. -    jge .end
  325. -.loopi:
  326. -    mov     ebp, [esp+esi+4]
  327. -    mov     edx, [esp+esi+8]
  328. -    mov     ecx, ebp
  329. -    or      ecx, edx
  330. -    jz .loopi0
  331. -    xor     ecx, ecx
  332. -%macro TEST 2
  333. -    mov     [ebx+eax*2], si
  334. -    test    %2, 0xff<<(%1*8)
  335. -    setne   cl
  336. -    add     eax, ecx
  337. -    inc     esi
  338. -%endmacro
  339. -    TEST 0, ebp
  340. -    TEST 1, ebp
  341. -    TEST 2, ebp
  342. -    TEST 3, ebp
  343. -    TEST 0, edx
  344. -    TEST 1, edx
  345. -    TEST 2, edx
  346. -    TEST 3, edx
  347. -    cmp     esi, edi
  348. +    cmp     r1d, r5d
  349.      jl .loopi
  350.  .end:
  351. -    pop     esp
  352. +    movifnidn eax, r0d
  353.      RET
  354. -%endif ; ARCH
  355. -
  356. diff --git a/encoder/me.c b/encoder/me.c
  357. index 291104a..19c5b2b 100644
  358. --- a/encoder/me.c
  359. +++ b/encoder/me.c
  360. @@ -609,7 +609,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
  361.              if( h->mb.i_me_method == X264_ME_TESA )
  362.              {
  363.                  // ADS threshold, then SAD threshold, then keep the best few SADs, then SATD
  364. -                mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
  365. +                mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15) + 4);
  366.                  int nmvsad = 0, limit;
  367.                  int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
  368.                  int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+bmy*stride+bmx, stride )
  369. --
  370. 1.7.1
  371.  
  372.  
  373. From 4f74306c2f266bfc671ad99e9027b816dd423ece Mon Sep 17 00:00:00 2001
  374. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  375. Date: Mon, 28 Jun 2010 15:02:33 -0700
  376. Subject: [PATCH 2/7] Callback feature for low-latency per-slice output
  377.  Add a callback to allow the calling application to send slices immediately after being encoded.
  378.  Also add some extra information to the x264_nal_t structure to help inform such a calling application how the NAL units should be ordered.
  379.  
  380. Full documentation is in x264.h.
  381. ---
  382. common/bitstream.c |    7 ++-
  383.  common/bitstream.h |    1 -
  384.  encoder/encoder.c  |   26 ++++++++---
  385.  x264.h             |  128 +++++++++++++++++++++++++++++++++-------------------
  386.  4 files changed, 105 insertions(+), 57 deletions(-)
  387.  
  388. diff --git a/common/bitstream.c b/common/bitstream.c
  389. index 0aaac21..ad8c16e 100644
  390. --- a/common/bitstream.c
  391. +++ b/common/bitstream.c
  392. @@ -44,7 +44,7 @@ uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
  393.  /****************************************************************************
  394.   * x264_nal_encode:
  395.   ****************************************************************************/
  396. -int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode )
  397. +void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
  398.  {
  399.      uint8_t *src = nal->p_payload;
  400.      uint8_t *end = nal->p_payload + nal->i_payload;
  401. @@ -52,7 +52,7 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
  402.  
  403.      if( h->param.b_annexb )
  404.      {
  405. -        if( b_long_startcode )
  406. +        if( nal->b_long_startcode )
  407.              *dst++ = 0x00;
  408.          *dst++ = 0x00;
  409.          *dst++ = 0x00;
  410. @@ -77,7 +77,8 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
  411.          orig_dst[3] = size>> 0;
  412.      }
  413.  
  414. -    return size+4;
  415. +    nal->i_payload = size+4;
  416. +    nal->p_payload = orig_dst;
  417.  }
  418.  
  419.  void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
  420. diff --git a/common/bitstream.h b/common/bitstream.h
  421. index 9ce5bd7..dd8118d 100644
  422. --- a/common/bitstream.h
  423. +++ b/common/bitstream.h
  424. @@ -68,7 +68,6 @@ typedef struct
  425.      uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
  426.  } x264_bitstream_function_t;
  427.  
  428. -int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode );
  429.  void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
  430.  
  431.  /* A larger level table size theoretically could help a bit at extremely
  432. diff --git a/encoder/encoder.c b/encoder/encoder.c
  433. index f54fe85..fe97aef 100644
  434. --- a/encoder/encoder.c
  435. +++ b/encoder/encoder.c
  436. @@ -427,6 +427,8 @@ static int x264_validate_parameters( x264_t *h )
  437.      else
  438.          h->param.b_sliced_threads = 0;
  439.      h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
  440. +    if( h->i_thread_frames > 1 )
  441. +        h->param.nalu_process = NULL;
  442.  
  443.      if( h->param.b_interlaced )
  444.      {
  445. @@ -1253,8 +1255,9 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
  446.  {
  447.      x264_nal_t *nal = &h->out.nal[h->out.i_nal];
  448.  
  449. -    nal->i_ref_idc = i_ref_idc;
  450. -    nal->i_type    = i_type;
  451. +    nal->i_ref_idc        = i_ref_idc;
  452. +    nal->i_type           = i_type;
  453. +    nal->b_long_startcode = 1;
  454.  
  455.      nal->i_payload= 0;
  456.      nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
  457. @@ -1280,6 +1283,8 @@ static int x264_nal_end( x264_t *h )
  458.  {
  459.      x264_nal_t *nal = &h->out.nal[h->out.i_nal];
  460.      nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload;
  461. +    if( h->param.nalu_process )
  462. +        h->param.nalu_process( h, nal );
  463.      h->out.i_nal++;
  464.  
  465.      return x264_nal_check_buffer( h );
  466. @@ -1289,6 +1294,13 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
  467.  {
  468.      int nal_size = 0, previous_nal_size = 0;
  469.  
  470. +    if( h->param.nalu_process )
  471. +    {
  472. +        for( int i = start; i < h->out.i_nal; i++ )
  473. +            nal_size += h->out.nal[i].i_payload;
  474. +        return nal_size;
  475. +    }
  476. +
  477.      for( int i = 0; i < start; i++ )
  478.          previous_nal_size += h->out.nal[i].i_payload;
  479.  
  480. @@ -1311,11 +1323,9 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
  481.  
  482.      for( int i = start; i < h->out.i_nal; i++ )
  483.      {
  484. -        int long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
  485. -        int size = x264_nal_encode( h, nal_buffer, &h->out.nal[i], long_startcode );
  486. -        h->out.nal[i].i_payload = size;
  487. -        h->out.nal[i].p_payload = nal_buffer;
  488. -        nal_buffer += size;
  489. +        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
  490. +        x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
  491. +        nal_buffer += h->out.nal[i].i_payload;
  492.      }
  493.  
  494.      x264_emms();
  495. @@ -1805,6 +1815,7 @@ static int x264_slice_write( x264_t *h )
  496.  
  497.      /* Slice */
  498.      x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
  499. +    h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
  500.  
  501.      /* Slice header */
  502.      x264_macroblock_thread_init( h );
  503. @@ -2020,6 +2031,7 @@ static int x264_slice_write( x264_t *h )
  504.              i_mb_x = 0;
  505.          }
  506.      }
  507. +    h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
  508.  
  509.      if( h->param.b_cabac )
  510.      {
  511. diff --git a/x264.h b/x264.h
  512. index 1138a8b..e1ae084 100644
  513. --- a/x264.h
  514. +++ b/x264.h
  515. @@ -35,13 +35,61 @@
  516.  
  517.  #include <stdarg.h>
  518.  
  519. -#define X264_BUILD 100
  520. +#define X264_BUILD 101
  521.  
  522.  /* x264_t:
  523.   *      opaque handler for encoder */
  524.  typedef struct x264_t x264_t;
  525.  
  526.  /****************************************************************************
  527. + * NAL structure and functions
  528. + ****************************************************************************/
  529. +
  530. +enum nal_unit_type_e
  531. +{
  532. +    NAL_UNKNOWN     = 0,
  533. +    NAL_SLICE       = 1,
  534. +    NAL_SLICE_DPA   = 2,
  535. +    NAL_SLICE_DPB   = 3,
  536. +    NAL_SLICE_DPC   = 4,
  537. +    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
  538. +    NAL_SEI         = 6,    /* ref_idc == 0 */
  539. +    NAL_SPS         = 7,
  540. +    NAL_PPS         = 8,
  541. +    NAL_AUD         = 9,
  542. +    NAL_FILLER      = 12,
  543. +    /* ref_idc == 0 for 6,9,10,11,12 */
  544. +};
  545. +enum nal_priority_e
  546. +{
  547. +    NAL_PRIORITY_DISPOSABLE = 0,
  548. +    NAL_PRIORITY_LOW        = 1,
  549. +    NAL_PRIORITY_HIGH       = 2,
  550. +    NAL_PRIORITY_HIGHEST    = 3,
  551. +};
  552. +
  553. +/* The data within the payload is already NAL-encapsulated; the ref_idc and type
  554. + * are merely in the struct for easy access by the calling application.
  555. + * All data returned in an x264_nal_t, including the data in p_payload, is no longer
  556. + * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
  557. + * before calling x264_encoder_encode or x264_encoder_headers again. */
  558. +typedef struct
  559. +{
  560. +    int i_ref_idc;  /* nal_priority_e */
  561. +    int i_type;     /* nal_unit_type_e */
  562. +    int b_long_startcode;
  563. +    int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
  564. +    int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
  565. +
  566. +    /* Size of payload in bytes. */
  567. +    int     i_payload;
  568. +    /* If param->b_annexb is set, Annex-B bytestream with startcode.
  569. +     * Otherwise, startcode is replaced with a 4-byte size.
  570. +     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
  571. +    uint8_t *p_payload;
  572. +} x264_nal_t;
  573. +
  574. +/****************************************************************************
  575.   * Encoder parameters
  576.   ****************************************************************************/
  577.  /* CPU flags
  578. @@ -377,8 +425,41 @@ typedef struct x264_param_t
  579.       * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
  580.       * Not used when x264_encoder_reconfig is called directly. */
  581.      void (*param_free)( void* );
  582. +
  583. +    /* Optional low-level callback for low-latency encoding.  Called for each output NAL unit
  584. +     * immediately after the NAL unit is finished encoding.  This allows the calling application
  585. +     * to begin processing video data (e.g. by sending packets over a network) before the frame
  586. +     * is done encoding.
  587. +     *
  588. +     * This callback MUST do the following in order to work correctly:
  589. +     * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 16.
  590. +     * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
  591. +     * After these steps, the content of nal is valid and can be used in the same way as if
  592. +     * the NAL unit were output by x264_encoder_encode.
  593. +     *
  594. +     * This does not need to be synchronous with the encoding process: the data pointed to
  595. +     * by nal (both before and after x264_nal_encode) will remain valid until the next
  596. +     * x264_encoder_encode call.  The callback must be re-entrant.
  597. +     *
  598. +     * This callback does not work with frame-based threads; threads must be disabled
  599. +     * or sliced-threads enabled.  This callback also does not work as one would expect
  600. +     * with HRD -- since the buffering period SEI cannot be calculated until the frame
  601. +     * is finished encoding, it will not be sent via this callback.
  602. +     *
  603. +     * Note also that the NALs are not necessarily returned in order when sliced threads is
  604. +     * enabled.  Accordingly, the variables i_first_mb and i_last_mb are available in
  605. +     * x264_nal_t to help the calling application reorder the slices if necessary.
  606. +     *
  607. +     * When this callback is enabled, x264_encoder_encode does not return valid NALs;
  608. +     * the calling application is expected to acquire all output NALs through the callback.
  609. +     *
  610. +     * It is generally sensible to combine this callback with a use of slice-max-mbs or
  611. +     * slice-max-size. */
  612. +    void (*nalu_process) ( x264_t *h, x264_nal_t *nal );
  613.  } x264_param_t;
  614.  
  615. +void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
  616. +
  617.  /****************************************************************************
  618.   * H.264 level restriction information
  619.   ****************************************************************************/
  620. @@ -586,51 +667,6 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
  621.  void x264_picture_clean( x264_picture_t *pic );
  622.  
  623.  /****************************************************************************
  624. - * NAL structure and functions
  625. - ****************************************************************************/
  626. -
  627. -enum nal_unit_type_e
  628. -{
  629. -    NAL_UNKNOWN     = 0,
  630. -    NAL_SLICE       = 1,
  631. -    NAL_SLICE_DPA   = 2,
  632. -    NAL_SLICE_DPB   = 3,
  633. -    NAL_SLICE_DPC   = 4,
  634. -    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
  635. -    NAL_SEI         = 6,    /* ref_idc == 0 */
  636. -    NAL_SPS         = 7,
  637. -    NAL_PPS         = 8,
  638. -    NAL_AUD         = 9,
  639. -    NAL_FILLER      = 12,
  640. -    /* ref_idc == 0 for 6,9,10,11,12 */
  641. -};
  642. -enum nal_priority_e
  643. -{
  644. -    NAL_PRIORITY_DISPOSABLE = 0,
  645. -    NAL_PRIORITY_LOW        = 1,
  646. -    NAL_PRIORITY_HIGH       = 2,
  647. -    NAL_PRIORITY_HIGHEST    = 3,
  648. -};
  649. -
  650. -/* The data within the payload is already NAL-encapsulated; the ref_idc and type
  651. - * are merely in the struct for easy access by the calling application.
  652. - * All data returned in an x264_nal_t, including the data in p_payload, is no longer
  653. - * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
  654. - * before calling x264_encoder_encode or x264_encoder_headers again. */
  655. -typedef struct
  656. -{
  657. -    int i_ref_idc;  /* nal_priority_e */
  658. -    int i_type;     /* nal_unit_type_e */
  659. -
  660. -    /* Size of payload in bytes. */
  661. -    int     i_payload;
  662. -    /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode.
  663. -     * Otherwise, startcode is replaced with a 4-byte size.
  664. -     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
  665. -    uint8_t *p_payload;
  666. -} x264_nal_t;
  667. -
  668. -/****************************************************************************
  669.   * Encoder functions
  670.   ****************************************************************************/
  671.  
  672. --
  673. 1.7.1
  674.  
  675.  
  676. From 22bf1672adafa4e938a13952b8f71cd7548d31f1 Mon Sep 17 00:00:00 2001
  677. From: Lamont Alston <wewk584@gmail.com>
  678. Date: Tue, 29 Jun 2010 10:11:42 -0700
  679. Subject: [PATCH 3/7] Make open-GOP Blu-ray compatible
  680.  Blu-ray is even more braindamaged than we thought.
  681.  Accordingly, open-gop options are now "normal" and "bluray", as opposed to display and coded.
  682.  Normal should be used in all cases besides Blu-ray authoring.
  683.  
  684. ---
  685. encoder/encoder.c   |    2 +-
  686.  encoder/slicetype.c |   28 +++++++---------------------
  687.  x264.c              |    8 ++++----
  688.  x264.h              |    8 ++++----
  689.  4 files changed, 16 insertions(+), 30 deletions(-)
  690.  
  691. diff --git a/encoder/encoder.c b/encoder/encoder.c
  692. index fe97aef..5cd3307 100644
  693. --- a/encoder/encoder.c
  694. +++ b/encoder/encoder.c
  695. @@ -577,7 +577,7 @@ static int x264_validate_parameters( x264_t *h )
  696.          h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
  697.      }
  698.      h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
  699. -    h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
  700. +    h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
  701.      if( h->param.i_keyint_max == 1 )
  702.          h->param.b_intra_refresh = 0;
  703.      h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
  704. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  705. index 2703f02..4ede8cf 100644
  706. --- a/encoder/slicetype.c
  707. +++ b/encoder/slicetype.c
  708. @@ -1233,17 +1233,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
  709.      if( !h->param.b_intra_refresh )
  710.          for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
  711.          {
  712. -            int j = i;
  713. -            if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
  714. -            {
  715. -                while( IS_X264_TYPE_B( frames[i]->i_type ) )
  716. -                    i++;
  717. -                while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
  718. -                    j--;
  719. -            }
  720.              frames[i]->i_type = X264_TYPE_I;
  721.              reset_start = X264_MIN( reset_start, i+1 );
  722. -            i = j;
  723. +            if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
  724. +                while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
  725. +                    i--;
  726.          }
  727.  
  728.      if( vbv_lookahead )
  729. @@ -1337,16 +1331,8 @@ void x264_slicetype_decide( x264_t *h )
  730.              if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
  731.                  frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
  732.              int warn = frm->i_type != X264_TYPE_IDR;
  733. -            if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
  734. -                warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME;
  735. -            if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
  736. -            {
  737. -                /* if this minigop ends with i, it's not a violation */
  738. -                int j = bframes;
  739. -                while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
  740. -                    j++;
  741. -                warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
  742. -            }
  743. +            if( warn && h->param.i_open_gop )
  744. +                warn &= frm->i_type != X264_TYPE_I;
  745.              if( warn )
  746.                  x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
  747.          }
  748. @@ -1355,8 +1341,8 @@ void x264_slicetype_decide( x264_t *h )
  749.              if( h->param.i_open_gop )
  750.              {
  751.                  h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
  752. -                if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
  753. -                    h->lookahead->i_last_keyframe -= bframes; // Use coded order
  754. +                if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
  755. +                    h->lookahead->i_last_keyframe -= bframes; // Use bluray order
  756.                  frm->b_keyframe = 1;
  757.              }
  758.              else
  759. diff --git a/x264.c b/x264.c
  760. index df04385..f08ab41 100644
  761. --- a/x264.c
  762. +++ b/x264.c
  763. @@ -382,10 +382,10 @@ static void Help( x264_param_t *defaults, int longhelp )
  764.          "                                  - normal: Non-strict (not Blu-ray compatible)\n",
  765.          strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
  766.      H1( "      --open-gop <string>     Use recovery points to close GOPs [none]\n"
  767. -        "                                  - none: Use standard closed GOPs\n"
  768. -        "                                  - display: Base GOP length on display order\n"
  769. -        "                                             (not Blu-ray compatible)\n"
  770. -        "                                  - coded: Base GOP length on coded order\n"
  771. +        "                                  - none: closed GOPs only\n"
  772. +        "                                  - normal: standard open GOPs\n"
  773. +        "                                            (not Blu-ray compatible)\n"
  774. +        "                                  - bluray: Blu-ray-compatible open GOPs\n"
  775.          "                              Only available with b-frames\n" );
  776.      H1( "      --no-cabac              Disable CABAC\n" );
  777.      H1( "  -r, --ref <integer>         Number of reference frames [%d]\n", defaults->i_frame_reference );
  778. diff --git a/x264.h b/x264.h
  779. index e1ae084..86f7426 100644
  780. --- a/x264.h
  781. +++ b/x264.h
  782. @@ -153,8 +153,8 @@ typedef struct
  783.  #define X264_B_PYRAMID_NORMAL        2
  784.  #define X264_KEYINT_MIN_AUTO         0
  785.  #define X264_OPEN_GOP_NONE           0
  786. -#define X264_OPEN_GOP_DISPLAY_ORDER  1
  787. -#define X264_OPEN_GOP_CODED_ORDER    2
  788. +#define X264_OPEN_GOP_NORMAL         1
  789. +#define X264_OPEN_GOP_BLURAY         2
  790.  
  791.  static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
  792.  static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
  793. @@ -166,7 +166,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
  794.  static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
  795.  static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
  796.  static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
  797. -static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
  798. +static const char * const x264_open_gop_names[] = { "none", "normal", "bluray", 0 };
  799.  
  800.  /* Colorspace type
  801.   * legacy only; nothing other than I420 is really supported. */
  802. @@ -276,7 +276,7 @@ typedef struct x264_param_t
  803.      int         i_bframe_adaptive;
  804.      int         i_bframe_bias;
  805.      int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
  806. -    int         i_open_gop;         /* Open gop: 1=display order, 2=coded order to determine gop size */
  807. +    int         i_open_gop;         /* Open gop: 0=none, 1=normal (display order), 2=bluray-compatible (coded order) */
  808.  
  809.      int         b_deblocking_filter;
  810.      int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
  811. --
  812. 1.7.1
  813.  
  814.  
  815. From ae5c32e10d6b500366d1d638c52b75e65aad1d9f Mon Sep 17 00:00:00 2001
  816. From: Steven Walters <kemuri9@gmail.com>
  817. Date: Sat, 26 Jun 2010 16:28:49 -0400
  818. Subject: [PATCH 4/7] Centralize logging within x264cli
  819.  x264cli messages will now respect the log level they pertain to.
  820.  Slightly reduces binary size.
  821.  
  822. ---
  823. input/avs.c             |   88 +++++-------------
  824.  input/ffms.c            |   58 +++--------
  825.  input/input.h           |    2 +
  826.  input/lavf.c            |   55 +++--------
  827.  input/thread.c          |    9 +-
  828.  input/timecode.c        |  111 ++++++----------------
  829.  input/y4m.c             |   23 +----
  830.  input/yuv.c             |    8 +-
  831.  muxers.h                |   61 ------------
  832.  output/flv.c            |   10 +-
  833.  output/flv_bytestream.c |    2 +-
  834.  output/matroska.c       |    2 +-
  835.  output/matroska_ebml.c  |    2 +-
  836.  output/mp4.c            |   12 +--
  837.  output/output.h         |    2 +
  838.  output/raw.c            |    2 +-
  839.  x264.c                  |  246 +++++++++++++++++++++--------------------------
  840.  x264cli.h               |   67 +++++++++++++
  841.  18 files changed, 289 insertions(+), 471 deletions(-)
  842.  delete mode 100644 muxers.h
  843.  create mode 100644 x264cli.h
  844.  
  845. diff --git a/input/avs.c b/input/avs.c
  846. index 07add40..b83f715 100644
  847. --- a/input/avs.c
  848. +++ b/input/avs.c
  849. @@ -20,8 +20,9 @@
  850.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  851.   *****************************************************************************/
  852.  
  853. -#include "muxers.h"
  854. +#include "input.h"
  855.  #include <windows.h>
  856. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
  857.  
  858.  /* the AVS interface currently uses __declspec to link function declarations to their definitions in the dll.
  859.     this has a side effect of preventing program execution if the avisynth dll is not found,
  860. @@ -131,27 +132,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  861.      FILE *fh = fopen( psz_filename, "r" );
  862.      if( !fh )
  863.          return -1;
  864. -    else if( !x264_is_regular_file( fh ) )
  865. -    {
  866. -        fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
  867. -        return -1;
  868. -    }
  869. +    FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
  870.      fclose( fh );
  871.  
  872.      avs_hnd_t *h = malloc( sizeof(avs_hnd_t) );
  873.      if( !h )
  874.          return -1;
  875. -    if( avs_load_library( h ) )
  876. -    {
  877. -        fprintf( stderr, "avs [error]: failed to load avisynth\n" );
  878. -        return -1;
  879. -    }
  880. +    FAIL_IF_ERROR( avs_load_library( h ), "failed to load avisynth\n" )
  881.      h->env = h->func.avs_create_script_environment( AVS_INTERFACE_YV12 );
  882. -    if( !h->env )
  883. -    {
  884. -        fprintf( stderr, "avs [error]: failed to initiate avisynth\n" );
  885. -        return -1;
  886. -    }
  887. +    FAIL_IF_ERROR( !h->env, "failed to initiate avisynth\n" )
  888.      AVS_Value arg = avs_new_value_string( psz_filename );
  889.      AVS_Value res;
  890.      char *filename_ext = get_filename_extension( psz_filename );
  891. @@ -159,11 +148,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  892.      if( !strcasecmp( filename_ext, "avs" ) )
  893.      {
  894.          res = h->func.avs_invoke( h->env, "Import", arg, NULL );
  895. -        if( avs_is_error( res ) )
  896. -        {
  897. -            fprintf( stderr, "avs [error]: %s\n", avs_as_string( res ) );
  898. -            return -1;
  899. -        }
  900. +        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) )
  901.          /* check if the user is using a multi-threaded script and apply distributor if necessary.
  902.             adapted from avisynth's vfw interface */
  903.          AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
  904. @@ -184,78 +169,55 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  905.          int i;
  906.          for( i = 0; filter[i]; i++ )
  907.          {
  908. -            fprintf( stderr, "avs [info]: trying %s... ", filter[i] );
  909. +            x264_cli_log( "avs", X264_LOG_INFO, "trying %s... ", filter[i] );
  910.              if( !h->func.avs_function_exists( h->env, filter[i] ) )
  911.              {
  912. -                fprintf( stderr, "not found\n" );
  913. +                x264_cli_printf( X264_LOG_INFO, "not found\n" );
  914.                  continue;
  915.              }
  916.              if( !strncasecmp( filter[i], "FFmpegSource", 12 ) )
  917.              {
  918. -                fprintf( stderr, "indexing... " );
  919. +                x264_cli_printf( X264_LOG_INFO, "indexing... " );
  920.                  fflush( stderr );
  921.              }
  922.              res = h->func.avs_invoke( h->env, filter[i], arg, NULL );
  923.              if( !avs_is_error( res ) )
  924.              {
  925. -                fprintf( stderr, "succeeded\n" );
  926. +                x264_cli_printf( X264_LOG_INFO, "succeeded\n" );
  927.                  break;
  928.              }
  929. -            fprintf( stderr, "failed\n" );
  930. -        }
  931. -        if( !filter[i] )
  932. -        {
  933. -            fprintf( stderr, "avs [error]: unable to find source filter to open `%s'\n", psz_filename );
  934. -            return -1;
  935. +            x264_cli_printf( X264_LOG_INFO, "failed\n" );
  936.          }
  937. +        FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename )
  938.      }
  939. -    if( !avs_is_clip( res ) )
  940. -    {
  941. -        fprintf( stderr, "avs [error]: `%s' didn't return a video clip\n", psz_filename );
  942. -        return -1;
  943. -    }
  944. +    FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename )
  945.      h->clip = h->func.avs_take_clip( res, h->env );
  946.      int avs_version = h->func.avs_get_version( h->clip );
  947.      const AVS_VideoInfo *vi = h->func.avs_get_video_info( h->clip );
  948. -    if( !avs_has_video( vi ) )
  949. -    {
  950. -        fprintf( stderr, "avs [error]: `%s' has no video data\n", psz_filename );
  951. -        return -1;
  952. -    }
  953. +    FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename )
  954.      /* if the clip is made of fields instead of frames, call weave to make them frames */
  955.      if( avs_is_field_based( vi ) )
  956.      {
  957. -        fprintf( stderr, "avs [warning]: detected fieldbased (separated) input, weaving to frames\n" );
  958. +        x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
  959.          AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
  960. -        if( avs_is_error( tmp ) )
  961. -        {
  962. -            fprintf( stderr, "avs [error]: couldn't weave fields into frames\n" );
  963. -            return -1;
  964. -        }
  965. +        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" )
  966.          res = update_clip( h, &vi, tmp, res );
  967.          info->interlaced = 1;
  968.          info->tff = avs_is_tff( vi );
  969.      }
  970. -    if( vi->width&1 || vi->height&1 )
  971. -    {
  972. -        fprintf( stderr, "avs [error]: input clip width or height not divisible by 2 (%dx%d)\n",
  973. -                 vi->width, vi->height );
  974. -        return -1;
  975. -    }
  976. +    FAIL_IF_ERROR( vi->width&1 || vi->height&1, "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
  977.      /* always call ConvertToYV12 to convert non YV12 planar colorspaces to YV12 when user's AVS supports them,
  978.         as all planar colorspaces are flagged as YV12. If it is already YV12 in this case, the call does nothing */
  979.      if( !avs_is_yv12( vi ) || avs_version >= AVS_INTERFACE_OTHER_PLANAR )
  980.      {
  981. -        fprintf( stderr, "avs %s\n", !avs_is_yv12( vi ) ? "[warning]: converting input clip to YV12"
  982. -               : "[info]: avisynth 2.6+ detected, forcing conversion to YV12" );
  983. +        if( !avs_is_yv12( vi ) )
  984. +            x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to YV12\n" );
  985. +        else
  986. +            x264_cli_log( "avs", X264_LOG_INFO, "avisynth 2.6+ detected, forcing conversion to YV12\n" );
  987.          const char *arg_name[2] = { NULL, "interlaced" };
  988.          AVS_Value arg_arr[2] = { res, avs_new_value_bool( info->interlaced ) };
  989.          AVS_Value res2 = h->func.avs_invoke( h->env, "ConvertToYV12", avs_new_value_array( arg_arr, 2 ), arg_name );
  990. -        if( avs_is_error( res2 ) )
  991. -        {
  992. -            fprintf( stderr, "avs [error]: couldn't convert input clip to YV12\n" );
  993. -            return -1;
  994. -        }
  995. +        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to YV12\n" )
  996.          res = update_clip( h, &vi, res2, res );
  997.      }
  998.      h->func.avs_release_value( res );
  999. @@ -294,11 +256,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  1000.          return -1;
  1001.      AVS_VideoFrame *frm = p_pic->opaque = h->func.avs_get_frame( h->clip, i_frame );
  1002.      const char *err = h->func.avs_clip_get_error( h->clip );
  1003. -    if( err )
  1004. -    {
  1005. -        fprintf( stderr, "avs [error]: %s occurred while reading frame %d\n", err, i_frame );
  1006. -        return -1;
  1007. -    }
  1008. +    FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame )
  1009.      for( int i = 0; i < 3; i++ )
  1010.      {
  1011.          /* explicitly cast away the const attribute to avoid a warning */
  1012. diff --git a/input/ffms.c b/input/ffms.c
  1013. index b2a253e..fe8bf7e 100644
  1014. --- a/input/ffms.c
  1015. +++ b/input/ffms.c
  1016. @@ -21,8 +21,10 @@
  1017.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1018.   *****************************************************************************/
  1019.  
  1020. -#include "muxers.h"
  1021. +#include "input.h"
  1022.  #include <ffms.h>
  1023. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
  1024. +
  1025.  #undef DECLARE_ALIGNED
  1026.  #include <libavcodec/avcodec.h>
  1027.  #include <libswscale/swscale.h>
  1028. @@ -86,28 +88,16 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1029.      {
  1030.          idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, NULL, &e );
  1031.          fprintf( stderr, "                                            \r" );
  1032. -        if( !idx )
  1033. -        {
  1034. -            fprintf( stderr, "ffms [error]: could not create index\n" );
  1035. -            return -1;
  1036. -        }
  1037. +        FAIL_IF_ERROR( !idx, "could not create index\n" )
  1038.          if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
  1039. -            fprintf( stderr, "ffms [warning]: could not write index file\n" );
  1040. +            x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
  1041.      }
  1042.  
  1043.      int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
  1044. -    if( trackno < 0 )
  1045. -    {
  1046. -        fprintf( stderr, "ffms [error]: could not find video track\n" );
  1047. -        return -1;
  1048. -    }
  1049. +    FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
  1050.  
  1051.      h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
  1052. -    if( !h->video_source )
  1053. -    {
  1054. -        fprintf( stderr, "ffms [error]: could not create video source\n" );
  1055. -        return -1;
  1056. -    }
  1057. +    FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
  1058.  
  1059.      h->track = FFMS_GetTrackFromVideo( h->video_source );
  1060.  
  1061. @@ -121,11 +111,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1062.      h->vfr_input       = info->vfr;
  1063.  
  1064.      const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, 0, &e );
  1065. -    if( !frame )
  1066. -    {
  1067. -        fprintf( stderr, "ffms [error]: could not read frame 0\n" );
  1068. -        return -1;
  1069. -    }
  1070. +    FAIL_IF_ERROR( !frame, "could not read frame 0\n" )
  1071.  
  1072.      h->init_width  = h->cur_width  = info->width  = frame->EncodedWidth;
  1073.      h->init_height = h->cur_height = info->height = frame->EncodedHeight;
  1074. @@ -134,8 +120,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1075.      info->tff        = frame->TopFieldFirst;
  1076.  
  1077.      if( h->cur_pix_fmt != PIX_FMT_YUV420P )
  1078. -        fprintf( stderr, "ffms [warning]: converting from %s to YV12\n",
  1079. -                 avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
  1080. +        x264_cli_log( "ffms", X264_LOG_WARNING, "converting from %s to YV12\n",
  1081. +                       avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
  1082.  
  1083.      /* ffms timestamps are in milliseconds. ffms also uses int64_ts for timebase,
  1084.       * so we need to reduce large timebases to prevent overflow */
  1085. @@ -173,19 +159,15 @@ static int check_swscale( ffms_hnd_t *h, const FFMS_Frame *frame, int i_frame )
  1086.      if( h->scaler )
  1087.      {
  1088.          sws_freeContext( h->scaler );
  1089. -        fprintf( stderr, "ffms [warning]: stream properties changed to %dx%d, %s at frame %d  \n", frame->EncodedWidth,
  1090. -                 frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
  1091. +        x264_cli_log( "ffms", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d  \n", frame->EncodedWidth,
  1092. +                      frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
  1093.          h->cur_width   = frame->EncodedWidth;
  1094.          h->cur_height  = frame->EncodedHeight;
  1095.          h->cur_pix_fmt = frame->EncodedPixelFormat;
  1096.      }
  1097.      h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
  1098.                                  PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
  1099. -    if( !h->scaler )
  1100. -    {
  1101. -        fprintf( stderr, "ffms [error]: could not open swscale context\n" );
  1102. -        return -1;
  1103. -    }
  1104. +    FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
  1105.      return 0;
  1106.  }
  1107.  
  1108. @@ -195,11 +177,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  1109.      FFMS_ErrorInfo e;
  1110.      e.BufferSize = 0;
  1111.      const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, i_frame, &e );
  1112. -    if( !frame )
  1113. -    {
  1114. -        fprintf( stderr, "ffms [error]: could not read frame %d\n", i_frame );
  1115. -        return -1;
  1116. -    }
  1117. +    FAIL_IF_ERROR( !frame, "could not read frame %d\n", i_frame )
  1118.  
  1119.      if( check_swscale( h, frame, i_frame ) )
  1120.          return -1;
  1121. @@ -214,12 +192,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  1122.  
  1123.      if( h->vfr_input )
  1124.      {
  1125. -        if( info->PTS == AV_NOPTS_VALUE )
  1126. -        {
  1127. -            fprintf( stderr, "ffms [error]: invalid timestamp. "
  1128. -                     "Use --force-cfr and specify a framerate with --fps\n" );
  1129. -            return -1;
  1130. -        }
  1131. +        FAIL_IF_ERROR( info->PTS == AV_NOPTS_VALUE, "invalid timestamp. "
  1132. +                       "Use --force-cfr and specify a framerate with --fps\n" )
  1133.  
  1134.          if( !h->pts_offset_flag )
  1135.          {
  1136. diff --git a/input/input.h b/input/input.h
  1137. index f89b13b..f588f3c 100644
  1138. --- a/input/input.h
  1139. +++ b/input/input.h
  1140. @@ -25,6 +25,8 @@
  1141.  #ifndef X264_INPUT_H
  1142.  #define X264_INPUT_H
  1143.  
  1144. +#include "x264cli.h"
  1145. +
  1146.  /* options that are used by only some demuxers */
  1147.  typedef struct
  1148.  {
  1149. diff --git a/input/lavf.c b/input/lavf.c
  1150. index 4b0375f..54a275f 100644
  1151. --- a/input/lavf.c
  1152. +++ b/input/lavf.c
  1153. @@ -21,7 +21,8 @@
  1154.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1155.   *****************************************************************************/
  1156.  
  1157. -#include "muxers.h"
  1158. +#include "input.h"
  1159. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
  1160.  #undef DECLARE_ALIGNED
  1161.  #include <libavformat/avformat.h>
  1162.  #include <libswscale/swscale.h>
  1163. @@ -59,19 +60,15 @@ static int check_swscale( lavf_hnd_t *h, AVCodecContext *c, int i_frame )
  1164.      if( h->scaler )
  1165.      {
  1166.          sws_freeContext( h->scaler );
  1167. -        fprintf( stderr, "lavf [warning]: stream properties changed to %dx%d, %s at frame %d  \n",
  1168. -                 c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
  1169. +        x264_cli_log( "lavf", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d  \n",
  1170. +                      c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
  1171.          h->cur_width   = c->width;
  1172.          h->cur_height  = c->height;
  1173.          h->cur_pix_fmt = c->pix_fmt;
  1174.      }
  1175.      h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
  1176.                                  PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
  1177. -    if( !h->scaler )
  1178. -    {
  1179. -        fprintf( stderr, "lavf [error]: could not open swscale context\n" );
  1180. -        return -1;
  1181. -    }
  1182. +    FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
  1183.      return 0;
  1184.  }
  1185.  
  1186. @@ -106,12 +103,12 @@ static int read_frame_internal( x264_picture_t *p_pic, lavf_hnd_t *h, int i_fram
  1187.              {
  1188.                  c->reordered_opaque = pkt->pts;
  1189.                  if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
  1190. -                    fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
  1191. +                    x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
  1192.              }
  1193.          if( !finished )
  1194.          {
  1195.              if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
  1196. -                fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
  1197. +                x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
  1198.              if( !finished )
  1199.                  return -1;
  1200.          }
  1201. @@ -166,26 +163,13 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1202.      if( !strcmp( psz_filename, "-" ) )
  1203.          psz_filename = "pipe:";
  1204.  
  1205. -    if( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ) )
  1206. -    {
  1207. -        fprintf( stderr, "lavf [error]: could not open input file\n" );
  1208. -        return -1;
  1209. -    }
  1210. -
  1211. -    if( av_find_stream_info( h->lavf ) < 0 )
  1212. -    {
  1213. -        fprintf( stderr, "lavf [error]: could not find input stream info\n" );
  1214. -        return -1;
  1215. -    }
  1216. +    FAIL_IF_ERROR( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ), "could not open input file\n" )
  1217. +    FAIL_IF_ERROR( av_find_stream_info( h->lavf ) < 0, "could not find input stream info\n" )
  1218.  
  1219.      int i = 0;
  1220.      while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != CODEC_TYPE_VIDEO )
  1221.          i++;
  1222. -    if( i == h->lavf->nb_streams )
  1223. -    {
  1224. -        fprintf( stderr, "lavf [error]: could not find video stream\n" );
  1225. -        return -1;
  1226. -    }
  1227. +    FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
  1228.      h->stream_id       = i;
  1229.      h->next_frame      = 0;
  1230.      h->pts_offset_flag = 0;
  1231. @@ -207,22 +191,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1232.          info->csp |= X264_CSP_VFLIP;
  1233.  
  1234.      if( h->cur_pix_fmt != PIX_FMT_YUV420P )
  1235. -        fprintf( stderr, "lavf [warning]: converting from %s to YV12\n",
  1236. -                 avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
  1237. -
  1238. -    if( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ) )
  1239. -    {
  1240. -        fprintf( stderr, "lavf [error]: could not find decoder for video stream\n" );
  1241. -        return -1;
  1242. -    }
  1243. +        x264_cli_log( "lavf", X264_LOG_WARNING, "converting from %s to YV12\n",
  1244. +                      avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
  1245. +    FAIL_IF_ERROR( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ),
  1246. +                   "could not find decoder for video stream\n" )
  1247.  
  1248.      /* prefetch the first frame and set/confirm flags */
  1249.      h->first_pic = malloc( sizeof(x264_picture_t) );
  1250. -    if( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ) )
  1251. -    {
  1252. -        fprintf( stderr, "lavf [error]: malloc failed\n" );
  1253. -        return -1;
  1254. -    }
  1255. +    FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ),
  1256. +                   "malloc failed\n" )
  1257.      else if( read_frame_internal( h->first_pic, h, 0, info ) )
  1258.          return -1;
  1259.  
  1260. diff --git a/input/thread.c b/input/thread.c
  1261. index c4b07fa..98af22b 100644
  1262. --- a/input/thread.c
  1263. +++ b/input/thread.c
  1264. @@ -21,7 +21,7 @@
  1265.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1266.   *****************************************************************************/
  1267.  
  1268. -#include "muxers.h"
  1269. +#include "input.h"
  1270.  
  1271.  extern cli_input_t input;
  1272.  
  1273. @@ -47,11 +47,8 @@ typedef struct thread_input_arg_t
  1274.  static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
  1275.  {
  1276.      thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
  1277. -    if( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ) )
  1278. -    {
  1279. -        fprintf( stderr, "x264 [error]: malloc failed\n" );
  1280. -        return -1;
  1281. -    }
  1282. +    FAIL_IF_ERR( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ),
  1283. +                 "x264", "malloc failed\n" )
  1284.      h->input = input;
  1285.      h->p_handle = *p_handle;
  1286.      h->next_frame = -1;
  1287. diff --git a/input/timecode.c b/input/timecode.c
  1288. index a307327..7821e76 100644
  1289. --- a/input/timecode.c
  1290. +++ b/input/timecode.c
  1291. @@ -20,7 +20,8 @@
  1292.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1293.   *****************************************************************************/
  1294.  
  1295. -#include "muxers.h"
  1296. +#include "input.h"
  1297. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
  1298.  #include <math.h>
  1299.  
  1300.  extern cli_input_t input;
  1301. @@ -61,12 +62,8 @@ static double correct_fps( double fps, timecode_hnd_t *h )
  1302.      {
  1303.          fps_den = i * h->timebase_num;
  1304.          fps_num = round( fps_den * fps_sig ) * exponent;
  1305. -        if( fps_num > UINT32_MAX )
  1306. -        {
  1307. -            fprintf( stderr, "timecode [error]: tcfile fps correction failed.\n"
  1308. -                             "                  Specify an appropriate timebase manually or remake tcfile.\n" );
  1309. -            return -1;
  1310. -        }
  1311. +        FAIL_IF_ERROR( fps_num > UINT32_MAX, "tcfile fps correction failed.\n"
  1312. +                       "                  Specify an appropriate timebase manually or remake tcfile.\n" )
  1313.          if( fabs( ((double)fps_num / fps_den) / exponent - fps_sig ) < DOUBLE_EPSILON )
  1314.              break;
  1315.          ++i;
  1316. @@ -91,12 +88,8 @@ static int try_mkv_timebase_den( double *fpss, timecode_hnd_t *h, int loop_num )
  1317.          double fps_sig = sigexp10( fpss[num], &exponent );
  1318.          fps_den = round( MKV_TIMEBASE_DEN / fps_sig ) / exponent;
  1319.          h->timebase_num = fps_den && h->timebase_num ? gcd( h->timebase_num, fps_den ) : fps_den;
  1320. -        if( h->timebase_num > UINT32_MAX || !h->timebase_num )
  1321. -        {
  1322. -            fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
  1323. -                             "                  Specify timebase manually.\n" );
  1324. -            return -1;
  1325. -        }
  1326. +        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || !h->timebase_num, "automatic timebase generation failed.\n"
  1327. +                       "                  Specify timebase manually.\n" )
  1328.      }
  1329.      return 0;
  1330.  }
  1331. @@ -110,11 +103,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1332.      double *fpss = NULL;
  1333.  
  1334.      ret = fscanf( tcfile_in, "# timecode format v%d", &tcfv );
  1335. -    if( ret != 1 || (tcfv != 1 && tcfv != 2) )
  1336. -    {
  1337. -        fprintf( stderr, "timecode [error]: unsupported timecode format\n" );
  1338. -        return -1;
  1339. -    }
  1340. +    FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" )
  1341.  
  1342.      if( tcfv == 1 )
  1343.      {
  1344. @@ -128,18 +117,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1345.          {
  1346.              if( buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r' )
  1347.                  continue;
  1348. -            if( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1 )
  1349. -            {
  1350. -                fprintf( stderr, "timecode [error]: tcfile parsing error: assumed fps not found\n" );
  1351. -                return -1;
  1352. -            }
  1353. +            FAIL_IF_ERROR( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1,
  1354. +                           "tcfile parsing error: assumed fps not found\n" )
  1355.              break;
  1356.          }
  1357. -        if( h->assume_fps <= 0 )
  1358. -        {
  1359. -            fprintf( stderr, "timecode [error]: invalid assumed fps %.6f\n", h->assume_fps );
  1360. -            return -1;
  1361. -        }
  1362. +        FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps )
  1363.  
  1364.          file_pos = ftell( tcfile_in );
  1365.          h->stored_pts_num = 0;
  1366. @@ -152,16 +134,9 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1367.                  continue;
  1368.              }
  1369.              ret = sscanf( buff, "%d,%d,%lf", &start, &end, &seq_fps );
  1370. -            if( ret != 3 && ret != EOF )
  1371. -            {
  1372. -                fprintf( stderr, "timecode [error]: invalid input tcfile\n" );
  1373. -                return -1;
  1374. -            }
  1375. -            if( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0 )
  1376. -            {
  1377. -                fprintf( stderr, "timecode [error]: invalid input tcfile at line %d: %s\n", num, buff );
  1378. -                return -1;
  1379. -            }
  1380. +            FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" )
  1381. +            FAIL_IF_ERROR( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0,
  1382. +                           "invalid input tcfile at line %d: %s\n", num, buff )
  1383.              prev_start = start;
  1384.              prev_end = end;
  1385.              if( h->auto_timebase_den || h->auto_timebase_num )
  1386. @@ -259,11 +234,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1387.              ++num;
  1388.          }
  1389.          timecodes_num = h->stored_pts_num + h->seek;
  1390. -        if( !timecodes_num )
  1391. -        {
  1392. -            fprintf( stderr, "timecode [error]: input tcfile doesn't have any timecodes!\n" );
  1393. -            return -1;
  1394. -        }
  1395. +        FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" )
  1396.          fseek( tcfile_in, file_pos, SEEK_SET );
  1397.  
  1398.          timecodes = malloc( timecodes_num * sizeof(double) );
  1399. @@ -272,11 +243,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1400.  
  1401.          fgets( buff, sizeof(buff), tcfile_in );
  1402.          ret = sscanf( buff, "%lf", &timecodes[0] );
  1403.          if( ret != 1 )
  1404.          {
  1405. -            fprintf( stderr, "timecode [error]: invalid input tcfile for frame 0\n" );
  1406. +            x264_cli_log( "timecode", X264_LOG_ERROR, "invalid input tcfile for frame 0\n" );
  1407.              goto fail;
  1408.          }
  1409.          for( num = 1; num < timecodes_num; )
  1410.          {
  1411.              fgets( buff, sizeof(buff), tcfile_in );
  1412. @@ -284,11 +251,12 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1413.                  continue;
  1414.              ret = sscanf( buff, "%lf", &timecodes[num] );
  1415.              timecodes[num] *= 1e-3;         /* Timecode format v2 is expressed in milliseconds. */
  1416.              if( ret != 1 || timecodes[num] <= timecodes[num - 1] )
  1417.              {
  1418. -                fprintf( stderr, "timecode [error]: invalid input tcfile for frame %d\n", num );
  1419. +                x264_cli_log( "timecode", X264_LOG_ERROR,
  1420. +                              "invalid input tcfile for frame %d\n", num );
  1421.                  goto fail;
  1422.              }
  1423.              ++num;
  1424.          }
  1425.  
  1426. @@ -342,14 +306,14 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1427.          uint64_t i = gcd( h->timebase_num, h->timebase_den );
  1428.          h->timebase_num /= i;
  1429.          h->timebase_den /= i;
  1430. -        fprintf( stderr, "timecode [info]: automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
  1431. +        x264_cli_log( "timecode", X264_LOG_INFO, "automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
  1432.      }
  1433.      else if( h->timebase_den > UINT32_MAX || !h->timebase_den )
  1434.      {
  1435. -        fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
  1436. -                         "                  Specify an appropriate timebase manually.\n" );
  1437. +        x264_cli_log( "timecode", X264_LOG_ERROR, "automatic timebase generation failed.\n"
  1438. +                      "                  Specify an appropriate timebase manually.\n" );
  1439.          goto fail;
  1440.      }
  1441.  
  1442.      h->pts = malloc( h->stored_pts_num * sizeof(int64_t) );
  1443.      if( !h->pts )
  1444. @@ -360,11 +320,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
  1445.      {
  1446.          h->pts[num] = (int64_t)( timecodes[h->seek + num] * ((double)h->timebase_den / h->timebase_num) + 0.5 );
  1447.          h->pts[num] -= pts_seek_offset;
  1448.          if( h->pts[num] <= h->pts[num - 1] )
  1449.          {
  1450. -            fprintf( stderr, "timecode [error]: invalid timebase or timecode for frame %d\n", num );
  1451. +            x264_cli_log( "timecode", X264_LOG_ERROR, "invalid timebase or timecode for frame %d\n", num );
  1452.              goto fail;
  1453.          }
  1454.      }
  1455.  
  1456.      free( timecodes );
  1457. @@ -386,11 +342,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1458.      int ret = 0;
  1459.      FILE *tcfile_in;
  1460.      timecode_hnd_t *h = malloc( sizeof(timecode_hnd_t) );
  1461. -    if( !h )
  1462. -    {
  1463. -        fprintf( stderr, "timecode [error]: malloc failed\n" );
  1464. -        return -1;
  1465. -    }
  1466. +    FAIL_IF_ERROR( !h, "malloc failed\n" )
  1467.      h->input = input;
  1468.      h->p_handle = *p_handle;
  1469.      h->frame_total = input.get_frame_total( h->p_handle );
  1470. @@ -400,11 +352,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1471.          ret = sscanf( opt->timebase, "%"SCNu64"/%"SCNu64, &h->timebase_num, &h->timebase_den );
  1472.          if( ret == 1 )
  1473.              h->timebase_num = strtoul( opt->timebase, NULL, 10 );
  1474. -        if( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX )
  1475. -        {
  1476. -            fprintf( stderr, "timecode [error]: timebase you specified exceeds H.264 maximum\n" );
  1477. -            return -1;
  1478. -        }
  1479. +        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX,
  1480. +                       "timebase you specified exceeds H.264 maximum\n" )
  1481.      }
  1482.      h->auto_timebase_num = !ret;
  1483.      h->auto_timebase_den = ret < 2;
  1484. @@ -418,14 +367,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1485.      *p_handle = h;
  1486.  
  1487.      tcfile_in = fopen( psz_filename, "rb" );
  1488. -    if( !tcfile_in )
  1489. -    {
  1490. -        fprintf( stderr, "timecode [error]: can't open `%s'\n", psz_filename );
  1491. -        return -1;
  1492. -    }
  1493. +    FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
  1494.      else if( !x264_is_regular_file( tcfile_in ) )
  1495.      {
  1496. -        fprintf( stderr, "timecode [error]: tcfile input incompatible with non-regular file `%s'\n", psz_filename );
  1497. +        x264_cli_log( "timecode", X264_LOG_ERROR, "tcfile input incompatible with non-regular file `%s'\n", psz_filename );
  1498.          fclose( tcfile_in );
  1499.          return -1;
  1500.      }
  1501. @@ -466,8 +411,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
  1502.      {
  1503.          if( h->pts )
  1504.          {
  1505. -            fprintf( stderr, "timecode [info]: input timecode file missing data for frame %d and later\n"
  1506. -                             "                 assuming constant fps %.6f\n", i_frame, h->assume_fps );
  1507. +            x264_cli_log( "timecode", X264_LOG_INFO, "input timecode file missing data for frame %d and later\n"
  1508. +                          "                 assuming constant fps %.6f\n", i_frame, h->assume_fps );
  1509.              free( h->pts );
  1510.              h->pts = NULL;
  1511.          }
  1512. diff --git a/input/y4m.c b/input/y4m.c
  1513. index fd42140..9b39d2f 100644
  1514. --- a/input/y4m.c
  1515. +++ b/input/y4m.c
  1516. @@ -21,7 +21,8 @@
  1517.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1518.   *****************************************************************************/
  1519.  
  1520. -#include "muxers.h"
  1521. +#include "input.h"
  1522. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ )
  1523.  
  1524.  typedef struct
  1525.  {
  1526. @@ -162,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1527.      if( colorspace == X264_CSP_NONE )
  1528.          colorspace = X264_CSP_I420;
  1529.  
  1530. -    if( colorspace != X264_CSP_I420 )
  1531. -    {
  1532. -        fprintf( stderr, "y4m [error]: colorspace unhandled\n" );
  1533. -        return -1;
  1534. -    }
  1535. +    FAIL_IF_ERROR( colorspace != X264_CSP_I420, "colorspace unhandled\n" )
  1536.  
  1537.      *p_handle = h;
  1538.      return 0;
  1539. @@ -202,21 +199,13 @@ static int read_frame_internal( x264_picture_t *p_pic, y4m_hnd_t *h )
  1540.          return -1;
  1541.  
  1542.      header[slen] = 0;
  1543. -    if( strncmp( header, Y4M_FRAME_MAGIC, slen ) )
  1544. -    {
  1545. -        fprintf( stderr, "y4m [error]: bad header magic (%"PRIx32" <=> %s)\n",
  1546. -                 M32(header), header );
  1547. -        return -1;
  1548. -    }
  1549. +    FAIL_IF_ERROR( strncmp( header, Y4M_FRAME_MAGIC, slen ), "bad header magic (%"PRIx32" <=> %s)\n",
  1550. +                   M32(header), header )
  1551.  
  1552.      /* Skip most of it */
  1553.      while( i < MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
  1554.          i++;
  1555. -    if( i == MAX_FRAME_HEADER )
  1556. -    {
  1557. -        fprintf( stderr, "y4m [error]: bad frame header!\n" );
  1558. -        return -1;
  1559. -    }
  1560. +    FAIL_IF_ERROR( i == MAX_FRAME_HEADER, "bad frame header!\n" )
  1561.      h->frame_header_len = i+slen+1;
  1562.  
  1563.      if( fread( p_pic->img.plane[0], h->width * h->height, 1, h->fh ) <= 0
  1564. diff --git a/input/yuv.c b/input/yuv.c
  1565. index cbed7fc..613662c 100644
  1566. --- a/input/yuv.c
  1567. +++ b/input/yuv.c
  1568. @@ -21,7 +21,7 @@
  1569.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1570.   *****************************************************************************/
  1571.  
  1572. -#include "muxers.h"
  1573. +#include "input.h"
  1574.  
  1575.  typedef struct
  1576.  {
  1577. @@ -45,11 +45,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
  1578.      }
  1579.      else
  1580.          sscanf( opt->resolution, "%ux%u", &info->width, &info->height );
  1581. -    if( !info->width || !info->height )
  1582. -    {
  1583. -        fprintf( stderr, "yuv [error]: rawyuv input requires a resolution.\n" );
  1584. -        return -1;
  1585. -    }
  1586. +    FAIL_IF_ERR( !info->width || !info->height, "yuv", "rawyuv input requires a resolution.\n" )
  1587.  
  1588.      h->next_frame = 0;
  1589.      info->vfr     = 0;
  1590. diff --git a/muxers.h b/muxers.h
  1591. deleted file mode 100644
  1592. index b309320..0000000
  1593. --- a/muxers.h
  1594. +++ /dev/null
  1595. @@ -1,61 +0,0 @@
  1596. -/*****************************************************************************
  1597. - * muxers.h: h264 file i/o modules
  1598. - *****************************************************************************
  1599. - * Copyright (C) 2003-2009 x264 project
  1600. - *
  1601. - * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  1602. - *          Loren Merritt <lorenm@u.washington.edu>
  1603. - *
  1604. - * This program is free software; you can redistribute it and/or modify
  1605. - * it under the terms of the GNU General Public License as published by
  1606. - * the Free Software Foundation; either version 2 of the License, or
  1607. - * (at your option) any later version.
  1608. - *
  1609. - * This program is distributed in the hope that it will be useful,
  1610. - * but WITHOUT ANY WARRANTY; without even the implied warranty of
  1611. - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  1612. - * GNU General Public License for more details.
  1613. - *
  1614. - * You should have received a copy of the GNU General Public License
  1615. - * along with this program; if not, write to the Free Software
  1616. - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1617. - *****************************************************************************/
  1618. -
  1619. -#ifndef X264_MUXERS_H
  1620. -#define X264_MUXERS_H
  1621. -
  1622. -#include "common/common.h"
  1623. -#include "x264.h"
  1624. -
  1625. -typedef void *hnd_t;
  1626. -
  1627. -static inline int64_t gcd( int64_t a, int64_t b )
  1628. -{
  1629. -    while( 1 )
  1630. -    {
  1631. -        int64_t c = a % b;
  1632. -        if( !c )
  1633. -            return b;
  1634. -        a = b;
  1635. -        b = c;
  1636. -    }
  1637. -}
  1638. -
  1639. -static inline int64_t lcm( int64_t a, int64_t b )
  1640. -{
  1641. -    return ( a / gcd( a, b ) ) * b;
  1642. -}
  1643. -
  1644. -static inline char *get_filename_extension( char *filename )
  1645. -{
  1646. -    char *ext = filename + strlen( filename );
  1647. -    while( *ext != '.' && ext > filename )
  1648. -        ext--;
  1649. -    ext += *ext == '.';
  1650. -    return ext;
  1651. -}
  1652. -
  1653. -#include "input/input.h"
  1654. -#include "output/output.h"
  1655. -
  1656. -#endif
  1657. diff --git a/output/flv.c b/output/flv.c
  1658. index e441b6d..9831a5b 100644
  1659. --- a/output/flv.c
  1660. +++ b/output/flv.c
  1661. @@ -18,7 +18,7 @@
  1662.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1663.   *****************************************************************************/
  1664.  
  1665. -#include "muxers.h"
  1666. +#include "output.h"
  1667.  #include "flv_bytestream.h"
  1668.  
  1669.  #define CHECK(x)\
  1670. @@ -223,14 +223,14 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
  1671.          if( prev_dts == dts )
  1672.          {
  1673.              double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
  1674. -            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
  1675. -                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
  1676. +            x264_cli_log( "flv", X264_LOG_WARNING, "duplicate DTS %"PRId64" generated by rounding\n"
  1677. +                          "               current internal decoding framerate: %.6f fps\n", dts, fps );
  1678.          }
  1679.          if( prev_cts == cts )
  1680.          {
  1681.              double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
  1682. -            fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" generated by rounding\n"
  1683. -                             "               current internal composition framerate: %.6f fps\n", cts, fps );
  1684. +            x264_cli_log( "flv", X264_LOG_WARNING, "duplicate CTS %"PRId64" generated by rounding\n"
  1685. +                          "               current internal composition framerate: %.6f fps\n", cts, fps );
  1686.          }
  1687.      }
  1688.      p_flv->i_prev_dts = p_picture->i_dts;
  1689. diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
  1690. index 316114c..e02476c 100644
  1691. --- a/output/flv_bytestream.c
  1692. +++ b/output/flv_bytestream.c
  1693. @@ -18,7 +18,7 @@
  1694.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1695.   *****************************************************************************/
  1696.  
  1697. -#include "muxers.h"
  1698. +#include "output.h"
  1699.  #include "flv_bytestream.h"
  1700.  
  1701.  uint64_t dbl2int( double value )
  1702. diff --git a/output/matroska.c b/output/matroska.c
  1703. index 0304c84..a1219d0 100644
  1704. --- a/output/matroska.c
  1705. +++ b/output/matroska.c
  1706. @@ -18,7 +18,7 @@
  1707.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1708.   *****************************************************************************/
  1709.  
  1710. -#include "muxers.h"
  1711. +#include "output.h"
  1712.  #include "matroska_ebml.h"
  1713.  
  1714.  typedef struct
  1715. diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
  1716. index 31b62f8..adfcaa8 100644
  1717. --- a/output/matroska_ebml.c
  1718. +++ b/output/matroska_ebml.c
  1719. @@ -18,7 +18,7 @@
  1720.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1721.   *****************************************************************************/
  1722.  
  1723. -#include "muxers.h"
  1724. +#include "output.h"
  1725.  #include "matroska_ebml.h"
  1726.  
  1727.  #define CLSIZE 1048576
  1728. diff --git a/output/mp4.c b/output/mp4.c
  1729. index 0e3c2fc..f2fc5f5 100644
  1730. --- a/output/mp4.c
  1731. +++ b/output/mp4.c
  1732. @@ -21,7 +21,7 @@
  1733.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1734.   *****************************************************************************/
  1735.  
  1736. -#include "muxers.h"
  1737. +#include "output.h"
  1738.  #include <gpac/isomedia.h>
  1739.  
  1740.  #if HAVE_GF_MALLOC
  1741. @@ -61,12 +61,12 @@ static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
  1742.  
  1743.      timescale = gf_isom_get_media_timescale( p_file, i_track );
  1744.      count = gf_isom_get_sample_count( p_file, i_track );
  1745. -    for( int i = 0; i < count; i++ )
  1746. +    for( u32 i = 0; i < count; i++ )
  1747.      {
  1748.          GF_ISOSample *samp = gf_isom_get_sample_info( p_file, i_track, i+1, &di, &offset );
  1749.          if( !samp )
  1750.          {
  1751. -            fprintf( stderr, "mp4 [error]: failure reading back frame %u\n", i );
  1752. +            x264_cli_log( "mp4", X264_LOG_ERROR, "failure reading back frame %u\n", i );
  1753.              break;
  1754.          }
  1755.  
  1756. @@ -163,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
  1757.      FILE *fh = fopen( psz_filename, "w" );
  1758.      if( !fh )
  1759.          return -1;
  1760. -    else if( !x264_is_regular_file( fh ) )
  1761. -    {
  1762. -        fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
  1763. -        return -1;
  1764. -    }
  1765. +    FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
  1766.      fclose( fh );
  1767.  
  1768.      if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
  1769. diff --git a/output/output.h b/output/output.h
  1770. index c79b48e..094fefc 100644
  1771. --- a/output/output.h
  1772. +++ b/output/output.h
  1773. @@ -24,6 +24,8 @@
  1774.  #ifndef X264_OUTPUT_H
  1775.  #define X264_OUTPUT_H
  1776.  
  1777. +#include "x264cli.h"
  1778. +
  1779.  typedef struct
  1780.  {
  1781.      int (*open_file)( char *psz_filename, hnd_t *p_handle );
  1782. diff --git a/output/raw.c b/output/raw.c
  1783. index 02e4c56..fc418fb 100644
  1784. --- a/output/raw.c
  1785. +++ b/output/raw.c
  1786. @@ -21,7 +21,7 @@
  1787.   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  1788.   *****************************************************************************/
  1789.  
  1790. -#include "muxers.h"
  1791. +#include "output.h"
  1792.  
  1793.  static int open_file( char *psz_filename, hnd_t *p_handle )
  1794.  {
  1795. diff --git a/x264.c b/x264.c
  1796. index f08ab41..741570c 100644
  1797. --- a/x264.c
  1798. +++ b/x264.c
  1799. @@ -31,9 +31,11 @@
  1800.  #include <getopt.h>
  1801.  
  1802.  #include "common/common.h"
  1803. -#include "common/cpu.h"
  1804. -#include "x264.h"
  1805. -#include "muxers.h"
  1806. +#include "x264cli.h"
  1807. +#include "input/input.h"
  1808. +#include "output/output.h"
  1809. +
  1810. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
  1811.  
  1812.  #ifdef _WIN32
  1813.  #include <windows.h>
  1814. @@ -96,6 +98,7 @@ static const char * const muxer_names[] =
  1815.  };
  1816.  
  1817.  static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
  1818. +static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
  1819.  
  1820.  typedef struct{
  1821.      int mod;
  1822. @@ -141,6 +144,48 @@ static void Help( x264_param_t *defaults, int longhelp );
  1823.  static int  Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt );
  1824.  static int  Encode( x264_param_t *param, cli_opt_t *opt );
  1825.  
  1826. +/* logging and printing for within the cli system */
  1827. +static int cli_log_level;
  1828. +void x264_cli_log( const char *name, int i_level, const char *fmt, ... )
  1829. +{
  1830. +    if( i_level > cli_log_level )
  1831. +        return;
  1832. +    char *s_level;
  1833. +    switch( i_level )
  1834. +    {
  1835. +        case X264_LOG_ERROR:
  1836. +            s_level = "error";
  1837. +            break;
  1838. +        case X264_LOG_WARNING:
  1839. +            s_level = "warning";
  1840. +            break;
  1841. +        case X264_LOG_INFO:
  1842. +            s_level = "info";
  1843. +            break;
  1844. +        case X264_LOG_DEBUG:
  1845. +            s_level = "debug";
  1846. +            break;
  1847. +        default:
  1848. +            s_level = "unknown";
  1849. +            break;
  1850. +    }
  1851. +    fprintf( stderr, "%s [%s]: ", name, s_level );
  1852. +    va_list arg;
  1853. +    va_start( arg, fmt );
  1854. +    vfprintf( stderr, fmt, arg );
  1855. +    va_end( arg );
  1856. +}
  1857. +
  1858. +void x264_cli_printf( int i_level, const char *fmt, ... )
  1859. +{
  1860. +    if( i_level > cli_log_level )
  1861. +        return;
  1862. +    va_list arg;
  1863. +    va_start( arg, fmt );
  1864. +    vfprintf( stderr, fmt, arg );
  1865. +    va_end( arg );
  1866. +}
  1867. +
  1868.  /****************************************************************************
  1869.   * main:
  1870.   ****************************************************************************/
  1871. @@ -571,6 +616,9 @@ static void Help( x264_param_t *defaults, int longhelp )
  1872.      H1( "  -v, --verbose               Print stats for each frame\n" );
  1873.      H1( "      --no-progress           Don't show the progress indicator while encoding\n" );
  1874.      H0( "      --quiet                 Quiet Mode\n" );
  1875. +    H1( "      --log-level <string>    Specify the maximum level of logging [\"%s\"]\n"
  1876. +        "                                  - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ),
  1877. +                                       stringify_names( buf, log_level_names ) );
  1878.      H1( "      --psnr                  Enable PSNR computation\n" );
  1879.      H1( "      --ssim                  Enable SSIM computation\n" );
  1880.      H1( "      --threads <integer>     Force a specific number of threads\n" );
  1881. @@ -616,6 +664,7 @@ enum {
  1882.      OPT_TCFILE_OUT,
  1883.      OPT_TIMEBASE,
  1884.      OPT_PULLDOWN,
  1885. +    OPT_LOG_LEVEL
  1886.  } OptionsOPT;
  1887.  
  1888.  static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
  1889. @@ -729,6 +778,7 @@ static struct option long_options[] =
  1890.      { "ssim",              no_argument, NULL, 0 },
  1891.      { "quiet",             no_argument, NULL, OPT_QUIET },
  1892.      { "verbose",           no_argument, NULL, 'v' },
  1893. +    { "log-level",   required_argument, NULL, OPT_LOG_LEVEL },
  1894.      { "no-progress",       no_argument, NULL, OPT_NOPROGRESS },
  1895.      { "visualize",         no_argument, NULL, OPT_VISUALIZE },
  1896.      { "dump-yuv",    required_argument, NULL, 0 },
  1897. @@ -780,11 +830,11 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
  1898.          param->b_repeat_headers = 0;
  1899.          if( param->i_nal_hrd == X264_NAL_HRD_CBR )
  1900.          {
  1901. -            fprintf( stderr, "x264 [warning]: cbr nal-hrd is not compatible with mp4\n" );
  1902. +            x264_cli_log( "x264", X264_LOG_WARNING, "cbr nal-hrd is not compatible with mp4\n" );
  1903.              param->i_nal_hrd = X264_NAL_HRD_VBR;
  1904.          }
  1905.  #else
  1906. -        fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
  1907. +        x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with MP4 output support\n" );
  1908.          return -1;
  1909.  #endif
  1910.      }
  1911. @@ -833,7 +883,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
  1912.          input = avs_input;
  1913.          module = "avs";
  1914.  #else
  1915. -        fprintf( stderr, "x264 [error]: not compiled with AVS input support\n" );
  1916. +        x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with AVS input support\n" );
  1917.          return -1;
  1918.  #endif
  1919.      }
  1920. @@ -877,11 +927,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
  1921.              input = yuv_input;
  1922.          }
  1923.  
  1924. -        if( !(*p_handle) )
  1925. -        {
  1926. -            fprintf( stderr, "x264 [error]: could not open input file `%s' via any method!\n", filename );
  1927. -            return -1;
  1928. -        }
  1929. +        FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename )
  1930.      }
  1931.      strcpy( used_demuxer, module );
  1932.  
  1933. @@ -932,6 +978,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  1934.      char *tune = NULL;
  1935.  
  1936.      x264_param_default( &defaults );
  1937. +    cli_log_level = defaults.i_log_level;
  1938.  
  1939.      memset( opt, 0, sizeof(cli_opt_t) );
  1940.      memset( &input_opt, 0, sizeof(cli_input_opt_t) );
  1941. @@ -1004,32 +1051,20 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  1942.                  output_filename = optarg;
  1943.                  break;
  1944.              case OPT_MUXER:
  1945. -                if( parse_enum_name( optarg, muxer_names, &muxer ) < 0 )
  1946. -                {
  1947. -                    fprintf( stderr, "x264 [error]: Unknown muxer `%s'\n", optarg );
  1948. -                    return -1;
  1949. -                }
  1950. +                FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg )
  1951.                  break;
  1952.              case OPT_DEMUXER:
  1953. -                if( parse_enum_name( optarg, demuxer_names, &demuxer ) < 0 )
  1954. -                {
  1955. -                    fprintf( stderr, "x264 [error]: Unknown demuxer `%s'\n", optarg );
  1956. -                    return -1;
  1957. -                }
  1958. +                FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg )
  1959.                  break;
  1960.              case OPT_INDEX:
  1961.                  input_opt.index_file = optarg;
  1962.                  break;
  1963.              case OPT_QPFILE:
  1964.                  opt->qpfile = fopen( optarg, "rb" );
  1965. -                if( !opt->qpfile )
  1966. -                {
  1967. -                    fprintf( stderr, "x264 [error]: can't open qpfile `%s'\n", optarg );
  1968. -                    return -1;
  1969. -                }
  1970. -                else if( !x264_is_regular_file( opt->qpfile ) )
  1971. +                FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
  1972. +                if( !x264_is_regular_file( opt->qpfile ) )
  1973.                  {
  1974. -                    fprintf( stderr, "x264 [error]: qpfile incompatible with non-regular file `%s'\n", optarg );
  1975. +                    x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg );
  1976.                      fclose( opt->qpfile );
  1977.                      return -1;
  1978.                  }
  1979. @@ -1038,11 +1073,17 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  1980.                  b_thread_input = 1;
  1981.                  break;
  1982.              case OPT_QUIET:
  1983. -                param->i_log_level = X264_LOG_NONE;
  1984. +                cli_log_level = param->i_log_level = X264_LOG_NONE;
  1985.                  break;
  1986.              case 'v':
  1987. -                param->i_log_level = X264_LOG_DEBUG;
  1988. +                cli_log_level = param->i_log_level = X264_LOG_DEBUG;
  1989.                  break;
  1990. +            case OPT_LOG_LEVEL:
  1991. +                if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) )
  1992. +                    param->i_log_level = cli_log_level += X264_LOG_NONE;
  1993. +                else
  1994. +                    param->i_log_level = cli_log_level = atoi( optarg );
  1995. +                break;
  1996.              case OPT_NOPROGRESS:
  1997.                  opt->b_progress = 0;
  1998.                  break;
  1999. @@ -1051,7 +1092,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  2000.                  param->b_visualize = 1;
  2001.                  b_exit_on_ctrl_c = 1;
  2002.  #else
  2003. -                fprintf( stderr, "x264 [warning]: not compiled with visualization support\n" );
  2004. +                x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
  2005.  #endif
  2006.                  break;
  2007.              case OPT_TUNE:
  2008. @@ -1078,18 +1119,13 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  2009.                  break;
  2010.              case OPT_TCFILE_OUT:
  2011.                  opt->tcfile_out = fopen( optarg, "wb" );
  2012. -                if( !opt->tcfile_out )
  2013. -                {
  2014. -                    fprintf( stderr, "x264 [error]: can't open `%s'\n", optarg );
  2015. -                    return -1;
  2016. -                }
  2017. +                FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
  2018.                  break;
  2019.              case OPT_TIMEBASE:
  2020.                  input_opt.timebase = optarg;
  2021.                  break;
  2022.              case OPT_PULLDOWN:
  2023. -                if( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ) < 0 )
  2024. -                    return -1;
  2025. +                FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg )
  2026.                  break;
  2027.              default:
  2028.  generic_option:
  2029. @@ -1116,7 +1152,7 @@ generic_option:
  2030.          if( b_error )
  2031.          {
  2032.              const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind-2];
  2033. -            fprintf( stderr, "x264 [error]: invalid argument: %s = %s\n", name, optarg );
  2034. +            x264_cli_log( "x264", X264_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg );
  2035.              return -1;
  2036.          }
  2037.      }
  2038. @@ -1130,20 +1166,12 @@ generic_option:
  2039.          return -1;
  2040.  
  2041.      /* Get the file name */
  2042. -    if( optind > argc - 1 || !output_filename )
  2043. -    {
  2044. -        fprintf( stderr, "x264 [error]: No %s file. Run x264 --help for a list of options.\n",
  2045. -                 optind > argc - 1 ? "input" : "output" );
  2046. -        return -1;
  2047. -    }
  2048. +    FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n",
  2049. +                   optind > argc - 1 ? "input" : "output" )
  2050.  
  2051.      if( select_output( muxer, output_filename, param ) )
  2052.          return -1;
  2053. -    if( output.open_file( output_filename, &opt->hout ) )
  2054. -    {
  2055. -        fprintf( stderr, "x264 [error]: could not open output file `%s'\n", output_filename );
  2056. -        return -1;
  2057. -    }
  2058. +    FAIL_IF_ERROR( output.open_file( output_filename, &opt->hout ), "could not open output file `%s'\n", output_filename )
  2059.  
  2060.      input_filename = argv[optind++];
  2061.      input_opt.resolution = optind < argc ? argv[optind++] : NULL;
  2062. @@ -1163,39 +1191,22 @@ generic_option:
  2063.      if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
  2064.          return -1;
  2065.  
  2066. -    if( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ) )
  2067. -    {
  2068. -        fprintf( stderr, "x264 [error]: could not open input file `%s'\n", input_filename );
  2069. -        return -1;
  2070. -    }
  2071. +    FAIL_IF_ERROR( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ),
  2072. +                   "could not open input file `%s'\n", input_filename )
  2073.  
  2074.      x264_reduce_fraction( &info.sar_width, &info.sar_height );
  2075.      x264_reduce_fraction( &info.fps_num, &info.fps_den );
  2076. -    if( param->i_log_level >= X264_LOG_INFO )
  2077. -        fprintf( stderr, "%s [info]: %dx%d%c %d:%d @ %d/%d fps (%cfr)\n", demuxername, info.width,
  2078. -                 info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
  2079. -                 info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
  2080. +    x264_cli_log( demuxername, X264_LOG_INFO, "%dx%d%c %d:%d @ %d/%d fps (%cfr)\n", info.width,
  2081. +                  info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
  2082. +                  info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
  2083.  
  2084.      if( tcfile_name )
  2085.      {
  2086. -        if( b_user_fps )
  2087. -        {
  2088. -            fprintf( stderr, "x264 [error]: --fps + --tcfile-in is incompatible.\n" );
  2089. -            return -1;
  2090. -        }
  2091. -        if( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ) )
  2092. -        {
  2093. -            fprintf( stderr, "x264 [error]: timecode input failed\n" );
  2094. -            return -1;
  2095. -        }
  2096. -        else
  2097. -            input = timecode_input;
  2098. -    }
  2099. -    else if( !info.vfr && input_opt.timebase )
  2100. -    {
  2101. -        fprintf( stderr, "x264 [error]: --timebase is incompatible with cfr input\n" );
  2102. -        return -1;
  2103. +        FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" )
  2104. +        FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" )
  2105. +        input = timecode_input;
  2106.      }
  2107. +    else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" )
  2108.  
  2109.      /* set param flags from the info flags as necessary */
  2110.      param->i_csp       = info.csp;
  2111. @@ -1204,9 +1215,9 @@ generic_option:
  2112.      param->i_width     = info.width;
  2113.      if( !b_user_interlaced && info.interlaced )
  2114.      {
  2115. -        fprintf( stderr, "x264 [warning]: input appears to be interlaced, enabling %cff interlaced mode.\n"
  2116. -                         "                If you want otherwise, use --no-interlaced or --%cff\n",
  2117. -                 info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
  2118. +        x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n"
  2119. +                      "                If you want otherwise, use --no-interlaced or --%cff\n",
  2120. +                      info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
  2121.          param->b_interlaced = 1;
  2122.          param->b_tff = !!info.tff;
  2123.      }
  2124. @@ -1230,21 +1241,14 @@ generic_option:
  2125.          uint64_t i_user_timebase_num;
  2126.          uint64_t i_user_timebase_den;
  2127.          int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den );
  2128. -        if( !ret )
  2129. -        {
  2130. -            fprintf( stderr, "x264 [error]: invalid argument: timebase = %s\n", input_opt.timebase );
  2131. -            return -1;
  2132. -        }
  2133. +        FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase )
  2134.          else if( ret == 1 )
  2135.          {
  2136.              i_user_timebase_num = param->i_timebase_num;
  2137.              i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 );
  2138.          }
  2139. -        if( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX )
  2140. -        {
  2141. -            fprintf( stderr, "x264 [error]: timebase you specified exceeds H.264 maximum\n" );
  2142. -            return -1;
  2143. -        }
  2144. +        FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX,
  2145. +                       "timebase you specified exceeds H.264 maximum\n" )
  2146.          opt->timebase_convert_multiplier = ((double)i_user_timebase_den / param->i_timebase_den)
  2147.                                           * ((double)param->i_timebase_num / i_user_timebase_num);
  2148.          param->i_timebase_num = i_user_timebase_num;
  2149. @@ -1261,13 +1265,8 @@ generic_option:
  2150.      if( b_thread_input || param->i_threads > 1
  2151.          || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) )
  2152.      {
  2153. -        if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) )
  2154. -        {
  2155. -            fprintf( stderr, "x264 [error]: threaded input failed\n" );
  2156. -            return -1;
  2157. -        }
  2158. -        else
  2159. -            input = thread_input;
  2160. +        FAIL_IF_ERROR( thread_input.open_file( NULL, &opt->hin, &info, NULL ), "threaded input failed\n" )
  2161. +        input = thread_input;
  2162.      }
  2163.  #endif
  2164.  
  2165. @@ -1321,7 +1320,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
  2166.          else ret = 0;
  2167.          if( ret != 3 || qp < -1 || qp > 51 )
  2168.          {
  2169. -            fprintf( stderr, "x264 [error]: can't parse qpfile for frame %d\n", i_frame );
  2170. +            x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
  2171.              fclose( opt->qpfile );
  2172.              opt->qpfile = NULL;
  2173.              pic->i_type = X264_TYPE_AUTO;
  2174. @@ -1344,11 +1343,7 @@ static int  Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
  2175.  
  2176.      i_frame_size = x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out );
  2177.  
  2178. -    if( i_frame_size < 0 )
  2179. -    {
  2180. -        fprintf( stderr, "x264 [error]: x264_encoder_encode failed\n" );
  2181. -        return -1;
  2182. -    }
  2183. +    FAIL_IF_ERROR( i_frame_size < 0, "x264_encoder_encode failed\n" );
  2184.  
  2185.      if( i_frame_size )
  2186.      {
  2187. @@ -1424,17 +1419,14 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  2188.          param->b_pic_struct = 1;
  2189.          pulldown = &pulldown_values[opt->i_pulldown];
  2190.          param->i_timebase_num = param->i_fps_den;
  2191. -        if( fmod( param->i_fps_num * pulldown->fps_factor, 1 ) )
  2192. -        {
  2193. -            fprintf( stderr, "x264 [error]: unsupported framerate for chosen pulldown\n" );
  2194. -            return -1;
  2195. -        }
  2196. +        FAIL_IF_ERROR( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
  2197. +                       "unsupported framerate for chosen pulldown\n" )
  2198.          param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
  2199.      }
  2200.  
  2201.      if( ( h = x264_encoder_open( param ) ) == NULL )
  2202.      {
  2203. -        fprintf( stderr, "x264 [error]: x264_encoder_open failed\n" );
  2204. +        x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
  2205.          input.close_file( opt->hin );
  2206.          return -1;
  2207.      }
  2208. @@ -1445,27 +1437,19 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  2209.  
  2210.      if( output.set_param( opt->hout, param ) )
  2211.      {
  2212. -        fprintf( stderr, "x264 [error]: can't set outfile param\n" );
  2213. +        x264_cli_log( "x264", X264_LOG_ERROR, "can't set outfile param\n" );
  2214.          input.close_file( opt->hin );
  2215.          output.close_file( opt->hout, largest_pts, second_largest_pts );
  2216.          return -1;
  2217.      }
  2218.  
  2219.      /* Create a new pic */
  2220. -    if( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ) )
  2221. -    {
  2222. -        fprintf( stderr, "x264 [error]: malloc failed\n" );
  2223. -        return -1;
  2224. -    }
  2225. +    FAIL_IF_ERROR( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ), "malloc failed\n" )
  2226.  
  2227.      i_start = x264_mdate();
  2228.      /* ticks/frame = ticks/second / frames/second */
  2229.      ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
  2230. -    if( ticks_per_frame < 1 )
  2231. -    {
  2232. -        fprintf( stderr, "x264 [error]: ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
  2233. -        return -1;
  2234. -    }
  2235. +    FAIL_IF_ERROR( ticks_per_frame < 1, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame )
  2236.  
  2237.      if( !param->b_repeat_headers )
  2238.      {
  2239. @@ -1473,12 +1457,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  2240.          x264_nal_t *headers;
  2241.          int i_nal;
  2242.  
  2243. -        if( x264_encoder_headers( h, &headers, &i_nal ) < 0 )
  2244. -        {
  2245. -            fprintf( stderr, "x264 [error]: x264_encoder_headers failed\n" );
  2246. -            return -1;
  2247. -        }
  2248. -
  2249. +        FAIL_IF_ERROR( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" )
  2250.          if( (i_file = output.write_headers( opt->hout, headers )) < 0 )
  2251.              return -1;
  2252.      }
  2253. @@ -1508,15 +1487,12 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  2254.  
  2255.          if( pic.i_pts <= largest_pts )
  2256.          {
  2257. -            if( param->i_log_level >= X264_LOG_WARNING )
  2258. -            {
  2259. -                if( param->i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
  2260. -                    fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
  2261. +            if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
  2262. +                x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
  2263.                               i_frame, output_pts, largest_pts * dts_compress_multiplier );
  2264. -                else if( pts_warning_cnt == MAX_PTS_WARNING )
  2265. -                    fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
  2266. -                pts_warning_cnt++;
  2267. -            }
  2268. +            else if( pts_warning_cnt == MAX_PTS_WARNING )
  2269. +                x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
  2270. +            pts_warning_cnt++;
  2271.              pic.i_pts = largest_pts + ticks_per_frame;
  2272.              output_pts = pic.i_pts * dts_compress_multiplier;
  2273.          }
  2274. @@ -1573,8 +1549,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
  2275.          if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
  2276.              Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
  2277.      }
  2278. -    if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
  2279. -        fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
  2280. +    if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
  2281. +        x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
  2282.  
  2283.      /* duration algorithm fails when only 1 frame is output */
  2284.      if( i_frame_output == 1 )
  2285. diff --git a/x264cli.h b/x264cli.h
  2286. new file mode 100644
  2287. index 0000000..1acca56
  2288. --- /dev/null
  2289. +++ b/x264cli.h
  2290. @@ -0,0 +1,67 @@
  2291. +/*****************************************************************************
  2292. + * x264cli.h: x264cli common
  2293. + *****************************************************************************
  2294. + * Copyright (C) 2003-2010 x264 project
  2295. + *
  2296. + * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  2297. + *          Loren Merritt <lorenm@u.washington.edu>
  2298. + *
  2299. + * This program is free software; you can redistribute it and/or modify
  2300. + * it under the terms of the GNU General Public License as published by
  2301. + * the Free Software Foundation; either version 2 of the License, or
  2302. + * (at your option) any later version.
  2303. + *
  2304. + * This program is distributed in the hope that it will be useful,
  2305. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  2306. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  2307. + * GNU General Public License for more details.
  2308. + *
  2309. + * You should have received a copy of the GNU General Public License
  2310. + * along with this program; if not, write to the Free Software
  2311. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  2312. + *****************************************************************************/
  2313. +
  2314. +#ifndef X264_CLI_H
  2315. +#define X264_CLI_H
  2316. +
  2317. +#include "common/common.h"
  2318. +
  2319. +typedef void *hnd_t;
  2320. +
  2321. +static inline int64_t gcd( int64_t a, int64_t b )
  2322. +{
  2323. +    while( 1 )
  2324. +    {
  2325. +        int64_t c = a % b;
  2326. +        if( !c )
  2327. +            return b;
  2328. +        a = b;
  2329. +        b = c;
  2330. +    }
  2331. +}
  2332. +
  2333. +static inline int64_t lcm( int64_t a, int64_t b )
  2334. +{
  2335. +    return ( a / gcd( a, b ) ) * b;
  2336. +}
  2337. +
  2338. +static inline char *get_filename_extension( char *filename )
  2339. +{
  2340. +    char *ext = filename + strlen( filename );
  2341. +    while( *ext != '.' && ext > filename )
  2342. +        ext--;
  2343. +    ext += *ext == '.';
  2344. +    return ext;
  2345. +}
  2346. +
  2347. +void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
  2348. +void x264_cli_printf( int i_level, const char *fmt, ... );
  2349. +
  2350. +#define FAIL_IF_ERR( cond, name, ... )\
  2351. +if( cond )\
  2352. +{\
  2353. +    x264_cli_log( name, X264_LOG_ERROR, __VA_ARGS__ );\
  2354. +    return -1;\
  2355. +}
  2356. +
  2357. +#endif
  2358. --
  2359. 1.7.1
  2360.  
  2361.  
  2362. From e9d3c46276c11f98f3819d8faf1fd0402bcb6a08 Mon Sep 17 00:00:00 2001
  2363. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  2364. Date: Wed, 30 Jun 2010 13:06:22 -0700
  2365. Subject: [PATCH 5/7] Don't check i16x16 planar mode unless previous modes were useful
  2366.  Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on Core i7).
  2367.  Negligible effect on compression.
  2368.  
  2369. Also make a few more arrays static.
  2370. ---
  2371. encoder/analyse.c |   29 +++++++++++++++++++----------
  2372.  encoder/set.c     |    3 ++-
  2373.  2 files changed, 21 insertions(+), 11 deletions(-)
  2374.  
  2375. diff --git a/encoder/analyse.c b/encoder/analyse.c
  2376. index 696c78f..cdbdd1e 100644
  2377. --- a/encoder/analyse.c
  2378. +++ b/encoder/analyse.c
  2379. @@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  2380.      /* 16x16 prediction selection */
  2381.      const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
  2382.  
  2383. +    /* Not heavily tuned */
  2384. +    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
  2385. +    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
  2386. +
  2387.      if( !h->mb.b_lossless && predict_mode[3] >= 0 )
  2388.      {
  2389.          h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
  2390. -        h->predict_16x16[I_PRED_16x16_P]( p_dst );
  2391. -        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
  2392. -            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
  2393. -        for( int i = 0; i < 4; i++ )
  2394. +        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
  2395. +        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
  2396. +        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
  2397. +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
  2398. +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
  2399. +        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
  2400. +
  2401. +        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
  2402. +        if( a->i_satd_i16x16 <= i16x16_thresh )
  2403.          {
  2404. -            int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
  2405. -            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
  2406. +            h->predict_16x16[I_PRED_16x16_P]( p_dst );
  2407. +            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
  2408. +            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
  2409. +            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
  2410.          }
  2411.      }
  2412.      else
  2413. @@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  2414.          /* cavlc mb type prefix */
  2415.          a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
  2416.  
  2417. -    /* Not heavily tuned */
  2418. -    const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
  2419. -    if( a->b_fast_intra && a->i_satd_i16x16 > (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
  2420. +    if( a->i_satd_i16x16 > i16x16_thresh )
  2421.          return;
  2422.  
  2423.      /* 8x8 prediction selection */
  2424. @@ -784,7 +793,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  2425.              i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
  2426.          }
  2427.          /* Not heavily tuned */
  2428. -        const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
  2429. +        static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
  2430.          if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
  2431.              return;
  2432.      }
  2433. diff --git a/encoder/set.c b/encoder/set.c
  2434. index 8d007aa..8ea6eac 100644
  2435. --- a/encoder/set.c
  2436. +++ b/encoder/set.c
  2437. @@ -534,7 +534,8 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
  2438.  {
  2439.      int i;
  2440.      // random ID number generated according to ISO-11578
  2441. -    const uint8_t uuid[16] = {
  2442. +    static const uint8_t uuid[16] =
  2443. +    {
  2444.          0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
  2445.          0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
  2446.      };
  2447. --
  2448. 1.7.1
  2449.  
  2450.  
  2451. From 470c853a3c0817573139ab387b1c3fe207d62a17 Mon Sep 17 00:00:00 2001
  2452. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  2453. Date: Wed, 30 Jun 2010 13:55:46 -0700
  2454. Subject: [PATCH 6/7] Support infinite keyint (--keyint infinite).
  2455.  This just means x264 won't insert non-scenecut keyframes.
  2456.  Useful for streaming when using interactive error recovery or some other mechanism that makes keyframes unnecessary.
  2457.  
  2458. Also change POC logic to limit POC/framenum LSB size (to save bits per slice).
  2459. Also fix a bug in the CPB underflow detection code (didn't affect the bitstream, just resulted in the failure to print certain warning messages).
  2460. ---
  2461. common/common.c       |    7 ++++---
  2462.  encoder/encoder.c     |    8 ++++----
  2463.  encoder/ratecontrol.c |   10 +++++-----
  2464.  encoder/set.c         |   29 +++++++++++++++++------------
  2465.  encoder/slicetype.c   |    2 +-
  2466.  x264.c                |    2 +-
  2467.  x264.h                |    3 ++-
  2468.  7 files changed, 34 insertions(+), 27 deletions(-)
  2469.  
  2470. diff --git a/common/common.c b/common/common.c
  2471. index 8c7cf3c..14dd716 100644
  2472. --- a/common/common.c
  2473. +++ b/common/common.c
  2474. @@ -638,9 +638,10 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  2475.          p->i_dpb_size = atoi(value);
  2476.      OPT("keyint")
  2477.      {
  2478. -        p->i_keyint_max = atoi(value);
  2479. -        if( p->i_keyint_min > p->i_keyint_max )
  2480. -            p->i_keyint_min = p->i_keyint_max;
  2481. +        if( strstr( value, "infinite" ) )
  2482. +            p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
  2483. +        else
  2484. +            p->i_keyint_max = atoi(value);
  2485.      }
  2486.      OPT2("min-keyint", "keyint-min")
  2487.      {
  2488. diff --git a/encoder/encoder.c b/encoder/encoder.c
  2489. index 5cd3307..31cb84a 100644
  2490. --- a/encoder/encoder.c
  2491. +++ b/encoder/encoder.c
  2492. @@ -567,8 +567,7 @@ static int x264_validate_parameters( x264_t *h )
  2493.  
  2494.      h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
  2495.      h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
  2496. -    if( h->param.i_keyint_max <= 0 )
  2497. -        h->param.i_keyint_max = 1;
  2498. +    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
  2499.      if( h->param.i_scenecut_threshold < 0 )
  2500.          h->param.i_scenecut_threshold = 0;
  2501.      if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
  2502. @@ -627,9 +626,10 @@ static int x264_validate_parameters( x264_t *h )
  2503.      h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
  2504.      if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
  2505.          h->param.rc.b_mb_tree = 0;
  2506. -    if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
  2507. +    if( (!h->param.b_intra_refresh && h->param.i_keyint_max != X264_KEYINT_MAX_INFINITE) &&
  2508. +        !h->param.rc.i_lookahead && h->param.rc.b_mb_tree )
  2509.      {
  2510. -        x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
  2511. +        x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh or infinite keyint\n" );
  2512.          h->param.rc.b_mb_tree = 0;
  2513.      }
  2514.      if( h->param.rc.b_stat_read )
  2515. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  2516. index 1030ef2..6fdaa98 100644
  2517. --- a/encoder/ratecontrol.c
  2518. +++ b/encoder/ratecontrol.c
  2519. @@ -492,13 +492,13 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
  2520.              // arbitrary
  2521.              #define MAX_DURATION 0.5
  2522.  
  2523. -            int max_cpb_output_delay = h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
  2524. +            int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX );
  2525.              int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
  2526.              int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5);
  2527.  
  2528.              h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 );
  2529. -            h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 32 );
  2530. -            h->sps->vui.hrd.i_dpb_output_delay_length  = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 32 );
  2531. +            h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 );
  2532. +            h->sps->vui.hrd.i_dpb_output_delay_length  = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 );
  2533.  
  2534.              #undef MAX_DURATION
  2535.  
  2536. @@ -1781,10 +1781,10 @@ void x264_hrd_fullness( x264_t *h )
  2537.      uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
  2538.      uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
  2539.  
  2540. -    if( cpb_state < 0 || cpb_state > cpb_size )
  2541. +    if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size )
  2542.      {
  2543.           x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
  2544. -                   cpb_state < 0 ? "underflow" : "overflow", (float)cpb_state/denom, (float)cpb_size/denom );
  2545. +                   rct->buffer_fill_final < 0 ? "underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom );
  2546.      }
  2547.  
  2548.      h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
  2549. diff --git a/encoder/set.c b/encoder/set.c
  2550. index 8ea6eac..9e6e736 100644
  2551. --- a/encoder/set.c
  2552. +++ b/encoder/set.c
  2553. @@ -99,6 +99,7 @@ static void x264_sei_write( bs_t *s, uint8_t *p_start )
  2554.  void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  2555.  {
  2556.      sps->i_id = i_id;
  2557. +    int max_frame_num;
  2558.  
  2559.      sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
  2560.      if( sps->b_qpprime_y_zero_transform_bypass )
  2561. @@ -118,15 +119,27 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  2562.      /* Never set constraint_set2, it is not necessary and not used in real world. */
  2563.      sps->b_constraint_set2  = 0;
  2564.  
  2565. -    sps->i_log2_max_frame_num = 4;  /* at least 4 */
  2566. -    while( (1 << sps->i_log2_max_frame_num) <= param->i_keyint_max && sps->i_log2_max_frame_num < 10 )
  2567. +    sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
  2568. +    /* extra slot with pyramid so that we don't have to override the
  2569. +     * order of forgetting old pictures */
  2570. +    sps->vui.i_max_dec_frame_buffering =
  2571. +    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  2572. +                            param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
  2573. +    sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
  2574. +
  2575. +    /* number of refs + current frame */
  2576. +    max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
  2577. +    sps->i_log2_max_frame_num = 4;
  2578. +    while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
  2579.          sps->i_log2_max_frame_num++;
  2580. -    sps->i_log2_max_frame_num++;
  2581.  
  2582.      sps->i_poc_type = 0;
  2583.      if( sps->i_poc_type == 0 )
  2584.      {
  2585. -        sps->i_log2_max_poc_lsb = sps->i_log2_max_frame_num + 1;    /* max poc = 2*frame_num */
  2586. +        int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
  2587. +        sps->i_log2_max_poc_lsb = 4;
  2588. +        while( (1 << sps->i_log2_max_poc_lsb) <= max_delta_poc * 2 )
  2589. +            sps->i_log2_max_poc_lsb++;
  2590.      }
  2591.      else if( sps->i_poc_type == 1 )
  2592.      {
  2593. @@ -219,14 +232,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  2594.  
  2595.      // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
  2596.  
  2597. -    sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
  2598. -    /* extra slot with pyramid so that we don't have to override the
  2599. -     * order of forgetting old pictures */
  2600. -    sps->vui.i_max_dec_frame_buffering =
  2601. -    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
  2602. -                            param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
  2603. -    sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
  2604. -
  2605.      sps->vui.b_bitstream_restriction = 1;
  2606.      if( sps->vui.b_bitstream_restriction )
  2607.      {
  2608. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  2609. index 4ede8cf..7d69b71 100644
  2610. --- a/encoder/slicetype.c
  2611. +++ b/encoder/slicetype.c
  2612. @@ -1009,7 +1009,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
  2613.      float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
  2614.      /* magic numbers pulled out of thin air */
  2615.      float f_thresh_min = f_thresh_max * h->param.i_keyint_min
  2616. -                         / ( h->param.i_keyint_max * 4 );
  2617. +                         / ( h->param.i_keyint_max * 4. );
  2618.      int res;
  2619.  
  2620.      if( h->param.i_keyint_min == h->param.i_keyint_max )
  2621. diff --git a/x264.c b/x264.c
  2622. index 741570c..0bede93 100644
  2623. --- a/x264.c
  2624. +++ b/x264.c
  2625. @@ -409,7 +409,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  2626.      H0( "\n" );
  2627.      H0( "Frame-type options:\n" );
  2628.      H0( "\n" );
  2629. -    H0( "  -I, --keyint <integer>      Maximum GOP size [%d]\n", defaults->i_keyint_max );
  2630. +    H0( "  -I, --keyint <integer or \"infinite\"> Maximum GOP size [%d]\n", defaults->i_keyint_max );
  2631.      H2( "  -i, --min-keyint <integer>  Minimum GOP size [auto]\n" );
  2632.      H2( "      --no-scenecut           Disable adaptive I-frame decision\n" );
  2633.      H2( "      --scenecut <integer>    How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
  2634. diff --git a/x264.h b/x264.h
  2635. index 86f7426..097365a 100644
  2636. --- a/x264.h
  2637. +++ b/x264.h
  2638. @@ -35,7 +35,7 @@
  2639.  
  2640.  #include <stdarg.h>
  2641.  
  2642. -#define X264_BUILD 101
  2643. +#define X264_BUILD 102
  2644.  
  2645.  /* x264_t:
  2646.   *      opaque handler for encoder */
  2647. @@ -152,6 +152,7 @@ typedef struct
  2648.  #define X264_B_PYRAMID_STRICT        1
  2649.  #define X264_B_PYRAMID_NORMAL        2
  2650.  #define X264_KEYINT_MIN_AUTO         0
  2651. +#define X264_KEYINT_MAX_INFINITE     (1<<30)
  2652.  #define X264_OPEN_GOP_NONE           0
  2653.  #define X264_OPEN_GOP_NORMAL         1
  2654.  #define X264_OPEN_GOP_BLURAY         2
  2655. --
  2656. 1.7.1
  2657.  
  2658.  
  2659. From 1b4e95140832b569f81984dcc36fea50452380f9 Mon Sep 17 00:00:00 2001
  2660. From: Oskar Arvidsson <oskar@irock.se>
  2661. Date: Fri, 2 Jul 2010 04:06:08 +0200
  2662. Subject: [PATCH 7/7] Support for 9 and 10-bit encoding
  2663.  Output bit depth is specified at compile time via --bit-depth.
  2664.  There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow.
  2665.  Input is still 8-bit only; this will change in the future.
  2666.  
  2667. Note that very few H.264 decoders support >8 bit depth currently.
  2668. ---
  2669. common/arm/mc-c.c      |   42 +++++++-----
  2670.  common/arm/predict-c.c |    8 ++
  2671.  common/bitstream.h     |    2 +-
  2672.  common/common.c        |   17 ++++-
  2673.  common/common.h        |   39 +++++++----
  2674.  common/dct.c           |   15 +++-
  2675.  common/deblock.c       |   27 +++++---
  2676.  common/macroblock.c    |    2 +-
  2677.  common/macroblock.h    |   66 ++++++++++++-------
  2678.  common/mc.c            |   33 ++++-----
  2679.  common/mc.h            |    2 +-
  2680.  common/pixel.c         |   14 +++-
  2681.  common/ppc/dct.c       |    2 +
  2682.  common/ppc/deblock.c   |    2 +
  2683.  common/ppc/mc.c        |    4 +
  2684.  common/ppc/pixel.c     |    4 +
  2685.  common/ppc/predict.c   |    6 ++
  2686.  common/ppc/quant.c     |    2 +
  2687.  common/predict.c       |   63 ++++++++++--------
  2688.  common/quant.c         |   14 ++---
  2689.  common/set.c           |   25 ++++++-
  2690.  common/x86/mc-c.c      |   12 +++-
  2691.  common/x86/predict-c.c |   10 +++
  2692.  configure              |   16 +++++
  2693.  encoder/analyse.c      |   80 ++++++++++++-----------
  2694.  encoder/cabac.c        |   25 ++++---
  2695.  encoder/cavlc.c        |   24 +++----
  2696.  encoder/encoder.c      |   45 +++++++++----
  2697.  encoder/macroblock.h   |    4 +-
  2698.  encoder/me.h           |    2 +-
  2699.  encoder/ratecontrol.c  |   20 +++---
  2700.  encoder/rdo.c          |   10 +--
  2701.  encoder/set.c          |    8 ++-
  2702.  encoder/slicetype.c    |   10 ++--
  2703.  tools/checkasm.c       |  169 +++++++++++++++++++++++++----------------------
  2704.  x264.c                 |   24 ++++---
  2705.  x264.h                 |    4 +-
  2706.  37 files changed, 516 insertions(+), 336 deletions(-)
  2707.  
  2708. diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
  2709. index d294eff..b1106dd 100644
  2710. --- a/common/arm/mc-c.c
  2711. +++ b/common/arm/mc-c.c
  2712. @@ -64,6 +64,19 @@ MC_WEIGHT(_nodenom)
  2713.  MC_WEIGHT(_offsetadd)
  2714.  MC_WEIGHT(_offsetsub)
  2715.  
  2716. +void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
  2717. +void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
  2718. +void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
  2719. +void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
  2720. +
  2721. +void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
  2722. +void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
  2723. +
  2724. +void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
  2725. +void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
  2726. +void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
  2727. +
  2728. +#if !X264_HIGH_BIT_DEPTH
  2729.  static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
  2730.  {
  2731.      if( w->i_scale == 1<<w->i_denom )
  2732. @@ -85,14 +98,6 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
  2733.          w->weightfn = x264_mc_wtab_neon;
  2734.  }
  2735.  
  2736. -void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
  2737. -void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
  2738. -void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
  2739. -void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
  2740. -
  2741. -void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
  2742. -void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
  2743. -
  2744.  static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
  2745.  {
  2746.      NULL,
  2747. @@ -174,10 +179,6 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
  2748.      }
  2749.  }
  2750.  
  2751. -void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
  2752. -void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
  2753. -void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
  2754. -
  2755.  static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
  2756.                                int stride, int width, int height, int16_t *buf )
  2757.  {
  2758. @@ -198,18 +199,22 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
  2759.          src  += stride;
  2760.      }
  2761.  }
  2762. +#endif // !X264_HIGH_BIT_DEPTH
  2763.  
  2764.  void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
  2765.  {
  2766.      if( !(cpu&X264_CPU_ARMV6) )
  2767.          return;
  2768.  
  2769. +#if !X264_HIGH_BIT_DEPTH
  2770.      pf->prefetch_fenc = x264_prefetch_fenc_arm;
  2771.      pf->prefetch_ref  = x264_prefetch_ref_arm;
  2772. +#endif // !X264_HIGH_BIT_DEPTH
  2773.  
  2774.      if( !(cpu&X264_CPU_NEON) )
  2775.          return;
  2776.  
  2777. +#if !X264_HIGH_BIT_DEPTH
  2778.      pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
  2779.      pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
  2780.      pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
  2781. @@ -229,15 +234,16 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
  2782.      pf->offsetsub = x264_mc_offsetsub_wtab_neon;
  2783.      pf->weight_cache = x264_weight_cache_neon;
  2784.  
  2785. -// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
  2786. -#ifndef SYS_MACOSX
  2787. -    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
  2788. -#endif
  2789. -    pf->memzero_aligned = x264_memzero_aligned_neon;
  2790. -
  2791.      pf->mc_chroma = x264_mc_chroma_neon;
  2792.      pf->mc_luma = mc_luma_neon;
  2793.      pf->get_ref = get_ref_neon;
  2794.      pf->hpel_filter = hpel_filter_neon;
  2795.      pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
  2796. +#endif // !X264_HIGH_BIT_DEPTH
  2797. +
  2798. +// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
  2799. +#ifndef SYS_MACOSX
  2800. +    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
  2801. +#endif
  2802. +    pf->memzero_aligned = x264_memzero_aligned_neon;
  2803.  }
  2804. diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
  2805. index fa7b9f7..b40dc9a 100644
  2806. --- a/common/arm/predict-c.c
  2807. +++ b/common/arm/predict-c.c
  2808. @@ -51,6 +51,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
  2809.      if (!(cpu&X264_CPU_ARMV6))
  2810.          return;
  2811.  
  2812. +#if !X264_HIGH_BIT_DEPTH
  2813.      pf[I_PRED_4x4_H]   = x264_predict_4x4_h_armv6;
  2814.      pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_armv6;
  2815.      pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
  2816. @@ -59,6 +60,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
  2817.          return;
  2818.  
  2819.      pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
  2820. +#endif // !X264_HIGH_BIT_DEPTH
  2821.  }
  2822.  
  2823.  void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
  2824. @@ -66,12 +68,14 @@ void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
  2825.      if (!(cpu&X264_CPU_NEON))
  2826.          return;
  2827.  
  2828. +#if !X264_HIGH_BIT_DEPTH
  2829.      pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_neon;
  2830.      pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_neon;
  2831.      pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
  2832.      pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
  2833.      pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
  2834.      pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
  2835. +#endif // !X264_HIGH_BIT_DEPTH
  2836.  }
  2837.  
  2838.  void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
  2839. @@ -79,8 +83,10 @@ void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_
  2840.      if (!(cpu&X264_CPU_NEON))
  2841.          return;
  2842.  
  2843. +#if !X264_HIGH_BIT_DEPTH
  2844.      pf[I_PRED_8x8_DC]  = x264_predict_8x8_dc_neon;
  2845.      pf[I_PRED_8x8_H]   = x264_predict_8x8_h_neon;
  2846. +#endif // !X264_HIGH_BIT_DEPTH
  2847.  }
  2848.  
  2849.  void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
  2850. @@ -88,10 +94,12 @@ void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
  2851.      if (!(cpu&X264_CPU_NEON))
  2852.          return;
  2853.  
  2854. +#if !X264_HIGH_BIT_DEPTH
  2855.      pf[I_PRED_16x16_DC ]    = x264_predict_16x16_dc_neon;
  2856.      pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
  2857.      pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
  2858.      pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_neon;
  2859.      pf[I_PRED_16x16_V ]     = x264_predict_16x16_v_neon;
  2860.      pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_neon;
  2861. +#endif // !X264_HIGH_BIT_DEPTH
  2862.  }
  2863. diff --git a/common/bitstream.h b/common/bitstream.h
  2864. index dd8118d..318c790 100644
  2865. --- a/common/bitstream.h
  2866. +++ b/common/bitstream.h
  2867. @@ -53,7 +53,7 @@ typedef struct bs_s
  2868.  typedef struct
  2869.  {
  2870.      int     last;
  2871. -    int16_t level[16];
  2872. +    dctcoef level[16];
  2873.      uint8_t run[16];
  2874.  } x264_run_level_t;
  2875.  
  2876. diff --git a/common/common.c b/common/common.c
  2877. index 14dd716..728dfab 100644
  2878. --- a/common/common.c
  2879. +++ b/common/common.c
  2880. @@ -91,10 +91,10 @@ void x264_param_default( x264_param_t *param )
  2881.      param->rc.i_vbv_max_bitrate = 0;
  2882.      param->rc.i_vbv_buffer_size = 0;
  2883.      param->rc.f_vbv_buffer_init = 0.9;
  2884. -    param->rc.i_qp_constant = 23;
  2885. -    param->rc.f_rf_constant = 23;
  2886. +    param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
  2887. +    param->rc.f_rf_constant = 23 + QP_BD_OFFSET;
  2888.      param->rc.i_qp_min = 10;
  2889. -    param->rc.i_qp_max = 51;
  2890. +    param->rc.i_qp_max = QP_MAX;
  2891.      param->rc.i_qp_step = 4;
  2892.      param->rc.f_ip_factor = 1.4;
  2893.      param->rc.f_pb_factor = 1.3;
  2894. @@ -418,6 +418,15 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
  2895.      if( !profile )
  2896.          return 0;
  2897.  
  2898. +#if BIT_DEPTH > 8
  2899. +    if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
  2900. +        !strcasecmp( profile, "high" ) )
  2901. +    {
  2902. +        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
  2903. +        return -1;
  2904. +    }
  2905. +#endif
  2906. +
  2907.      if( !strcasecmp( profile, "baseline" ) )
  2908.      {
  2909.          param->analyse.b_transform_8x8 = 0;
  2910. @@ -441,7 +450,7 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
  2911.          param->analyse.b_transform_8x8 = 0;
  2912.          param->i_cqm_preset = X264_CQM_FLAT;
  2913.      }
  2914. -    else if( !strcasecmp( profile, "high" ) )
  2915. +    else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
  2916.      {
  2917.          /* Default */
  2918.      }
  2919. diff --git a/common/common.h b/common/common.h
  2920. index 7b60811..a218d35 100644
  2921. --- a/common/common.h
  2922. +++ b/common/common.h
  2923. @@ -54,8 +54,13 @@ do {\
  2924.  #define X264_THREAD_MAX 128
  2925.  #define X264_PCM_COST (386*8)
  2926.  #define X264_LOOKAHEAD_MAX 250
  2927. +#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
  2928. +#define QP_MAX (51+QP_BD_OFFSET)
  2929. +#define QP_MAX_MAX (51+2*6)
  2930. +#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
  2931. +#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
  2932.  // arbitrary, but low because SATD scores are 1/4 normal
  2933. -#define X264_LOOKAHEAD_QP 12
  2934. +#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
  2935.  
  2936.  // number of pixels (per thread) in progress at any given time.
  2937.  // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
  2938. @@ -101,17 +106,23 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
  2939.  #define CP64(dst,src) M64(dst) = M64(src)
  2940.  #define CP128(dst,src) M128(dst) = M128(src)
  2941.  
  2942. -typedef uint8_t pixel;
  2943. -typedef uint32_t pixel4;
  2944. -typedef int16_t dctcoef;
  2945. +#if X264_HIGH_BIT_DEPTH
  2946. +    typedef uint16_t pixel;
  2947. +    typedef uint64_t pixel4;
  2948. +    typedef int32_t  dctcoef;
  2949.  
  2950. -#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
  2951. -#define MPIXEL_X4(src) M32(src)
  2952. -#define CPPIXEL_X4(dst,src) CP32(dst,src)
  2953. -#define CPPIXEL_X8(dst,src) CP64(dst,src)
  2954. -#define MDCT_X2(dct) M32(dct)
  2955. -#define CPDCT_X2(dst,src) CP32(dst,src)
  2956. -#define CPDCT_X4(dst,src) CP64(dst,src)
  2957. +#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
  2958. +#   define MPIXEL_X4(src) M64(src)
  2959. +#else
  2960. +    typedef uint8_t  pixel;
  2961. +    typedef uint32_t pixel4;
  2962. +    typedef int16_t  dctcoef;
  2963. +
  2964. +#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
  2965. +#   define MPIXEL_X4(src) M32(src)
  2966. +#endif
  2967. +
  2968. +#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
  2969.  
  2970.  #define X264_SCAN8_SIZE (6*8)
  2971.  #define X264_SCAN8_LUMA_SIZE (5*8)
  2972. @@ -189,7 +200,7 @@ void x264_init_vlc_tables();
  2973.  
  2974.  static ALWAYS_INLINE pixel x264_clip_pixel( int x )
  2975.  {
  2976. -    return x&(~255) ? (-x)>>31 : x;
  2977. +    return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
  2978.  }
  2979.  
  2980.  static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
  2981. @@ -449,8 +460,8 @@ struct x264_t
  2982.      /* mv/ref cost arrays.  Indexed by lambda instead of
  2983.       * qp because, due to rounding, some quantizers share
  2984.       * lambdas.  This saves memory. */
  2985. -    uint16_t *cost_mv[92];
  2986. -    uint16_t *cost_mv_fpel[92][4];
  2987. +    uint16_t *cost_mv[LAMBDA_MAX+1];
  2988. +    uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
  2989.  
  2990.      const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
  2991.  
  2992. diff --git a/common/dct.c b/common/dct.c
  2993. index 60dbd55..cd27363 100644
  2994. --- a/common/dct.c
  2995. +++ b/common/dct.c
  2996. @@ -418,6 +418,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
  2997.      dctf->dct4x4dc  = dct4x4dc;
  2998.      dctf->idct4x4dc = idct4x4dc;
  2999.  
  3000. +#if !X264_HIGH_BIT_DEPTH
  3001.  #if HAVE_MMX
  3002.      if( cpu&X264_CPU_MMX )
  3003.      {
  3004. @@ -515,6 +516,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
  3005.          dctf->add16x16_idct8= x264_add16x16_idct8_neon;
  3006.      }
  3007.  #endif
  3008. +#endif // !X264_HIGH_BIT_DEPTH
  3009.  }
  3010.  
  3011.  void x264_dct_init_weights( void )
  3012. @@ -599,11 +601,9 @@ static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
  3013.  
  3014.  static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
  3015.  {
  3016. -    CPDCT_X2( level, dct );
  3017. +    memcpy( level, dct, 2 * sizeof(dctcoef) );
  3018.      ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
  3019. -    CPDCT_X2( level+6, dct+6 );
  3020. -    CPDCT_X4( level+8, dct+8 );
  3021. -    CPDCT_X4( level+12, dct+12 );
  3022. +    memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
  3023.  }
  3024.  
  3025.  #undef ZIG
  3026. @@ -618,6 +618,7 @@ static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
  3027.      CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
  3028.      CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
  3029.      CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
  3030. +#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
  3031.  #define COPY8x8\
  3032.      CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
  3033.      CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
  3034. @@ -709,6 +710,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
  3035.          pf->sub_8x8    = zigzag_sub_8x8_field;
  3036.          pf->sub_4x4    = zigzag_sub_4x4_field;
  3037.          pf->sub_4x4ac  = zigzag_sub_4x4ac_field;
  3038. +#if !X264_HIGH_BIT_DEPTH
  3039.  #if HAVE_MMX
  3040.          if( cpu&X264_CPU_MMXEXT )
  3041.          {
  3042. @@ -726,6 +728,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
  3043.          if( cpu&X264_CPU_ALTIVEC )
  3044.              pf->scan_4x4   = x264_zigzag_scan_4x4_field_altivec;
  3045.  #endif
  3046. +#endif // !X264_HIGH_BIT_DEPTH
  3047.      }
  3048.      else
  3049.      {
  3050. @@ -734,6 +737,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
  3051.          pf->sub_8x8    = zigzag_sub_8x8_frame;
  3052.          pf->sub_4x4    = zigzag_sub_4x4_frame;
  3053.          pf->sub_4x4ac  = zigzag_sub_4x4ac_frame;
  3054. +#if !X264_HIGH_BIT_DEPTH
  3055.  #if HAVE_MMX
  3056.          if( cpu&X264_CPU_MMX )
  3057.              pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
  3058. @@ -759,13 +763,16 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
  3059.          if( cpu&X264_CPU_NEON )
  3060.              pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
  3061.  #endif
  3062. +#endif // !X264_HIGH_BIT_DEPTH
  3063.      }
  3064.  
  3065.      pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
  3066. +#if !X264_HIGH_BIT_DEPTH
  3067.  #if HAVE_MMX
  3068.      if( cpu&X264_CPU_MMX )
  3069.          pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
  3070.      if( cpu&X264_CPU_SHUFFLE_IS_FAST )
  3071.          pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
  3072.  #endif
  3073. +#endif // !X264_HIGH_BIT_DEPTH
  3074.  }
  3075. diff --git a/common/deblock.c b/common/deblock.c
  3076. index db9c95d..0b3b6df 100644
  3077. --- a/common/deblock.c
  3078. +++ b/common/deblock.c
  3079. @@ -265,18 +265,19 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
  3080.  
  3081.  static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
  3082.  {
  3083. -    int index_a = i_qp + h->sh.i_alpha_c0_offset;
  3084. -    int alpha = alpha_table(index_a);
  3085. -    int beta  = beta_table(i_qp + h->sh.i_beta_offset);
  3086. +    int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
  3087. +    int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
  3088. +    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
  3089. +    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
  3090.      int8_t tc[4];
  3091.  
  3092.      if( !M32(bS) || !alpha || !beta )
  3093.          return;
  3094.  
  3095. -    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
  3096. -    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
  3097. -    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
  3098. -    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
  3099. +    tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
  3100. +    tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
  3101. +    tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
  3102. +    tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
  3103.  
  3104.      pf_inter( pix1, i_stride, alpha, beta, tc );
  3105.      if( b_chroma )
  3106. @@ -285,8 +286,10 @@ static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stri
  3107.  
  3108.  static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
  3109.  {
  3110. -    int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
  3111. -    int beta  = beta_table(i_qp + h->sh.i_beta_offset);
  3112. +    int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
  3113. +    int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
  3114. +    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
  3115. +    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
  3116.  
  3117.      if( !alpha || !beta )
  3118.          return;
  3119. @@ -450,6 +453,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
  3120.  #if HAVE_MMX
  3121.      if( cpu&X264_CPU_MMXEXT )
  3122.      {
  3123. +#if !X264_HIGH_BIT_DEPTH
  3124.          pf->deblock_chroma[1] = x264_deblock_v_chroma_mmxext;
  3125.          pf->deblock_chroma[0] = x264_deblock_h_chroma_mmxext;
  3126.          pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmxext;
  3127. @@ -460,10 +464,12 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
  3128.          pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmxext;
  3129.          pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmxext;
  3130.  #endif
  3131. +#endif // !X264_HIGH_BIT_DEPTH
  3132.          pf->deblock_strength = x264_deblock_strength_mmxext;
  3133.          if( cpu&X264_CPU_SSE2 )
  3134.          {
  3135.              pf->deblock_strength = x264_deblock_strength_sse2;
  3136. +#if !X264_HIGH_BIT_DEPTH
  3137.              if( !(cpu&X264_CPU_STACK_MOD4) )
  3138.              {
  3139.                  pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
  3140. @@ -471,12 +477,14 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
  3141.                  pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
  3142.                  pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
  3143.              }
  3144. +#endif // !X264_HIGH_BIT_DEPTH
  3145.          }
  3146.          if( cpu&X264_CPU_SSSE3 )
  3147.              pf->deblock_strength = x264_deblock_strength_ssse3;
  3148.      }
  3149.  #endif
  3150.  
  3151. +#if !X264_HIGH_BIT_DEPTH
  3152.  #if HAVE_ALTIVEC
  3153.      if( cpu&X264_CPU_ALTIVEC )
  3154.      {
  3155. @@ -494,4 +502,5 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
  3156.          pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
  3157.     }
  3158.  #endif
  3159. +#endif // !X264_HIGH_BIT_DEPTH
  3160.  }
  3161. diff --git a/common/macroblock.c b/common/macroblock.c
  3162. index 4561d8a..f0a624f 100644
  3163. --- a/common/macroblock.c
  3164. +++ b/common/macroblock.c
  3165. @@ -337,7 +337,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  3166.      int scratch_size = 0;
  3167.      if( !b_lookahead )
  3168.      {
  3169. -        int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
  3170. +        int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(dctcoef);
  3171.          int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
  3172.          int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
  3173.          int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
  3174. diff --git a/common/macroblock.h b/common/macroblock.h
  3175. index 1a4992f..e09cd55 100644
  3176. --- a/common/macroblock.h
  3177. +++ b/common/macroblock.h
  3178. @@ -238,17 +238,30 @@ static const uint16_t block_idx_xy_fdec[16] =
  3179.      2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE
  3180.  };
  3181.  
  3182. -static const uint8_t i_chroma_qp_table[52+12*2] =
  3183. +#define QP(qP) ( (qP)+QP_BD_OFFSET )
  3184. +static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] =
  3185.  {
  3186. -     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  3187. -     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
  3188. -    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
  3189. -    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
  3190. -    29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
  3191. -    36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
  3192. -    39, 39,
  3193. -    39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
  3194. +         0,      0,      0,      0,      0,      0,
  3195. +         0,      0,      0,      0,      0,      0,
  3196. +#if BIT_DEPTH > 9
  3197. +   QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7),
  3198. +#endif
  3199. +#if BIT_DEPTH > 8
  3200. +    QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1),
  3201. +#endif
  3202. +     QP(0),  QP(1),  QP(2),  QP(3),  QP(4),  QP(5),
  3203. +     QP(6),  QP(7),  QP(8),  QP(9), QP(10), QP(11),
  3204. +    QP(12), QP(13), QP(14), QP(15), QP(16), QP(17),
  3205. +    QP(18), QP(19), QP(20), QP(21), QP(22), QP(23),
  3206. +    QP(24), QP(25), QP(26), QP(27), QP(28), QP(29),
  3207. +    QP(29), QP(30), QP(31), QP(32), QP(32), QP(33),
  3208. +    QP(34), QP(34), QP(35), QP(35), QP(36), QP(36),
  3209. +    QP(37), QP(37), QP(37), QP(38), QP(38), QP(38),
  3210. +    QP(39), QP(39), QP(39), QP(39),
  3211. +    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
  3212. +    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
  3213.  };
  3214. +#undef QP
  3215.  
  3216.  enum cabac_ctx_block_cat_e
  3217.  {
  3218. @@ -340,26 +353,31 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
  3219.     return (a&0xFFFF) + (b<<16);
  3220.  #endif
  3221.  }
  3222. +static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
  3223. +{
  3224. +#ifdef WORDS_BIGENDIAN
  3225. +   return b + ((uint64_t)a<<32);
  3226. +#else
  3227. +   return a + ((uint64_t)b<<32);
  3228. +#endif
  3229. +}
  3230.  
  3231. -#define pack_pixel_1to2 pack8to16
  3232. -#define pack_pixel_2to4 pack16to32
  3233. +#if X264_HIGH_BIT_DEPTH
  3234. +#   define pack_pixel_1to2 pack16to32
  3235. +#   define pack_pixel_2to4 pack32to64
  3236. +#else
  3237. +#   define pack_pixel_1to2 pack8to16
  3238. +#   define pack_pixel_2to4 pack16to32
  3239. +#endif
  3240.  
  3241. -#define array_non_zero(a) array_non_zero_int(a, sizeof(a))
  3242. +#define array_non_zero(a) array_non_zero_int(a, sizeof(a)/sizeof(dctcoef))
  3243.  #define array_non_zero_int array_non_zero_int
  3244.  static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
  3245.  {
  3246. -    if(i_count == 8)
  3247. -        return !!M64( &v[0] );
  3248. -    else if(i_count == 16)
  3249. -        return !!(M64( &v[0] ) | M64( &v[4] ));
  3250. -    else if(i_count == 32)
  3251. -        return !!(M64( &v[0] ) | M64( &v[4] ) | M64( &v[8] ) | M64( &v[12] ));
  3252. -    else
  3253. -    {
  3254. -        for( int i = 0; i < i_count; i+=4 )
  3255. -            if( M64( &v[i] ) ) return 1;
  3256. -        return 0;
  3257. -    }
  3258. +    for( int i = 0; i < i_count; i++ )
  3259. +        if( v[i] )
  3260. +            return 1;
  3261. +    return 0;
  3262.  }
  3263.  static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
  3264.  {
  3265. diff --git a/common/mc.c b/common/mc.c
  3266. index 9776bec..5ef0682 100644
  3267. --- a/common/mc.c
  3268. +++ b/common/mc.c
  3269. @@ -117,11 +117,14 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
  3270.  {
  3271.      w->weightfn = h->mc.weight;
  3272.  }
  3273. -#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
  3274. -#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
  3275. -static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
  3276. +#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
  3277. +#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
  3278. +static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
  3279.  {
  3280. -    if( weight->i_denom >= 1 )
  3281. +    int offset = weight->i_offset << (BIT_DEPTH-8);
  3282. +    int scale = weight->i_scale;
  3283. +    int denom = weight->i_denom;
  3284. +    if( denom >= 1 )
  3285.      {
  3286.          for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
  3287.              for( int x = 0; x < i_width; x++ )
  3288. @@ -135,21 +138,10 @@ static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_sr
  3289.      }
  3290.  }
  3291.  
  3292. -#define MC_WEIGHT_C( name, lx ) \
  3293. +#define MC_WEIGHT_C( name, width ) \
  3294.      static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
  3295.  { \
  3296. -    if( weight->i_denom >= 1 ) \
  3297. -    { \
  3298. -        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
  3299. -            for( int x = 0; x < lx; x++ ) \
  3300. -                opscale( x ); \
  3301. -    } \
  3302. -    else \
  3303. -    { \
  3304. -        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
  3305. -            for( int x = 0; x < lx; x++ ) \
  3306. -                opscale_noden( x ); \
  3307. -    } \
  3308. +    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
  3309.  }
  3310.  
  3311.  MC_WEIGHT_C( mc_weight_w20, 20 )
  3312. @@ -182,7 +174,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride,
  3313.  
  3314.  #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
  3315.  static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
  3316. -                         int stride, int width, int height, int16_t *buf )
  3317. +                         int stride, int width, int height, dctcoef *buf )
  3318.  {
  3319.      for( int y = 0; y < height; y++ )
  3320.      {
  3321. @@ -301,7 +293,12 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
  3322.  {
  3323.      while( h-- )
  3324.      {
  3325. +#if X264_HIGH_BIT_DEPTH
  3326. +        for( int i = 0; i < w; i++ )
  3327. +            dst[i] = src[i] << (BIT_DEPTH-8);
  3328. +#else
  3329.          memcpy( dst, src, w );
  3330. +#endif
  3331.          dst += i_dst;
  3332.          src += i_src;
  3333.      }
  3334. diff --git a/common/mc.h b/common/mc.h
  3335. index bb16d13..cbdf1a6 100644
  3336. --- a/common/mc.h
  3337. +++ b/common/mc.h
  3338. @@ -82,7 +82,7 @@ typedef struct
  3339.                          uint8_t *src, int i_src, int w, int h);
  3340.  
  3341.      void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
  3342. -                         int i_stride, int i_width, int i_height, int16_t *buf );
  3343. +                         int i_stride, int i_width, int i_height, dctcoef *buf );
  3344.  
  3345.      /* prefetch the next few macroblocks of fenc or fdec */
  3346.      void (*prefetch_fenc)( pixel *pix_y, int stride_y,
  3347. diff --git a/common/pixel.c b/common/pixel.c
  3348. index 8441c7a..069589f 100644
  3349. --- a/common/pixel.c
  3350. +++ b/common/pixel.c
  3351. @@ -177,7 +177,7 @@ static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride
  3352.          pix2 += i_stride2;
  3353.      }
  3354.      sum = abs(sum);
  3355. -    var = sqr - (sum * sum >> 6);
  3356. +    var = sqr - ((uint64_t)sum * sum >> 6);
  3357.      *ssd = sqr;
  3358.      return var;
  3359.  }
  3360. @@ -406,12 +406,14 @@ SAD_X( 8x4 )
  3361.  SAD_X( 4x8 )
  3362.  SAD_X( 4x4 )
  3363.  
  3364. +#if !X264_HIGH_BIT_DEPTH
  3365.  #if ARCH_UltraSparc
  3366.  SAD_X( 16x16_vis )
  3367.  SAD_X( 16x8_vis )
  3368.  SAD_X( 8x16_vis )
  3369.  SAD_X( 8x8_vis )
  3370.  #endif
  3371. +#endif // !X264_HIGH_BIT_DEPTH
  3372.  
  3373.  /****************************************************************************
  3374.   * pixel_satd_x4
  3375. @@ -444,6 +446,7 @@ SATD_X_DECL6( cpu )\
  3376.  SATD_X( 4x4, cpu )
  3377.  
  3378.  SATD_X_DECL7()
  3379. +#if !X264_HIGH_BIT_DEPTH
  3380.  #if HAVE_MMX
  3381.  SATD_X_DECL7( _mmxext )
  3382.  SATD_X_DECL6( _sse2 )
  3383. @@ -454,6 +457,7 @@ SATD_X_DECL7( _sse4 )
  3384.  #if HAVE_ARMV6
  3385.  SATD_X_DECL7( _neon )
  3386.  #endif
  3387. +#endif // !X264_HIGH_BIT_DEPTH
  3388.  
  3389.  #define INTRA_MBCMP_8x8( mbcmp )\
  3390.  void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
  3391. @@ -520,8 +524,8 @@ static void ssim_4x4x2_core( const pixel *pix1, int stride1,
  3392.  
  3393.  static float ssim_end1( int s1, int s2, int ss, int s12 )
  3394.  {
  3395. -    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
  3396. -    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
  3397. +    static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
  3398. +    static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
  3399.      int vars = ss*64 - s1*s1 - s2*s2;
  3400.      int covar = s12*64 - s1*s2;
  3401.      return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
  3402. @@ -678,6 +682,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  3403.      pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16;
  3404.      pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
  3405.  
  3406. +#if !X264_HIGH_BIT_DEPTH
  3407.  #if HAVE_MMX
  3408.      if( cpu&X264_CPU_MMX )
  3409.      {
  3410. @@ -903,17 +908,20 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  3411.          }
  3412.      }
  3413.  #endif
  3414. +#endif // !X264_HIGH_BIT_DEPTH
  3415.  #if HAVE_ALTIVEC
  3416.      if( cpu&X264_CPU_ALTIVEC )
  3417.      {
  3418.          x264_pixel_altivec_init( pixf );
  3419.      }
  3420.  #endif
  3421. +#if !X264_HIGH_BIT_DEPTH
  3422.  #if ARCH_UltraSparc
  3423.      INIT4( sad, _vis );
  3424.      INIT4( sad_x3, _vis );
  3425.      INIT4( sad_x4, _vis );
  3426.  #endif
  3427. +#endif // !X264_HIGH_BIT_DEPTH
  3428.  
  3429.      pixf->ads[PIXEL_8x16] =
  3430.      pixf->ads[PIXEL_8x4] =
  3431. diff --git a/common/ppc/dct.c b/common/ppc/dct.c
  3432. index eb223ae..85d5ce7 100644
  3433. --- a/common/ppc/dct.c
  3434. +++ b/common/ppc/dct.c
  3435. @@ -24,6 +24,7 @@
  3436.  #include "common/common.h"
  3437.  #include "ppccommon.h"
  3438.  
  3439. +#if !X264_HIGH_BIT_DEPTH
  3440.  #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
  3441.      b1 = vec_add( a0, a3 );              \
  3442.      b3 = vec_add( a1, a2 );              \
  3443. @@ -482,4 +483,5 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
  3444.      vec_st( tmp0v, 0x00, level );
  3445.      vec_st( tmp1v, 0x10, level );
  3446.  }
  3447. +#endif // !X264_HIGH_BIT_DEPTH
  3448.  
  3449. diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
  3450. index 0c8d2d4..986710d 100644
  3451. --- a/common/ppc/deblock.c
  3452. +++ b/common/ppc/deblock.c
  3453. @@ -21,6 +21,7 @@
  3454.  #include "common/common.h"
  3455.  #include "ppccommon.h"
  3456.  
  3457. +#if !X264_HIGH_BIT_DEPTH
  3458.  #define transpose4x16(r0, r1, r2, r3)        \
  3459.  {                                            \
  3460.      register vec_u8_t r4;                    \
  3461. @@ -292,3 +293,4 @@ void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta,
  3462.      transpose4x16(line1, line2, line3, line4);
  3463.      write16x4(pix-2, stride, line1, line2, line3, line4);
  3464.  }
  3465. +#endif // !X264_HIGH_BIT_DEPTH
  3466. diff --git a/common/ppc/mc.c b/common/ppc/mc.c
  3467. index 7ad8050..744a804 100644
  3468. --- a/common/ppc/mc.c
  3469. +++ b/common/ppc/mc.c
  3470. @@ -33,6 +33,7 @@
  3471.  #include "mc.h"
  3472.  #include "ppccommon.h"
  3473.  
  3474. +#if !X264_HIGH_BIT_DEPTH
  3475.  typedef void (*pf_mc_t)( uint8_t *src, int i_src,
  3476.                           uint8_t *dst, int i_dst, int i_height );
  3477.  
  3478. @@ -792,9 +793,11 @@ static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_
  3479.          dstc += dst_stride;
  3480.      }
  3481.  }
  3482. +#endif // !X264_HIGH_BIT_DEPTH
  3483.  
  3484.  void x264_mc_altivec_init( x264_mc_functions_t *pf )
  3485.  {
  3486. +#if !X264_HIGH_BIT_DEPTH
  3487.      pf->mc_luma   = mc_luma_altivec;
  3488.      pf->get_ref   = get_ref_altivec;
  3489.      pf->mc_chroma = mc_chroma_altivec;
  3490. @@ -804,4 +807,5 @@ void x264_mc_altivec_init( x264_mc_functions_t *pf )
  3491.  
  3492.      pf->hpel_filter = x264_hpel_filter_altivec;
  3493.      pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
  3494. +#endif // !X264_HIGH_BIT_DEPTH
  3495.  }
  3496. diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
  3497. index 3f99606..bd5f547 100644
  3498. --- a/common/ppc/pixel.c
  3499. +++ b/common/ppc/pixel.c
  3500. @@ -24,6 +24,7 @@
  3501.  #include "common/common.h"
  3502.  #include "ppccommon.h"
  3503.  
  3504. +#if !X264_HIGH_BIT_DEPTH
  3505.  /***********************************************************************
  3506.   * SAD routines
  3507.   **********************************************************************/
  3508. @@ -1979,12 +1980,14 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
  3509.      sums[0][3] = temp[0];
  3510.      sums[1][3] = temp[1];
  3511.  }
  3512. +#endif // !X264_HIGH_BIT_DEPTH
  3513.  
  3514.  /****************************************************************************
  3515.   * x264_pixel_init:
  3516.   ****************************************************************************/
  3517.  void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
  3518.  {
  3519. +#if !X264_HIGH_BIT_DEPTH
  3520.      pixf->sad[PIXEL_16x16]  = pixel_sad_16x16_altivec;
  3521.      pixf->sad[PIXEL_8x16]   = pixel_sad_8x16_altivec;
  3522.      pixf->sad[PIXEL_16x8]   = pixel_sad_16x8_altivec;
  3523. @@ -2023,4 +2026,5 @@ void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
  3524.      pixf->hadamard_ac[PIXEL_8x8]   = x264_pixel_hadamard_ac_8x8_altivec;
  3525.  
  3526.      pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
  3527. +#endif // !X264_HIGH_BIT_DEPTH
  3528.  }
  3529. diff --git a/common/ppc/predict.c b/common/ppc/predict.c
  3530. index 3fb1a2b..c71dbb5 100644
  3531. --- a/common/ppc/predict.c
  3532. +++ b/common/ppc/predict.c
  3533. @@ -23,6 +23,7 @@
  3534.  #include "pixel.h"
  3535.  #include "ppccommon.h"
  3536.  
  3537. +#if !X264_HIGH_BIT_DEPTH
  3538.  static void predict_8x8c_p_altivec( uint8_t *src )
  3539.  {
  3540.      int H = 0, V = 0;
  3541. @@ -194,6 +195,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
  3542.          src += FDEC_STRIDE;
  3543.      }
  3544.  }
  3545. +#endif // !X264_HIGH_BIT_DEPTH
  3546.  
  3547.  
  3548.  /****************************************************************************
  3549. @@ -201,6 +203,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
  3550.   ****************************************************************************/
  3551.  void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
  3552.  {
  3553. +#if !X264_HIGH_BIT_DEPTH
  3554.      pf[I_PRED_16x16_V ]      = predict_16x16_v_altivec;
  3555.      pf[I_PRED_16x16_H ]      = predict_16x16_h_altivec;
  3556.      pf[I_PRED_16x16_DC]      = predict_16x16_dc_altivec;
  3557. @@ -208,9 +211,12 @@ void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
  3558.      pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
  3559.      pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
  3560.      pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
  3561. +#endif // !X264_HIGH_BIT_DEPTH
  3562.  }
  3563.  
  3564.  void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
  3565.  {
  3566. +#if !X264_HIGH_BIT_DEPTH
  3567.      pf[I_PRED_CHROMA_P]       = predict_8x8c_p_altivec;
  3568. +#endif // !X264_HIGH_BIT_DEPTH
  3569.  }
  3570. diff --git a/common/ppc/quant.c b/common/ppc/quant.c
  3571. index 6f41a06..ffd6a1b 100644
  3572. --- a/common/ppc/quant.c
  3573. +++ b/common/ppc/quant.c
  3574. @@ -22,6 +22,7 @@
  3575.  #include "ppccommon.h"
  3576.  #include "quant.h"
  3577.  
  3578. +#if !X264_HIGH_BIT_DEPTH
  3579.  // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
  3580.  #define QUANT_16_U( idx0, idx1 )                                    \
  3581.  {                                                                   \
  3582. @@ -360,4 +361,5 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
  3583.              DEQUANT_SHR();
  3584.      }
  3585.  }
  3586. +#endif // !X264_HIGH_BIT_DEPTH
  3587.  
  3588. diff --git a/common/predict.c b/common/predict.c
  3589. index 79ec1fc..dc92083 100644
  3590. --- a/common/predict.c
  3591. +++ b/common/predict.c
  3592. @@ -53,40 +53,40 @@
  3593.  
  3594.  void x264_predict_16x16_dc_c( pixel *src )
  3595.  {
  3596. -    pixel4 dc = 0;
  3597. +    int dc = 0;
  3598.  
  3599.      for( int i = 0; i < 16; i++ )
  3600.      {
  3601.          dc += src[-1 + i * FDEC_STRIDE];
  3602.          dc += src[i - FDEC_STRIDE];
  3603.      }
  3604. -    dc = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
  3605. +    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
  3606.  
  3607. -    PREDICT_16x16_DC( dc );
  3608. +    PREDICT_16x16_DC( dcsplat );
  3609.  }
  3610.  static void x264_predict_16x16_dc_left_c( pixel *src )
  3611.  {
  3612. -    pixel4 dc = 0;
  3613. +    int dc = 0;
  3614.  
  3615.      for( int i = 0; i < 16; i++ )
  3616.          dc += src[-1 + i * FDEC_STRIDE];
  3617. -    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  3618. +    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  3619.  
  3620. -    PREDICT_16x16_DC( dc );
  3621. +    PREDICT_16x16_DC( dcsplat );
  3622.  }
  3623.  static void x264_predict_16x16_dc_top_c( pixel *src )
  3624.  {
  3625. -    pixel4 dc = 0;
  3626. +    int dc = 0;
  3627.  
  3628.      for( int i = 0; i < 16; i++ )
  3629.          dc += src[i - FDEC_STRIDE];
  3630. -    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  3631. +    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
  3632.  
  3633. -    PREDICT_16x16_DC( dc );
  3634. +    PREDICT_16x16_DC( dcsplat );
  3635.  }
  3636.  static void x264_predict_16x16_dc_128_c( pixel *src )
  3637.  {
  3638. -    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 0x80 ) );
  3639. +    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
  3640.  }
  3641.  void x264_predict_16x16_h_c( pixel *src )
  3642.  {
  3643. @@ -155,53 +155,53 @@ static void x264_predict_8x8c_dc_128_c( pixel *src )
  3644.  {
  3645.      for( int y = 0; y < 8; y++ )
  3646.      {
  3647. -        MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
  3648. -        MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
  3649. +        MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
  3650. +        MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
  3651.          src += FDEC_STRIDE;
  3652.      }
  3653.  }
  3654.  static void x264_predict_8x8c_dc_left_c( pixel *src )
  3655.  {
  3656. -    pixel4 dc0 = 0, dc1 = 0;
  3657. +    int dc0 = 0, dc1 = 0;
  3658.  
  3659.      for( int y = 0; y < 4; y++ )
  3660.      {
  3661.          dc0 += src[y * FDEC_STRIDE     - 1];
  3662.          dc1 += src[(y+4) * FDEC_STRIDE - 1];
  3663.      }
  3664. -    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
  3665. -    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  3666. +    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
  3667. +    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  3668.  
  3669.      for( int y = 0; y < 4; y++ )
  3670.      {
  3671. -        MPIXEL_X4( src+0 ) = dc0;
  3672. -        MPIXEL_X4( src+4 ) = dc0;
  3673. +        MPIXEL_X4( src+0 ) = dc0splat;
  3674. +        MPIXEL_X4( src+4 ) = dc0splat;
  3675.          src += FDEC_STRIDE;
  3676.      }
  3677.      for( int y = 0; y < 4; y++ )
  3678.      {
  3679. -        MPIXEL_X4( src+0 ) = dc1;
  3680. -        MPIXEL_X4( src+4 ) = dc1;
  3681. +        MPIXEL_X4( src+0 ) = dc1splat;
  3682. +        MPIXEL_X4( src+4 ) = dc1splat;
  3683.          src += FDEC_STRIDE;
  3684.      }
  3685.  
  3686.  }
  3687.  static void x264_predict_8x8c_dc_top_c( pixel *src )
  3688.  {
  3689. -    pixel4 dc0 = 0, dc1 = 0;
  3690. +    int dc0 = 0, dc1 = 0;
  3691.  
  3692.      for( int x = 0; x < 4; x++ )
  3693.      {
  3694.          dc0 += src[x     - FDEC_STRIDE];
  3695.          dc1 += src[x + 4 - FDEC_STRIDE];
  3696.      }
  3697. -    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
  3698. -    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  3699. +    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
  3700. +    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
  3701.  
  3702.      for( int y = 0; y < 8; y++ )
  3703.      {
  3704. -        MPIXEL_X4( src+0 ) = dc0;
  3705. -        MPIXEL_X4( src+4 ) = dc1;
  3706. +        MPIXEL_X4( src+0 ) = dc0splat;
  3707. +        MPIXEL_X4( src+4 ) = dc1splat;
  3708.          src += FDEC_STRIDE;
  3709.      }
  3710.  }
  3711. @@ -306,7 +306,7 @@ static void x264_predict_8x8c_p_c( pixel *src )
  3712.  
  3713.  static void x264_predict_4x4_dc_128_c( pixel *src )
  3714.  {
  3715. -    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 0x80 ) );
  3716. +    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
  3717.  }
  3718.  static void x264_predict_4x4_dc_left_c( pixel *src )
  3719.  {
  3720. @@ -491,7 +491,8 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
  3721.              }
  3722.              else
  3723.              {
  3724. -                M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
  3725. +                MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
  3726. +                MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
  3727.                  edge[32] = SRC(7,-1);
  3728.              }
  3729.          }
  3730. @@ -523,7 +524,7 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
  3731.  
  3732.  static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
  3733.  {
  3734. -    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 0x80 ) );
  3735. +    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
  3736.  }
  3737.  static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
  3738.  {
  3739. @@ -554,9 +555,13 @@ void x264_predict_8x8_h_c( pixel *src, pixel edge[33] )
  3740.  }
  3741.  void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
  3742.  {
  3743. -    uint64_t top = M64( edge+16 );
  3744. +    pixel4 top[2] = { MPIXEL_X4( edge+16 ),
  3745. +                      MPIXEL_X4( edge+20 ) };
  3746.      for( int y = 0; y < 8; y++ )
  3747. -        M64( src+y*FDEC_STRIDE ) = top;
  3748. +    {
  3749. +        MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0];
  3750. +        MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
  3751. +    }
  3752.  }
  3753.  static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
  3754.  {
  3755. diff --git a/common/quant.c b/common/quant.c
  3756. index ece52f9..a7b72cf 100644
  3757. --- a/common/quant.c
  3758. +++ b/common/quant.c
  3759. @@ -142,7 +142,7 @@ static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int
  3760.      for( int i = 1; i < size; i++ )
  3761.      {
  3762.          int level = dct[i];
  3763. -        int sign = level>>15;
  3764. +        int sign = level>>31;
  3765.          level = (level+sign)^sign;
  3766.          sum[i] += level;
  3767.          level -= offset[i];
  3768. @@ -177,10 +177,7 @@ static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
  3769.      int i_score = 0;
  3770.      int idx = i_max - 1;
  3771.  
  3772. -    /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned.  idx>=0 instead of 1 works correctly for the same reason */
  3773. -    while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
  3774. -        idx -= 2;
  3775. -    if( idx >= 0 && dct[idx] == 0 )
  3776. +    while( idx >= 0 && dct[idx] == 0 )
  3777.          idx--;
  3778.      while( idx >= 0 )
  3779.      {
  3780. @@ -216,10 +213,7 @@ static int x264_decimate_score64( dctcoef *dct )
  3781.  
  3782.  static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
  3783.  {
  3784. -    int i_last;
  3785. -    for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
  3786. -        if( M64( l+i_last-3 ) )
  3787. -            break;
  3788. +    int i_last = i_count-1;
  3789.      while( i_last >= 0 && l[i_last] == 0 )
  3790.          i_last--;
  3791.      return i_last;
  3792. @@ -287,6 +281,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
  3793.      pf->coeff_level_run[  DCT_LUMA_AC] = x264_coeff_level_run15;
  3794.      pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
  3795.  
  3796. +#if !X264_HIGH_BIT_DEPTH
  3797.  #if HAVE_MMX
  3798.      if( cpu&X264_CPU_MMX )
  3799.      {
  3800. @@ -425,6 +420,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
  3801.          pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
  3802.      }
  3803.  #endif
  3804. +#endif // !X264_HIGH_BIT_DEPTH
  3805.      pf->coeff_last[  DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
  3806.      pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
  3807.      pf->coeff_level_run[  DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
  3808. diff --git a/common/set.c b/common/set.c
  3809. index 16cff8e..86f3854 100644
  3810. --- a/common/set.c
  3811. +++ b/common/set.c
  3812. @@ -78,6 +78,7 @@ int x264_cqm_init( x264_t *h )
  3813.                          32 - 11, 32 - 21 };
  3814.      int max_qp_err = -1;
  3815.      int max_chroma_qp_err = -1;
  3816. +    int min_qp_err = QP_MAX+1;
  3817.  
  3818.      for( int i = 0; i < 6; i++ )
  3819.      {
  3820. @@ -94,9 +95,9 @@ int x264_cqm_init( x264_t *h )
  3821.          }
  3822.          else
  3823.          {
  3824. -            CHECKED_MALLOC( h->  quant4_mf[i], 52*size*sizeof(uint16_t) );
  3825. +            CHECKED_MALLOC( h->  quant4_mf[i], (QP_MAX+1)*size*sizeof(uint16_t) );
  3826.              CHECKED_MALLOC( h->dequant4_mf[i],  6*size*sizeof(int) );
  3827. -            CHECKED_MALLOC( h->unquant4_mf[i], 52*size*sizeof(int) );
  3828. +            CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
  3829.          }
  3830.  
  3831.          for( j = (i<4 ? 0 : 4); j < i; j++ )
  3832. @@ -106,7 +107,7 @@ int x264_cqm_init( x264_t *h )
  3833.          if( j < i )
  3834.              h->quant4_bias[i] = h->quant4_bias[j];
  3835.          else
  3836. -            CHECKED_MALLOC( h->quant4_bias[i], 52*size*sizeof(uint16_t) );
  3837. +            CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(uint16_t) );
  3838.      }
  3839.  
  3840.      for( int q = 0; q < 6; q++ )
  3841. @@ -140,7 +141,7 @@ int x264_cqm_init( x264_t *h )
  3842.                       quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
  3843.              }
  3844.      }
  3845. -    for( int q = 0; q < 52; q++ )
  3846. +    for( int q = 0; q < QP_MAX+1; q++ )
  3847.      {
  3848.          int j;
  3849.          for( int i_list = 0; i_list < 4; i_list++ )
  3850. @@ -148,6 +149,11 @@ int x264_cqm_init( x264_t *h )
  3851.              {
  3852.                  h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
  3853.                  h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
  3854. +                if( !j )
  3855. +                {
  3856. +                    min_qp_err = X264_MIN( min_qp_err, q );
  3857. +                    continue;
  3858. +                }
  3859.                  // round to nearest, unless that would cause the deadzone to be negative
  3860.                  h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  3861.                  if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
  3862. @@ -161,6 +167,11 @@ int x264_cqm_init( x264_t *h )
  3863.                  {
  3864.                      h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
  3865.                      h->quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
  3866. +                    if( !j )
  3867. +                    {
  3868. +                        min_qp_err = X264_MIN( min_qp_err, q );
  3869. +                        continue;
  3870. +                    }
  3871.                      h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
  3872.                      if( j > 0xffff && q > max_qp_err )
  3873.                          max_qp_err = q;
  3874. @@ -179,6 +190,12 @@ int x264_cqm_init( x264_t *h )
  3875.          x264_log( h, X264_LOG_ERROR, "but min chroma QP is implied to be %d.\n", h->chroma_qp_table[h->param.rc.i_qp_min] );
  3876.          return -1;
  3877.      }
  3878. +    if( !h->mb.b_lossless && min_qp_err <= h->param.rc.i_qp_max )
  3879. +    {
  3880. +        x264_log( h, X264_LOG_ERROR, "Quantization underflow.  Your CQM is incompatible with QP > %d,\n", min_qp_err-1 );
  3881. +        x264_log( h, X264_LOG_ERROR, "but max QP is implied to be %d.\n", h->param.rc.i_qp_max );
  3882. +        return -1;
  3883. +    }
  3884.      return 0;
  3885.  fail:
  3886.      x264_cqm_delete( h );
  3887. diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
  3888. index 2dcd671..4bb5f33 100644
  3889. --- a/common/x86/mc-c.c
  3890. +++ b/common/x86/mc-c.c
  3891. @@ -125,6 +125,7 @@ PIXEL_AVG_WALL(sse2)
  3892.  PIXEL_AVG_WALL(sse2_misalign)
  3893.  PIXEL_AVG_WALL(cache64_ssse3)
  3894.  
  3895. +#if !X264_HIGH_BIT_DEPTH
  3896.  #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
  3897.  static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
  3898.  {\
  3899. @@ -355,24 +356,28 @@ static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i
  3900.          x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
  3901.      }
  3902.  }
  3903. +#endif // !X264_HIGH_BIT_DEPTH
  3904.  
  3905.  void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
  3906.  {
  3907.      if( !(cpu&X264_CPU_MMX) )
  3908.          return;
  3909.  
  3910. +    pf->memcpy_aligned = x264_memcpy_aligned_mmx;
  3911. +    pf->memzero_aligned = x264_memzero_aligned_mmx;
  3912. +#if !X264_HIGH_BIT_DEPTH
  3913.      pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
  3914.      pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
  3915.      pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_mmx;
  3916.      pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_mmx;
  3917. -    pf->memcpy_aligned = x264_memcpy_aligned_mmx;
  3918. -    pf->memzero_aligned = x264_memzero_aligned_mmx;
  3919.      pf->integral_init4v = x264_integral_init4v_mmx;
  3920.      pf->integral_init8v = x264_integral_init8v_mmx;
  3921. +#endif // !X264_HIGH_BIT_DEPTH
  3922.  
  3923.      if( !(cpu&X264_CPU_MMXEXT) )
  3924.          return;
  3925.  
  3926. +#if !X264_HIGH_BIT_DEPTH
  3927.      pf->mc_luma = mc_luma_mmxext;
  3928.      pf->get_ref = get_ref_mmxext;
  3929.      pf->mc_chroma = x264_mc_chroma_mmxext;
  3930. @@ -412,12 +417,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
  3931.          pf->frame_init_lowres_core = x264_frame_init_lowres_core_cache32_mmxext;
  3932.      }
  3933.  #endif
  3934. +#endif // !X264_HIGH_BIT_DEPTH
  3935.  
  3936.      if( !(cpu&X264_CPU_SSE2) )
  3937.          return;
  3938.  
  3939.      pf->memcpy_aligned = x264_memcpy_aligned_sse2;
  3940.      pf->memzero_aligned = x264_memzero_aligned_sse2;
  3941. +#if !X264_HIGH_BIT_DEPTH
  3942.      pf->integral_init4v = x264_integral_init4v_sse2;
  3943.      pf->integral_init8v = x264_integral_init8v_sse2;
  3944.      pf->hpel_filter = x264_hpel_filter_sse2_amd;
  3945. @@ -492,4 +499,5 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
  3946.  
  3947.      pf->integral_init4h = x264_integral_init4h_sse4;
  3948.      pf->integral_init8h = x264_integral_init8h_sse4;
  3949. +#endif // !X264_HIGH_BIT_DEPTH
  3950.  }
  3951. diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
  3952. index e771431..4004265 100644
  3953. --- a/common/x86/predict-c.c
  3954. +++ b/common/x86/predict-c.c
  3955. @@ -75,6 +75,7 @@
  3956.   void x264_predict_16x16_v_sse2( uint8_t *src );
  3957.   void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
  3958.  
  3959. +#if !X264_HIGH_BIT_DEPTH
  3960.  ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
  3961.  ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
  3962.  ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
  3963. @@ -364,6 +365,7 @@ INTRA_SA8D_X3(ssse3)
  3964.  #else
  3965.  INTRA_SA8D_X3(mmxext)
  3966.  #endif
  3967. +#endif // !X264_HIGH_BIT_DEPTH
  3968.  
  3969.  /****************************************************************************
  3970.   * Exported functions:
  3971. @@ -372,6 +374,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
  3972.  {
  3973.      if( !(cpu&X264_CPU_MMX) )
  3974.          return;
  3975. +#if !X264_HIGH_BIT_DEPTH
  3976.      pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx;
  3977.      if( !(cpu&X264_CPU_MMXEXT) )
  3978.          return;
  3979. @@ -397,12 +400,14 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
  3980.  #ifdef __GNUC__
  3981.      pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
  3982.  #endif
  3983. +#endif // !X264_HIGH_BIT_DEPTH
  3984.  }
  3985.  
  3986.  void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
  3987.  {
  3988.      if( !(cpu&X264_CPU_MMX) )
  3989.          return;
  3990. +#if !X264_HIGH_BIT_DEPTH
  3991.  #if ARCH_X86_64
  3992.      pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
  3993.  #endif
  3994. @@ -424,12 +429,14 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
  3995.  #ifdef __GNUC__
  3996.      pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
  3997.  #endif
  3998. +#endif // !X264_HIGH_BIT_DEPTH
  3999.  }
  4000.  
  4001.  void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
  4002.  {
  4003.      if( !(cpu&X264_CPU_MMXEXT) )
  4004.          return;
  4005. +#if !X264_HIGH_BIT_DEPTH
  4006.      pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmxext;
  4007.      pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmxext;
  4008.      pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmxext;
  4009. @@ -456,12 +463,14 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
  4010.      pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_ssse3;
  4011.      pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_ssse3;
  4012.      *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
  4013. +#endif // !X264_HIGH_BIT_DEPTH
  4014.  }
  4015.  
  4016.  void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
  4017.  {
  4018.      if( !(cpu&X264_CPU_MMXEXT) )
  4019.          return;
  4020. +#if !X264_HIGH_BIT_DEPTH
  4021.      pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmxext;
  4022.      pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
  4023.      pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmxext;
  4024. @@ -474,4 +483,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
  4025.      pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
  4026.      pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
  4027.      pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
  4028. +#endif // !X264_HIGH_BIT_DEPTH
  4029.  }
  4030. diff --git a/configure b/configure
  4031. index 24d15ad..5cdd82d 100755
  4032. --- a/configure
  4033. +++ b/configure
  4034. @@ -18,6 +18,7 @@ echo "  --enable-gprof           adds -pg, doesn't strip"
  4035.  echo "  --enable-visualize       enables visualization (X11 only)"
  4036.  echo "  --enable-pic             build position-independent code"
  4037.  echo "  --enable-shared          build libx264.so"
  4038. +echo "  --bit-depth=BIT_DEPTH    sets output bit depth (8-10), default 8"
  4039.  echo "  --extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS"
  4040.  echo "  --extra-cflags=ECFLAGS   add ECFLAGS to CFLAGS"
  4041.  echo "  --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
  4042. @@ -124,6 +125,7 @@ gprof="no"
  4043.  pic="no"
  4044.  vis="no"
  4045.  shared="no"
  4046. +bit_depth="8"
  4047.  
  4048.  CFLAGS="$CFLAGS -Wall -I."
  4049.  LDFLAGS="$LDFLAGS"
  4050. @@ -208,6 +210,13 @@ for opt do
  4051.              CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
  4052.              LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
  4053.              ;;
  4054. +        --bit-depth=*)
  4055. +            bit_depth="${opt#--bit-depth=}"
  4056. +            if [ "$bit_depth" -lt "8" ] || [ "$bit_depth" -gt "10" ]; then
  4057. +                echo "Supplied bit depth must be in range [8,10]."
  4058. +                exit 1
  4059. +            fi
  4060. +            ;;
  4061.          *)
  4062.              echo "Unknown option $opt, ignored"
  4063.              ;;
  4064. @@ -644,6 +653,12 @@ if cc_check '' -Wshadow ; then
  4065.      CFLAGS="-Wshadow $CFLAGS"
  4066.  fi
  4067.  
  4068. +if [ "$bit_depth" -gt "8" ]; then
  4069. +    define X264_HIGH_BIT_DEPTH
  4070. +fi
  4071. +
  4072. +define BIT_DEPTH $bit_depth
  4073. +
  4074.  rm -f conftest*
  4075.  
  4076.  # generate config files
  4077. @@ -724,6 +739,7 @@ gprof:      $gprof
  4078.  PIC:        $pic
  4079.  shared:     $shared
  4080.  visualize:  $vis
  4081. +bit depth:  $bit_depth
  4082.  EOF
  4083.  
  4084.  echo >> config.log
  4085. diff --git a/encoder/analyse.c b/encoder/analyse.c
  4086. index cdbdd1e..93f7eed 100644
  4087. --- a/encoder/analyse.c
  4088. +++ b/encoder/analyse.c
  4089. @@ -134,25 +134,27 @@ typedef struct
  4090.  } x264_mb_analysis_t;
  4091.  
  4092.  /* lambda = pow(2,qp/6-2) */
  4093. -const uint8_t x264_lambda_tab[52] = {
  4094. -   1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
  4095. -   1, 1, 1, 1,              /*  8-11 */
  4096. -   1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
  4097. -   3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
  4098. -   6, 7, 8, 9,10,11,13,14,  /* 28-35 */
  4099. -  16,18,20,23,25,29,32,36,  /* 36-43 */
  4100. -  40,45,51,57,64,72,81,91   /* 44-51 */
  4101. +const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
  4102. +   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
  4103. +   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
  4104. +   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
  4105. +   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
  4106. +  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
  4107. +  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  4108. +  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
  4109. + 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
  4110.  };
  4111.  
  4112.  /* lambda2 = pow(lambda,2) * .9 * 256 */
  4113. -const int x264_lambda2_tab[52] = {
  4114. -    14,      18,      22,      28,     36,     45,     57,     72, /*  0 -  7 */
  4115. -    91,     115,     145,     182,    230,    290,    365,    460, /*  8 - 15 */
  4116. -   580,     731,     921,    1161,   1462,   1843,   2322,   2925, /* 16 - 23 */
  4117. -  3686,    4644,    5851,    7372,   9289,  11703,  14745,  18578, /* 24 - 31 */
  4118. - 23407,   29491,   37156,   46814,  58982,  74313,  93628, 117964, /* 32 - 39 */
  4119. -148626,  187257,  235929,  297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
  4120. -943718, 1189010, 1498059, 1887436                                  /* 48 - 51 */
  4121. +const int x264_lambda2_tab[QP_MAX_MAX+1] = {
  4122. +     14,     18,     22,      28,      36,      45,      57,      72, /*  0- 7 */
  4123. +     91,    115,    145,     182,     230,     290,     365,     460, /*  8-15 */
  4124. +    580,    731,    921,    1161,    1462,    1843,    2322,    2925, /* 16-23 */
  4125. +   3686,   4644,   5851,    7372,    9289,   11703,   14745,   18578, /* 24-31 */
  4126. +  23407,  29491,  37156,   46814,   58982,   74313,   93628,  117964, /* 32-39 */
  4127. + 148626, 187257, 235929,  297252,  374514,  471859,  594505,  749029, /* 40-47 */
  4128. + 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
  4129. +5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
  4130.  };
  4131.  
  4132.  const uint8_t x264_exp2_lut[64] = {
  4133. @@ -188,27 +190,31 @@ const float x264_log2_lz_lut[32] = {
  4134.  
  4135.  // should the intra and inter lambdas be different?
  4136.  // I'm just matching the behaviour of deadzone quant.
  4137. -static const int x264_trellis_lambda2_tab[2][52] = {
  4138. +static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
  4139.      // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
  4140. -    {    46,      58,      73,      92,     117,     147,
  4141. -        185,     233,     294,     370,     466,     587,
  4142. -        740,     932,    1174,    1480,    1864,    2349,
  4143. -       2959,    3728,    4697,    5918,    7457,    9395,
  4144. -      11837,   14914,   18790,   23674,   29828,   37581,
  4145. -      47349,   59656,   75163,   94699,  119313,  150326,
  4146. -     189399,  238627,  300652,  378798,  477255,  601304,
  4147. -     757596,  954511, 1202608, 1515192, 1909022, 2405217,
  4148. -    3030384, 3818045, 4810435, 6060769 },
  4149. +    {      46,      58,      73,      92,     117,     147,
  4150. +          185,     233,     294,     370,     466,     587,
  4151. +          740,     932,    1174,    1480,    1864,    2349,
  4152. +         2959,    3728,    4697,    5918,    7457,    9395,
  4153. +        11837,   14914,   18790,   23674,   29828,   37581,
  4154. +        47349,   59656,   75163,   94699,  119313,  150326,
  4155. +       189399,  238627,  300652,  378798,  477255,  601304,
  4156. +       757596,  954511, 1202608, 1515192, 1909022, 2405217,
  4157. +      3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
  4158. +     12121539,15272182,19241743,24243077,30544363,38483486,
  4159. +     48486154,61088726,76966972,96972308 },
  4160.      // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
  4161. -    {    27,      34,      43,      54,      68,      86,
  4162. -        108,     136,     172,     216,     273,     343,
  4163. -        433,     545,     687,     865,    1090,    1374,
  4164. -       1731,    2180,    2747,    3461,    4361,    5494,
  4165. -       6922,    8721,   10988,   13844,   17442,   21976,
  4166. -      27688,   34885,   43953,   55377,   69771,   87906,
  4167. -     110755,  139543,  175813,  221511,  279087,  351627,
  4168. -     443023,  558174,  703255,  886046, 1116348, 1406511,
  4169. -    1772093, 2232697, 2813022, 3544186 }
  4170. +    {      27,      34,      43,      54,      68,      86,
  4171. +          108,     136,     172,     216,     273,     343,
  4172. +          433,     545,     687,     865,    1090,    1374,
  4173. +         1731,    2180,    2747,    3461,    4361,    5494,
  4174. +         6922,    8721,   10988,   13844,   17442,   21976,
  4175. +        27688,   34885,   43953,   55377,   69771,   87906,
  4176. +       110755,  139543,  175813,  221511,  279087,  351627,
  4177. +       443023,  558174,  703255,  886046, 1116348, 1406511,
  4178. +      1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
  4179. +      7088374, 8930791,11252092,14176748,17861583,22504184,
  4180. +     28353495,35723165,45008368,56706990 }
  4181.  };
  4182.  
  4183.  static const uint16_t x264_chroma_lambda2_offset_tab[] = {
  4184. @@ -237,7 +243,7 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
  4185.  
  4186.  static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
  4187.  
  4188. -static uint16_t x264_cost_ref[92][3][33];
  4189. +static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
  4190.  static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
  4191.  
  4192.  int x264_analyse_init_costs( x264_t *h, int qp )
  4193. @@ -275,7 +281,7 @@ fail:
  4194.  
  4195.  void x264_analyse_free_costs( x264_t *h )
  4196.  {
  4197. -    for( int i = 0; i < 92; i++ )
  4198. +    for( int i = 0; i < LAMBDA_MAX+1; i++ )
  4199.      {
  4200.          if( h->cost_mv[i] )
  4201.              x264_free( h->cost_mv[i] - 2*4*2048 );
  4202. diff --git a/encoder/cabac.c b/encoder/cabac.c
  4203. index 8bd40f1..e82d7e9 100644
  4204. --- a/encoder/cabac.c
  4205. +++ b/encoder/cabac.c
  4206. @@ -262,9 +262,9 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
  4207.      if( i_dqp != 0 )
  4208.      {
  4209.          int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
  4210. -        /* dqp is interpreted modulo 52 */
  4211. -        if( val >= 51 && val != 52 )
  4212. -            val = 103 - val;
  4213. +        /* dqp is interpreted modulo (QP_MAX+1) */
  4214. +        if( val >= QP_MAX && val != QP_MAX+1 )
  4215. +            val = 2*QP_MAX+1 - val;
  4216.          do
  4217.          {
  4218.              x264_cabac_encode_decision( cb, 60 + ctx, 1 );
  4219. @@ -767,15 +767,18 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  4220.          i_mb_pos_tex = x264_cabac_pos( cb );
  4221.          h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  4222.  
  4223. -        memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
  4224. -        cb->p += 256;
  4225. -        for( int i = 0; i < 8; i++ )
  4226. -            memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
  4227. -        cb->p += 64;
  4228. -        for( int i = 0; i < 8; i++ )
  4229. -            memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
  4230. -        cb->p += 64;
  4231. +        bs_t s;
  4232. +        bs_init( &s, cb->p, cb->p_end - cb->p );
  4233.  
  4234. +        for( int i = 0; i < 256; i++ )
  4235. +            bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
  4236. +        for( int ch = 1; ch < 3; ch++ ) /* 8x8 planes are p_fenc[1] and p_fenc[2], as in the memcpy code this replaces; p_fenc[0] is written above */
  4237. +            for( int i = 0; i < 8; i++ )
  4238. +                for( int j = 0; j < 8; j++ )
  4239. +                    bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
  4240. +
  4241. +        bs_flush( &s );
  4242. +        cb->p = s.p;
  4243.          x264_cabac_encode_init_core( cb );
  4244.  
  4245.          h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
  4246. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  4247. index e2f60b1..632ed41 100644
  4248. --- a/encoder/cavlc.c
  4249. +++ b/encoder/cavlc.c
  4250. @@ -66,7 +66,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
  4251.      bs_t *s = &h->out.bs;
  4252.      static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
  4253.      int i_level_prefix = 15;
  4254. -    int mask = level >> 15;
  4255. +    int mask = level >> 31;
  4256.      int abs_level = (level^mask)-mask;
  4257.      int i_level_code = abs_level*2-mask-2;
  4258.      if( ( i_level_code >> i_suffix_length ) < 15 )
  4259. @@ -219,10 +219,10 @@ static void cavlc_qp_delta( x264_t *h )
  4260.  
  4261.      if( i_dqp )
  4262.      {
  4263. -        if( i_dqp < -26 )
  4264. -            i_dqp += 52;
  4265. -        else if( i_dqp > 25 )
  4266. -            i_dqp -= 52;
  4267. +        if( i_dqp < -(QP_MAX+1)/2 )
  4268. +            i_dqp += QP_MAX+1;
  4269. +        else if( i_dqp > QP_MAX/2 )
  4270. +            i_dqp -= QP_MAX+1;
  4271.      }
  4272.      bs_write_se( s, i_dqp );
  4273.  }
  4274. @@ -309,14 +309,12 @@ void x264_macroblock_write_cavlc( x264_t *h )
  4275.  
  4276.          bs_align_0( s );
  4277.  
  4278. -        memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
  4279. -        s->p += 256;
  4280. -        for( int i = 0; i < 8; i++ )
  4281. -            memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
  4282. -        s->p += 64;
  4283. -        for( int i = 0; i < 8; i++ )
  4284. -            memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
  4285. -        s->p += 64;
  4286. +        for( int i = 0; i < 256; i++ )
  4287. +            bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
  4288. +        for( int ch = 1; ch < 3; ch++ ) /* 8x8 planes are p_fenc[1] and p_fenc[2], as in the memcpy code this replaces; p_fenc[0] is written above */
  4289. +            for( int i = 0; i < 8; i++ )
  4290. +                for( int j = 0; j < 8; j++ )
  4291. +                    bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
  4292.  
  4293.          bs_init( s, s->p, s->p_end - s->p );
  4294.          s->p_start = p_start;
  4295. diff --git a/encoder/encoder.c b/encoder/encoder.c
  4296. index 31cb84a..f7e0e38 100644
  4297. --- a/encoder/encoder.c
  4298. +++ b/encoder/encoder.c
  4299. @@ -51,7 +51,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  4300.   ****************************************************************************/
  4301.  static float x264_psnr( int64_t i_sqe, int64_t i_size )
  4302.  {
  4303. -    double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
  4304. +    double f_mse = (double)i_sqe / (PIXEL_MAX*PIXEL_MAX * (double)i_size);
  4305.      if( f_mse <= 0.0000000001 ) /* Max 100dB */
  4306.          return 100;
  4307.  
  4308. @@ -68,11 +68,13 @@ static void x264_frame_dump( x264_t *h )
  4309.      FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
  4310.      if( !f )
  4311.          return;
  4312. +    int bytes_per_pixel = (BIT_DEPTH+7)/8;
  4313.      /* Write the frame in display order */
  4314. -    fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET );
  4315. +    fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * bytes_per_pixel, SEEK_SET );
  4316.      for( int i = 0; i < h->fdec->i_plane; i++ )
  4317.          for( int y = 0; y < h->param.i_height >> !!i; y++ )
  4318. -            fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
  4319. +            for( int j = 0; j < h->param.i_width >> !!i; j++ )
  4320. +                fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]]+j, bytes_per_pixel, 1, f );
  4321.      fclose( f );
  4322.  }
  4323.  
  4324. @@ -469,8 +471,8 @@ static int x264_validate_parameters( x264_t *h )
  4325.          x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
  4326.          return -1;
  4327.      }
  4328. -    h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
  4329. -    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
  4330. +    h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, QP_MAX );
  4331. +    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
  4332.      if( h->param.rc.i_rc_method == X264_RC_CRF )
  4333.      {
  4334.          h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
  4335. @@ -502,12 +504,12 @@ static int x264_validate_parameters( x264_t *h )
  4336.          float qp_p = h->param.rc.i_qp_constant;
  4337.          float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
  4338.          float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
  4339. -        h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
  4340. -        h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
  4341. +        h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
  4342. +        h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
  4343.          h->param.rc.i_aq_mode = 0;
  4344.          h->param.rc.b_mb_tree = 0;
  4345.      }
  4346. -    h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
  4347. +    h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
  4348.      h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
  4349.      if( h->param.rc.i_vbv_buffer_size )
  4350.      {
  4351. @@ -1054,8 +1056,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
  4352.      if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
  4353.          goto fail;
  4354.  
  4355. +    static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
  4356.      /* Checks for known miscompilation issues. */
  4357. -    if( h->cost_mv[1][2013] != 24 )
  4358. +    if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
  4359.      {
  4360.          x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
  4361.          goto fail;
  4362. @@ -1147,11 +1150,22 @@ x264_t *x264_encoder_open( x264_param_t *param )
  4363.          fclose( f );
  4364.      }
  4365.  
  4366. -    x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
  4367. -        h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
  4368. -        h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
  4369. -        h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
  4370. -        "High 4:4:4 Predictive", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
  4371. +    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
  4372. +                          h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
  4373. +                          h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
  4374. +                          h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
  4375. +                          "High 4:4:4 Predictive";
  4376. +
  4377. +    if( h->sps->i_profile_idc < PROFILE_HIGH10 )
  4378. +    {
  4379. +        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
  4380. +            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
  4381. +    }
  4382. +    else
  4383. +    {
  4384. +        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
  4385. +            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
  4386. +    }
  4387.  
  4388.      return h;
  4389.  fail:
  4390. @@ -1836,7 +1850,7 @@ static int x264_slice_write( x264_t *h )
  4391.          bs_align_1( &h->out.bs );
  4392.  
  4393.          /* init cabac */
  4394. -        x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
  4395. +        x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
  4396.          x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
  4397.      }
  4398.      h->mb.i_last_qp = h->sh.i_qp;
  4399. @@ -2705,6 +2719,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  4400.      for( int i = 0; i < 3; i++ )
  4401.      {
  4402.          pic_out->img.i_stride[i] = h->fdec->i_stride[i];
  4403. +        // FIXME This breaks the API when pixel != uint8_t.
  4404.          pic_out->img.plane[i] = h->fdec->plane[i];
  4405.      }
  4406.  
  4407. diff --git a/encoder/macroblock.h b/encoder/macroblock.h
  4408. index b1b02fa..7c83344 100644
  4409. --- a/encoder/macroblock.h
  4410. +++ b/encoder/macroblock.h
  4411. @@ -26,8 +26,8 @@
  4412.  
  4413.  #include "common/macroblock.h"
  4414.  
  4415. -extern const int x264_lambda2_tab[52];
  4416. -extern const uint8_t x264_lambda_tab[52];
  4417. +extern const int x264_lambda2_tab[QP_MAX_MAX+1];
  4418. +extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
  4419.  
  4420.  void x264_rdo_init( void );
  4421.  
  4422. diff --git a/encoder/me.h b/encoder/me.h
  4423. index 912b05d..b125f3d 100644
  4424. --- a/encoder/me.h
  4425. +++ b/encoder/me.h
  4426. @@ -68,7 +68,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
  4427.  void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
  4428.  uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
  4429.  
  4430. -extern uint16_t *x264_cost_mv_fpel[92][4];
  4431. +extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
  4432.  
  4433.  #define COPY1_IF_LT(x,y)\
  4434.  if((y)<(x))\
  4435. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  4436. index 6fdaa98..bdf44dc 100644
  4437. --- a/encoder/ratecontrol.c
  4438. +++ b/encoder/ratecontrol.c
  4439. @@ -219,7 +219,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
  4440.      uint32_t ssd = res >> 32;
  4441.      frame->i_pixel_sum[i] += sum;
  4442.      frame->i_pixel_ssd[i] += ssd;
  4443. -    return ssd - (sum * sum >> shift);
  4444. +    return ssd - ((uint64_t)sum * sum >> shift);
  4445.  }
  4446.  
  4447.  // Find the total AC energy of the block in all planes.
  4448. @@ -300,7 +300,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
  4449.              avg_adj /= h->mb.i_mb_count;
  4450.              avg_adj_pow2 /= h->mb.i_mb_count;
  4451.              strength = h->param.rc.f_aq_strength * avg_adj;
  4452. -            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
  4453. +            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f + 2*(BIT_DEPTH-8))) / avg_adj;
  4454.          }
  4455.          else
  4456.              strength = h->param.rc.f_aq_strength * 1.0397f;
  4457. @@ -318,7 +318,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
  4458.                  else
  4459.                  {
  4460.                      uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
  4461. -                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
  4462. +                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
  4463.                  }
  4464.                  if( quant_offsets )
  4465.                      qp_adj += quant_offsets[mb_xy];
  4466. @@ -620,8 +620,8 @@ int x264_ratecontrol_new( x264_t *h )
  4467.      rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor );
  4468.      rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor );
  4469.      rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
  4470. -    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
  4471. -    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
  4472. +    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX );
  4473. +    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX );
  4474.      h->mb.ip_offset = rc->ip_offset + 0.5;
  4475.  
  4476.      rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
  4477. @@ -1231,7 +1231,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
  4478.  
  4479.      rc->qpa_rc =
  4480.      rc->qpa_aq = 0;
  4481. -    rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
  4482. +    rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
  4483.      h->fdec->f_qp_avg_rc =
  4484.      h->fdec->f_qp_avg_aq =
  4485.      rc->qpm = q;
  4486. @@ -1416,9 +1416,9 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
  4487.               * So just calculate the average QP used so far. */
  4488.              h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
  4489.                                        : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
  4490. -            rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
  4491. -            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
  4492. -            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
  4493. +            rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
  4494. +            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
  4495. +            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
  4496.  
  4497.              x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
  4498.              x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
  4499. @@ -2652,7 +2652,7 @@ static int init_pass2( x264_t *h )
  4500.          }
  4501.          else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 )
  4502.          {
  4503. -            if( h->param.rc.i_qp_max < 51 )
  4504. +            if( h->param.rc.i_qp_max < QP_MAX )
  4505.                  x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max );
  4506.              else
  4507.                  x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n");
  4508. diff --git a/encoder/rdo.c b/encoder/rdo.c
  4509. index afaa894..4fae811 100644
  4510. --- a/encoder/rdo.c
  4511. +++ b/encoder/rdo.c
  4512. @@ -443,10 +443,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  4513.          /* We only need to zero an empty 4x4 block. 8x8 can be
  4514.             implicitly emptied via zero nnz, as can dc. */
  4515.          if( i_coefs == 16 && !dc )
  4516. -        {
  4517. -            M128( &dct[0] ) = M128_ZERO;
  4518. -            M128( &dct[8] ) = M128_ZERO;
  4519. -        }
  4520. +            memset( dct, 0, 16 * sizeof(dctcoef) );
  4521.          return 0;
  4522.      }
  4523.  
  4524. @@ -613,10 +610,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
  4525.      if( bnode == &nodes_cur[0] )
  4526.      {
  4527.          if( i_coefs == 16 && !dc )
  4528. -        {
  4529. -            M128( &dct[0] ) = M128_ZERO;
  4530. -            M128( &dct[8] ) = M128_ZERO;
  4531. -        }
  4532. +            memset( dct, 0, 16 * sizeof(dctcoef) );
  4533.          return 0;
  4534.      }
  4535.  
  4536. diff --git a/encoder/set.c b/encoder/set.c
  4537. index 9e6e736..55d6df7 100644
  4538. --- a/encoder/set.c
  4539. +++ b/encoder/set.c
  4540. @@ -104,6 +104,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
  4541.      sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
  4542.      if( sps->b_qpprime_y_zero_transform_bypass )
  4543.          sps->i_profile_idc  = PROFILE_HIGH444_PREDICTIVE;
  4544. +    else if( BIT_DEPTH > 8 )
  4545. +        sps->i_profile_idc  = PROFILE_HIGH10;
  4546.      else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
  4547.          sps->i_profile_idc  = PROFILE_HIGH;
  4548.      else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
  4549. @@ -260,8 +262,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
  4550.      if( sps->i_profile_idc >= PROFILE_HIGH )
  4551.      {
  4552.          bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
  4553. -        bs_write_ue( s, 0 ); // bit_depth_luma_minus8
  4554. -        bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
  4555. +        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
  4556. +        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
  4557.          bs_write( s, 1, sps->b_qpprime_y_zero_transform_bypass );
  4558.          bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
  4559.      }
  4560. @@ -488,7 +490,7 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
  4561.      bs_write( s, 1, pps->b_weighted_pred );
  4562.      bs_write( s, 2, pps->b_weighted_bipred );
  4563.  
  4564. -    bs_write_se( s, pps->i_pic_init_qp - 26 );
  4565. +    bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
  4566.      bs_write_se( s, pps->i_pic_init_qs - 26 );
  4567.      bs_write_se( s, pps->i_chroma_qp_index_offset );
  4568.  
  4569. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  4570. index 7d69b71..ad2a8c2 100644
  4571. --- a/encoder/slicetype.c
  4572. +++ b/encoder/slicetype.c
  4573. @@ -303,7 +303,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  4574.                                    (mv1)[0], (mv1)[1], 8, 8, w ); \
  4575.              h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
  4576.          } \
  4577. -        i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
  4578. +        i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
  4579.                             m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
  4580.          COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
  4581.      }
  4582. @@ -393,9 +393,9 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
  4583.              }
  4584.  
  4585.              x264_me_search( h, &m[l], mvc, i_mvc );
  4586. -            m[l].cost -= 2; // remove mvcost from skip mbs
  4587. +            m[l].cost -= 2 * a->i_lambda; // remove mvcost from skip mbs
  4588.              if( M32( m[l].mv ) )
  4589. -                m[l].cost += 5;
  4590. +                m[l].cost += 5 * a->i_lambda;
  4591.  
  4592.  skip_motionest:
  4593.              CP32( fenc_mvs[l], m[l].mv );
  4594. @@ -418,7 +418,7 @@ lowres_intra_mb:
  4595.          ALIGNED_ARRAY_16( pixel, edge,[33] );
  4596.          pixel *pix = &pix1[8+FDEC_STRIDE - 1];
  4597.          pixel *src = &fenc->lowres[0][i_pel_offset - 1];
  4598. -        const int intra_penalty = 5;
  4599. +        const int intra_penalty = 5 * a->i_lambda;
  4600.          int satds[3];
  4601.  
  4602.          memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
  4603. @@ -496,7 +496,7 @@ lowres_intra_mb:
  4604.          }
  4605.      }
  4606.  
  4607. -    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost + (list_used << LOWRES_COST_SHIFT);
  4608. +    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
  4609.  }
  4610.  #undef TRY_BIDIR
  4611.  
  4612. diff --git a/tools/checkasm.c b/tools/checkasm.c
  4613. index 7fa2c0c..a5ffa17 100644
  4614. --- a/tools/checkasm.c
  4615. +++ b/tools/checkasm.c
  4616. @@ -40,8 +40,10 @@
  4617.  uint8_t *buf1, *buf2;
  4618.  /* buf3, buf4: used to store output */
  4619.  uint8_t *buf3, *buf4;
  4620. -/* pbuf*: point to the same memory as above, just for type convenience */
  4621. -pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
  4622. +/* pbuf1, pbuf2: initialised to random pixel data; they shouldn't be written into. */
  4623. +pixel *pbuf1, *pbuf2;
  4624. +/* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
  4625. +pixel *pbuf3, *pbuf4;
  4626.  
  4627.  int quiet = 0;
  4628.  
  4629. @@ -256,11 +258,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
  4630.          int z = i|(i>>4);
  4631.          z ^= z>>2;
  4632.          z ^= z>>1;
  4633. -        buf3[i] = ~(buf4[i] = -(z&1));
  4634. +        pbuf4[i] = -(z&1) & PIXEL_MAX;
  4635. +        pbuf3[i] = ~pbuf4[i] & PIXEL_MAX;
  4636.      }
  4637.      // random pattern made of maxed pixel differences, in case an intermediate value overflows
  4638.      for( int i = 256; i < 0x1000; i++ )
  4639. -        buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
  4640. +    {
  4641. +        pbuf4[i] = -(pbuf1[i&~0x88]&1) & PIXEL_MAX;
  4642. +        pbuf3[i] = ~(pbuf4[i]) & PIXEL_MAX;
  4643. +    }
  4644.  
  4645.  #define TEST_PIXEL( name, align ) \
  4646.      ok = 1, used_asm = 0; \
  4647. @@ -535,22 +541,22 @@ static int check_dct( int cpu_ref, int cpu_new )
  4648.          used_asm = 1; \
  4649.          call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
  4650.          call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
  4651. -        if( memcmp( t1, t2, size ) ) \
  4652. +        if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
  4653.          { \
  4654.              ok = 0; \
  4655.              fprintf( stderr, #name " [FAILED]\n" ); \
  4656.          } \
  4657.      }
  4658.      ok = 1; used_asm = 0;
  4659. -    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
  4660. -    TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
  4661. -    TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4*2 );
  4662. -    TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
  4663. +    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16 );
  4664. +    TEST_DCT( sub8x8_dct, dct1, dct2, 16*4 );
  4665. +    TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4 );
  4666. +    TEST_DCT( sub16x16_dct, dct1, dct2, 16*16 );
  4667.      report( "sub_dct4 :" );
  4668.  
  4669.      ok = 1; used_asm = 0;
  4670. -    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
  4671. -    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
  4672. +    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64 );
  4673. +    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*4 );
  4674.      report( "sub_dct8 :" );
  4675.  #undef TEST_DCT
  4676.  
  4677. @@ -574,13 +580,13 @@ static int check_dct( int cpu_ref, int cpu_new )
  4678.      { \
  4679.          set_func_name( #name ); \
  4680.          used_asm = 1; \
  4681. -        memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
  4682. -        memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
  4683. -        memcpy( dct1, src, 512 * sizeof(pixel) ); \
  4684. -        memcpy( dct2, src, 512 * sizeof(pixel) ); \
  4685. +        memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \
  4686. +        memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \
  4687. +        memcpy( dct1, src, 256 * sizeof(dctcoef) ); \
  4688. +        memcpy( dct2, src, 256 * sizeof(dctcoef) ); \
  4689.          call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
  4690.          call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
  4691. -        if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
  4692. +        if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \
  4693.          { \
  4694.              ok = 0; \
  4695.              fprintf( stderr, #name " [FAILED]\n" ); \
  4696. @@ -615,10 +621,10 @@ static int check_dct( int cpu_ref, int cpu_new )
  4697.                  dct1[0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
  4698.                             : i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
  4699.                             : ((*p++)&0x1fff)-0x1000; /* general case */\
  4700. -            memcpy( dct2, dct1, 32 );\
  4701. +            memcpy( dct2, dct1, 16 * sizeof(dctcoef) );\
  4702.              call_c1( dct_c.name, dct1[0] );\
  4703.              call_a1( dct_asm.name, dct2[0] );\
  4704. -            if( memcmp( dct1, dct2, 32 ) )\
  4705. +            if( memcmp( dct1, dct2, 16 * sizeof(dctcoef) ) )\
  4706.                  ok = 0;\
  4707.          }\
  4708.          call_c2( dct_c.name, dct1[0] );\
  4709. @@ -658,11 +664,11 @@ static int check_dct( int cpu_ref, int cpu_new )
  4710.          int nz_a, nz_c; \
  4711.          set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
  4712.          used_asm = 1; \
  4713. -        memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4714. -        memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4715. +        memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4716. +        memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4717.          nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
  4718.          nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
  4719. -        if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
  4720. +        if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
  4721.          { \
  4722.              ok = 0; \
  4723.              fprintf( stderr, #name " [FAILED]\n" ); \
  4724. @@ -680,8 +686,8 @@ static int check_dct( int cpu_ref, int cpu_new )
  4725.          used_asm = 1; \
  4726.          for( int i = 0; i < 2; i++ ) \
  4727.          { \
  4728. -            memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4729. -            memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4730. +            memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4731. +            memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
  4732.              for( int j = 0; j < 4; j++ ) \
  4733.              { \
  4734.                  memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
  4735. @@ -689,7 +695,7 @@ static int check_dct( int cpu_ref, int cpu_new )
  4736.              } \
  4737.              nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
  4738.              nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
  4739. -            if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
  4740. +            if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
  4741.              { \
  4742.                  ok = 0; \
  4743.                  fprintf( stderr, #name " [FAILED]\n" ); \
  4744. @@ -779,11 +785,11 @@ static int check_mc( int cpu_ref, int cpu_new )
  4745.              const x264_weight_t *weight = weight_none; \
  4746.              set_func_name( "mc_luma_%dx%d", w, h ); \
  4747.              used_asm = 1; \
  4748. -            memset( buf3, 0xCD, 1024 ); \
  4749. -            memset( buf4, 0xCD, 1024 ); \
  4750. +            for( int i = 0; i < 1024; i++ ) \
  4751. +                pbuf3[i] = pbuf4[i] = 0xCD; \
  4752.              call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
  4753.              call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
  4754. -            if( memcmp( buf3, buf4, 1024 ) ) \
  4755. +            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
  4756.              { \
  4757.                  fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
  4758.                  ok = 0; \
  4759. @@ -796,8 +802,8 @@ static int check_mc( int cpu_ref, int cpu_new )
  4760.              const x264_weight_t *weight = weight_none; \
  4761.              set_func_name( "get_ref_%dx%d", w, h ); \
  4762.              used_asm = 1; \
  4763. -            memset( buf3, 0xCD, 1024 ); \
  4764. -            memset( buf4, 0xCD, 1024 ); \
  4765. +            for( int i = 0; i < 1024; i++ ) \
  4766. +                pbuf3[i] = pbuf4[i] = 0xCD; \
  4767.              call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
  4768.              ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
  4769.              for( int i = 0; i < h; i++ ) \
  4770. @@ -814,15 +820,15 @@ static int check_mc( int cpu_ref, int cpu_new )
  4771.          { \
  4772.              set_func_name( "mc_chroma_%dx%d", w, h ); \
  4773.              used_asm = 1; \
  4774. -            memset( buf3, 0xCD, 1024 ); \
  4775. -            memset( buf4, 0xCD, 1024 ); \
  4776. +            for( int i = 0; i < 1024; i++ ) \
  4777. +                pbuf3[i] = pbuf4[i] = 0xCD; \
  4778.              call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
  4779.              call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
  4780.              /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
  4781.              for( int j = 0; j < h; j++ ) \
  4782.                  for( int i = w; i < 4; i++ ) \
  4783.                      dst2[i+j*16] = dst1[i+j*16]; \
  4784. -            if( memcmp( buf3, buf4, 1024 ) ) \
  4785. +            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
  4786.              { \
  4787.                  fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
  4788.                  ok = 0; \
  4789. @@ -867,15 +873,15 @@ static int check_mc( int cpu_ref, int cpu_new )
  4790.      ok = 1, used_asm = 0; \
  4791.      for( int i = 0; i < 10; i++ ) \
  4792.      { \
  4793. -        memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
  4794. -        memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
  4795. +        memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \
  4796. +        memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \
  4797.          if( mc_a.name[i] != mc_ref.name[i] ) \
  4798.          { \
  4799.              set_func_name( "%s_%s", #name, pixel_names[i] ); \
  4800.              used_asm = 1; \
  4801.              call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
  4802.              call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
  4803. -            if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
  4804. +            if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
  4805.              { \
  4806.                  ok = 0; \
  4807.                  fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
  4808. @@ -971,8 +977,8 @@ static int check_mc( int cpu_ref, int cpu_new )
  4809.          void *tmp = pbuf3+49*64;
  4810.          set_func_name( "hpel_filter" );
  4811.          ok = 1; used_asm = 1;
  4812. -        memset( buf3, 0, 4096 * sizeof(pixel) );
  4813. -        memset( buf4, 0, 4096 * sizeof(pixel) );
  4814. +        memset( pbuf3, 0, 4096 * sizeof(pixel) );
  4815. +        memset( pbuf4, 0, 4096 * sizeof(pixel) );
  4816.          call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
  4817.          call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
  4818.          for( int i = 0; i < 3; i++ )
  4819. @@ -1030,13 +1036,13 @@ static int check_mc( int cpu_ref, int cpu_new )
  4820.          int stride = 80;\
  4821.          set_func_name( #name );\
  4822.          used_asm = 1;\
  4823. -        memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
  4824. -        memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
  4825. -        uint16_t *sum = (uint16_t*)buf3;\
  4826. +        memcpy( pbuf3, pbuf1, size*2*stride * sizeof(pixel) );\
  4827. +        memcpy( pbuf4, pbuf1, size*2*stride * sizeof(pixel) );\
  4828. +        uint16_t *sum = (uint16_t*)pbuf3;\
  4829.          call_c1( mc_c.name, __VA_ARGS__ );\
  4830. -        sum = (uint16_t*)buf4;\
  4831. +        sum = (uint16_t*)pbuf4;\
  4832.          call_a1( mc_a.name, __VA_ARGS__ );\
  4833. -        if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
  4834. +        if( memcmp( pbuf3, pbuf4, (stride-8)*2 * sizeof(pixel) )\
  4835.              || (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
  4836.              ok = 0;\
  4837.          call_c2( mc_c.name, __VA_ARGS__ );\
  4838. @@ -1096,11 +1102,11 @@ static int check_deblock( int cpu_ref, int cpu_new )
  4839.      /* not exactly the real values of a,b,tc but close enough */
  4840.      for( int i = 35, a = 255, c = 250; i >= 0; i-- )
  4841.      {
  4842. -        alphas[i] = a;
  4843. -        betas[i] = (i+1)/2;
  4844. -        tcs[i][0] = tcs[i][3] = (c+6)/10;
  4845. -        tcs[i][1] = (c+7)/15;
  4846. -        tcs[i][2] = (c+9)/20;
  4847. +        alphas[i] = a << (BIT_DEPTH-8);
  4848. +        betas[i] = (i+1)/2 << (BIT_DEPTH-8);
  4849. +        tcs[i][0] = tcs[i][3] = (c+6)/10 << (BIT_DEPTH-8);
  4850. +        tcs[i][1] = (c+7)/15 << (BIT_DEPTH-8);
  4851. +        tcs[i][2] = (c+9)/20 << (BIT_DEPTH-8);
  4852.          a = a*9/10;
  4853.          c = c*9/10;
  4854.      }
  4855. @@ -1111,15 +1117,15 @@ static int check_deblock( int cpu_ref, int cpu_new )
  4856.          int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
  4857.          for( int j = 0; j < 1024; j++ ) \
  4858.              /* two distributions of random to excersize different failure modes */ \
  4859. -            buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
  4860. -        memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
  4861. +            pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
  4862. +        memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \
  4863.          if( db_a.name != db_ref.name ) \
  4864.          { \
  4865.              set_func_name( #name ); \
  4866.              used_asm = 1; \
  4867.              call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
  4868.              call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
  4869. -            if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
  4870. +            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
  4871.              { \
  4872.                  ok = 0; \
  4873.                  fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
  4874. @@ -1200,7 +1206,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  4875.      h->pps = h->pps_array;
  4876.      x264_param_default( &h->param );
  4877.      h->chroma_qp_table = i_chroma_qp_table + 12;
  4878. -    h->param.rc.i_qp_min = 26;
  4879. +    h->param.rc.i_qp_min = 26 + QP_BD_OFFSET;
  4880.      h->param.analyse.b_transform_8x8 = 1;
  4881.  
  4882.      for( int i_cqm = 0; i_cqm < 4; i_cqm++ )
  4883. @@ -1219,9 +1225,10 @@ static int check_quant( int cpu_ref, int cpu_new )
  4884.          }
  4885.          else
  4886.          {
  4887. +            int max_scale = BIT_DEPTH < 10 ? 255 : 228;
  4888.              if( i_cqm == 2 )
  4889.                  for( int i = 0; i < 64; i++ )
  4890. -                    cqm_buf[i] = 10 + rand() % 246;
  4891. +                    cqm_buf[i] = 10 + rand() % (max_scale - 9);
  4892.              else
  4893.                  for( int i = 0; i < 64; i++ )
  4894.                      cqm_buf[i] = 1;
  4895. @@ -1260,7 +1267,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  4896.          { \
  4897.              set_func_name( #name ); \
  4898.              used_asms[0] = 1; \
  4899. -            for( int qp = 51; qp > 0; qp-- ) \
  4900. +            for( int qp = QP_MAX; qp > 0; qp-- ) \
  4901.              { \
  4902.                  for( int j = 0; j < 2; j++ ) \
  4903.                  { \
  4904. @@ -1269,7 +1276,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  4905.                          dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
  4906.                      result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
  4907.                      result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
  4908. -                    if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
  4909. +                    if( memcmp( dct1, dct2, 16*sizeof(dctcoef) ) || result_c != result_a ) \
  4910.                      { \
  4911.                          oks[0] = 0; \
  4912.                          fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
  4913. @@ -1286,14 +1293,14 @@ static int check_quant( int cpu_ref, int cpu_new )
  4914.          { \
  4915.              set_func_name( #qname ); \
  4916.              used_asms[0] = 1; \
  4917. -            for( int qp = 51; qp > 0; qp-- ) \
  4918. +            for( int qp = QP_MAX; qp > 0; qp-- ) \
  4919.              { \
  4920.                  for( int j = 0; j < 2; j++ ) \
  4921.                  { \
  4922.                      INIT_QUANT##w(j) \
  4923.                      int result_c = call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
  4924.                      int result_a = call_a1( qf_a.qname, dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
  4925. -                    if( memcmp( dct1, dct2, w*w*2 ) || result_c != result_a ) \
  4926. +                    if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) || result_c != result_a ) \
  4927.                      { \
  4928.                          oks[0] = 0; \
  4929.                          fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
  4930. @@ -1317,14 +1324,14 @@ static int check_quant( int cpu_ref, int cpu_new )
  4931.          { \
  4932.              set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
  4933.              used_asms[1] = 1; \
  4934. -            for( int qp = 51; qp > 0; qp-- ) \
  4935. +            for( int qp = QP_MAX; qp > 0; qp-- ) \
  4936.              { \
  4937.                  INIT_QUANT##w(1) \
  4938.                  call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
  4939. -                memcpy( dct2, dct1, w*w*2 ); \
  4940. +                memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
  4941.                  call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
  4942.                  call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
  4943. -                if( memcmp( dct1, dct2, w*w*2 ) ) \
  4944. +                if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
  4945.                  { \
  4946.                      oks[1] = 0; \
  4947.                      fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
  4948. @@ -1345,15 +1352,15 @@ static int check_quant( int cpu_ref, int cpu_new )
  4949.          { \
  4950.              set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
  4951.              used_asms[1] = 1; \
  4952. -            for( int qp = 51; qp > 0; qp-- ) \
  4953. +            for( int qp = QP_MAX; qp > 0; qp-- ) \
  4954.              { \
  4955.                  for( int i = 0; i < 16; i++ ) \
  4956.                      dct1[i] = rand(); \
  4957.                  call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); \
  4958. -                memcpy( dct2, dct1, w*w*2 ); \
  4959. +                memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
  4960.                  call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
  4961.                  call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
  4962. -                if( memcmp( dct1, dct2, w*w*2 ) ) \
  4963. +                if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
  4964.                  { \
  4965.                      oks[1] = 0; \
  4966.                      fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
  4967. @@ -1381,12 +1388,12 @@ static int check_quant( int cpu_ref, int cpu_new )
  4968.          for( int size = 16; size <= 64; size += 48 )
  4969.          {
  4970.              set_func_name( "denoise_dct" );
  4971. -            memcpy( dct1, buf1, size*2 );
  4972. -            memcpy( dct2, buf1, size*2 );
  4973. +            memcpy( dct1, buf1, size*sizeof(dctcoef) );
  4974. +            memcpy( dct2, buf1, size*sizeof(dctcoef) );
  4975.              memcpy( buf3+256, buf3, 256 );
  4976.              call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
  4977.              call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
  4978. -            if( memcmp( dct1, dct2, size*2 ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
  4979. +            if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
  4980.                  ok = 0;
  4981.              call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
  4982.              call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
  4983. @@ -1431,7 +1438,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  4984.          { \
  4985.              int nnz = 0; \
  4986.              int max = rand() & (w*w-1); \
  4987. -            memset( dct1, 0, w*w*2 ); \
  4988. +            memset( dct1, 0, w*w*sizeof(dctcoef) ); \
  4989.              for( int idx = ac; idx < max; idx++ ) \
  4990.                  nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
  4991.              if( !nnz ) \
  4992. @@ -1464,7 +1471,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  4993.              x264_run_level_t runlevel_c, runlevel_a; \
  4994.              int nnz = 0; \
  4995.              int max = rand() & (w*w-1); \
  4996. -            memset( dct1, 0, w*w*2 ); \
  4997. +            memset( dct1, 0, w*w*sizeof(dctcoef) ); \
  4998.              memcpy( &runlevel_a, buf1+i, sizeof(x264_run_level_t) ); \
  4999.              memcpy( &runlevel_c, buf1+i, sizeof(x264_run_level_t) ); \
  5000.              for( int idx = ac; idx < max; idx++ ) \
  5001. @@ -1474,7 +1481,7 @@ static int check_quant( int cpu_ref, int cpu_new )
  5002.              int result_c = call_c( qf_c.lastname, dct1+ac, &runlevel_c ); \
  5003.              int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
  5004.              if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
  5005. -                memcmp(runlevel_c.level, runlevel_a.level, sizeof(int16_t)*result_c) || \
  5006. +                memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
  5007.                  memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
  5008.              { \
  5009.                  ok = 0; \
  5010. @@ -1529,11 +1536,11 @@ static int check_intra( int cpu_ref, int cpu_new )
  5011.      {\
  5012.          set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
  5013.          used_asm = 1;\
  5014. -        memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
  5015. -        memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
  5016. +        memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\
  5017. +        memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\
  5018.          call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
  5019.          call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
  5020. -        if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
  5021. +        if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\
  5022.          {\
  5023.              fprintf( stderr, #name "[%d] :  [FAILED]\n", dir );\
  5024.              ok = 0;\
  5025. @@ -1544,7 +1551,7 @@ static int check_intra( int cpu_ref, int cpu_new )
  5026.              {\
  5027.                  printf( "%2x ", edge[14-j] );\
  5028.                  for( int k = 0; k < w; k++ )\
  5029. -                    printf( "%2x ", buf4[48+k+j*32] );\
  5030. +                    printf( "%2x ", pbuf4[48+k+j*32] );\
  5031.                  printf( "\n" );\
  5032.              }\
  5033.              printf( "\n" );\
  5034. @@ -1552,7 +1559,7 @@ static int check_intra( int cpu_ref, int cpu_new )
  5035.              {\
  5036.                  printf( "   " );\
  5037.                  for( int k = 0; k < w; k++ )\
  5038. -                    printf( "%2x ", buf3[48+k+j*32] );\
  5039. +                    printf( "%2x ", pbuf3[48+k+j*32] );\
  5040.                  printf( "\n" );\
  5041.              }\
  5042.          }\
  5043. @@ -1831,8 +1838,9 @@ int main(int argc, char *argv[])
  5044.      fprintf( stderr, "x264: using random seed %u\n", seed );
  5045.      srand( seed );
  5046.  
  5047. -    buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
  5048. -    if( !buf1 )
  5049. +    buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) + 16*BENCH_ALIGNS );
  5050. +    pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) + 16*BENCH_ALIGNS );
  5051. +    if( !buf1 || !pbuf1 )
  5052.      {
  5053.          fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
  5054.          return -1;
  5055. @@ -1840,15 +1848,17 @@ int main(int argc, char *argv[])
  5056.  #define INIT_POINTER_OFFSETS\
  5057.      buf2 = buf1 + 0xf00;\
  5058.      buf3 = buf2 + 0xf00;\
  5059. -    buf4 = buf3 + 0x1000;\
  5060. -    pbuf1 = (pixel*)buf1;\
  5061. -    pbuf2 = (pixel*)buf2;\
  5062. +    buf4 = buf3 + 0x1000*sizeof(pixel);\
  5063. +    pbuf2 = pbuf1 + 0xf00;\
  5064.      pbuf3 = (pixel*)buf3;\
  5065.      pbuf4 = (pixel*)buf4;
  5066.      INIT_POINTER_OFFSETS;
  5067.      for( int i = 0; i < 0x1e00; i++ )
  5068. +    {
  5069.          buf1[i] = rand() & 0xFF;
  5070. -    memset( buf1+0x1e00, 0, 0x2000 );
  5071. +        pbuf1[i] = rand() & PIXEL_MAX;
  5072. +    }
  5073. +    memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) );
  5074.  
  5075.      /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
  5076.      if( do_bench )
  5077. @@ -1857,6 +1867,7 @@ int main(int argc, char *argv[])
  5078.              INIT_POINTER_OFFSETS;
  5079.              ret |= x264_stack_pagealign( check_all_flags, i*16 );
  5080.              buf1 += 16;
  5081. +            pbuf1 += 16;
  5082.              quiet = 1;
  5083.              fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
  5084.          }
  5085. diff --git a/x264.c b/x264.c
  5086. index 0bede93..c1141ab 100644
  5087. --- a/x264.c
  5088. +++ b/x264.c
  5089. @@ -262,6 +262,7 @@ static void Help( x264_param_t *defaults, int longhelp )
  5090.          " .mkv -> Matroska\n"
  5091.          " .flv -> Flash Video\n"
  5092.          " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
  5093. +        "Output bit depth: %d (configured at compile time)\n"
  5094.          "\n"
  5095.          "Options:\n"
  5096.          "\n"
  5097. @@ -286,10 +287,11 @@ static void Help( x264_param_t *defaults, int longhelp )
  5098.          "no",
  5099.  #endif
  5100.  #if HAVE_GPAC
  5101. -        "yes"
  5102. +        "yes",
  5103.  #else
  5104. -        "no"
  5105. +        "no",
  5106.  #endif
  5107. +        BIT_DEPTH
  5108.        );
  5109.      H0( "Example usage:\n" );
  5110.      H0( "\n" );
  5111. @@ -311,8 +313,8 @@ static void Help( x264_param_t *defaults, int longhelp )
  5112.      H0( "\n" );
  5113.      H0( "Presets:\n" );
  5114.      H0( "\n" );
  5115. -    H0( "      --profile               Force the limits of an H.264 profile [high]\n"
  5116. -        "                                  Overrides all settings.\n" );
  5117. +    H0( "      --profile               Force the limits of an H.264 profile [%s]\n"
  5118. +        "                                  Overrides all settings.\n", BIT_DEPTH > 8 ? "high10" : "high" );
  5119.      H2( "                                  - baseline:\n"
  5120.          "                                    --no-8x8dct --bframes 0 --no-cabac\n"
  5121.          "                                    --cqm flat --weightp 0\n"
  5122. @@ -322,8 +324,11 @@ static void Help( x264_param_t *defaults, int longhelp )
  5123.          "                                    --no-8x8dct --cqm flat\n"
  5124.          "                                    No lossless.\n"
  5125.          "                                  - high:\n"
  5126. -        "                                    No lossless.\n" );
  5127. -        else H0( "                                  - baseline,main,high\n" );
  5128. +        "                                    No lossless.\n"
  5129. +        "                                  - high10:\n"
  5130. +        "                                    No lossless.\n"
  5131. +        "                                    Support for bit depth 8-10.\n" );
  5132. +        else H0( "                                  - baseline,main,high,high10\n" );
  5133.      H0( "      --preset                Use a preset to select encoding settings [medium]\n"
  5134.          "                                  Overridden by user settings.\n" );
  5135.      H2( "                                  - ultrafast:\n"
  5136. @@ -453,9 +458,9 @@ static void Help( x264_param_t *defaults, int longhelp )
  5137.      H0( "\n" );
  5138.      H0( "Ratecontrol:\n" );
  5139.      H0( "\n" );
  5140. -    H1( "  -q, --qp <integer>          Force constant QP (0-51, 0=lossless)\n" );
  5141. +    H1( "  -q, --qp <integer>          Force constant QP (0-%d, 0=lossless)\n", QP_MAX );
  5142.      H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
  5143. -    H0( "      --crf <float>           Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
  5144. +    H0( "      --crf <float>           Quality-based VBR (0-%d, 0=lossless) [%.1f]\n", QP_MAX, defaults->rc.f_rf_constant );
  5145.      H1( "      --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
  5146.      H0( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
  5147.      H0( "      --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
  5148. @@ -1040,6 +1045,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
  5149.  #else
  5150.                  printf( "using a non-gcc compiler\n" );
  5151.  #endif
  5152. +                printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
  5153.                  exit(0);
  5154.              case OPT_FRAMES:
  5155.                  param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
  5156. @@ -1318,7 +1324,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
  5157.          else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
  5158.          else if( type == 'b' ) pic->i_type = X264_TYPE_B;
  5159.          else ret = 0;
  5160. -        if( ret != 3 || qp < -1 || qp > 51 )
  5161. +        if( ret != 3 || qp < -1 || qp > QP_MAX )
  5162.          {
  5163.              x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
  5164.              fclose( opt->qpfile );
  5165. diff --git a/x264.h b/x264.h
  5166. index 097365a..4d9b9ca 100644
  5167. --- a/x264.h
  5168. +++ b/x264.h
  5169. @@ -344,7 +344,7 @@ typedef struct x264_param_t
  5170.      {
  5171.          int         i_rc_method;    /* X264_RC_* */
  5172.  
  5173. -        int         i_qp_constant;  /* 0-51 */
  5174. +        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
  5175.          int         i_qp_min;       /* min allowed QP value */
  5176.          int         i_qp_max;       /* max allowed QP value */
  5177.          int         i_qp_step;      /* max QP step between frames */
  5178. @@ -550,7 +550,7 @@ void    x264_param_apply_fastfirstpass( x264_param_t * );
  5179.  /* x264_param_apply_profile:
  5180.   *      Applies the restrictions of the given profile.
  5181.   *      Currently available profiles are, from most to least restrictive: */
  5182. -static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
  5183. +static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", 0 };
  5184.  
  5185.  /*      (can be NULL, in which case the function will do nothing)
  5186.   *
  5187. --
  5188. 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement