Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From aa1a8435000228c4d9e74da0f9fd3d16e85a3e80 Mon Sep 17 00:00:00 2001
- From: Loren Merritt <pengvado@akuvian.org>
- Date: Sat, 26 Jun 2010 20:55:59 -0700
- Subject: [PATCH 1/7] Simplify pixel_ads
- ---
- common/macroblock.c | 2 +-
- common/x86/pixel-a.asm | 175 +++++++++++++++++------------------------------
- encoder/me.c | 2 +-
- 3 files changed, 65 insertions(+), 114 deletions(-)
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 8e9b06d..4561d8a 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -341,7 +341,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
- int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
- int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
- - ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
- + ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
- scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
- }
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
- diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
- index 78ca4c7..1756f86 100644
- --- a/common/x86/pixel-a.asm
- +++ b/common/x86/pixel-a.asm
- @@ -2142,34 +2142,24 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
- ; Successive Elimination ADS
- ;=============================================================================
- -%macro ADS_START 1 ; unroll_size
- -%ifdef ARCH_X86_64
- - %define t0 r6
- +%macro ADS_START 0
- %ifdef WIN64
- - mov r4, r4mp
- - movsxd r5, dword r5m
- + movsxd r5, r5d
- %endif
- - mov r10, rsp
- -%else
- - %define t0 r4
- - mov rbp, rsp
- -%endif
- - mov r0d, r5m
- - sub rsp, r0
- - sub rsp, %1*4-1
- - and rsp, ~15
- - mov t0, rsp
- + mov r0d, r5d
- + lea r6, [r4+r5+15]
- + and r6, ~15;
- shl r2d, 1
- %endmacro
- -%macro ADS_END 1
- +%macro ADS_END 1 ; unroll_size
- add r1, 8*%1
- add r3, 8*%1
- - add t0, 4*%1
- + add r6, 4*%1
- sub r0d, 4*%1
- jg .loop
- %ifdef WIN64
- - RESTORE_XMM r10
- + RESTORE_XMM rsp
- %endif
- jmp ads_mvs
- %endmacro
- @@ -2180,14 +2170,14 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
- ; int pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
- ; uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
- ;-----------------------------------------------------------------------------
- -cglobal pixel_ads4_mmxext, 4,7
- +cglobal pixel_ads4_mmxext, 6,7
- movq mm6, [r0]
- movq mm4, [r0+8]
- pshufw mm7, mm6, 0
- pshufw mm6, mm6, 0xAA
- pshufw mm5, mm4, 0
- pshufw mm4, mm4, 0xAA
- - ADS_START 1
- + ADS_START
- .loop:
- movq mm0, [r1]
- movq mm1, [r1+16]
- @@ -2204,25 +2194,19 @@ cglobal pixel_ads4_mmxext, 4,7
- ABS1 mm3, mm1
- paddw mm0, mm2
- paddw mm0, mm3
- -%ifdef WIN64
- - pshufw mm1, [r10+stack_offset+56], 0
- -%elifdef ARCH_X86_64
- - pshufw mm1, [r10+8], 0
- -%else
- - pshufw mm1, [ebp+stack_offset+28], 0
- -%endif
- + pshufw mm1, r6m, 0
- paddusw mm0, [r3]
- psubusw mm1, mm0
- packsswb mm1, mm1
- - movd [t0], mm1
- + movd [r6], mm1
- ADS_END 1
- -cglobal pixel_ads2_mmxext, 4,7
- +cglobal pixel_ads2_mmxext, 6,7
- movq mm6, [r0]
- pshufw mm5, r6m, 0
- pshufw mm7, mm6, 0
- pshufw mm6, mm6, 0xAA
- - ADS_START 1
- + ADS_START
- .loop:
- movq mm0, [r1]
- movq mm1, [r1+r2]
- @@ -2235,13 +2219,13 @@ cglobal pixel_ads2_mmxext, 4,7
- movq mm4, mm5
- psubusw mm4, mm0
- packsswb mm4, mm4
- - movd [t0], mm4
- + movd [r6], mm4
- ADS_END 1
- -cglobal pixel_ads1_mmxext, 4,7
- +cglobal pixel_ads1_mmxext, 6,7
- pshufw mm7, [r0], 0
- pshufw mm6, r6m, 0
- - ADS_START 2
- + ADS_START
- .loop:
- movq mm0, [r1]
- movq mm1, [r1+8]
- @@ -2256,11 +2240,11 @@ cglobal pixel_ads1_mmxext, 4,7
- psubusw mm4, mm0
- psubusw mm5, mm1
- packsswb mm4, mm5
- - movq [t0], mm4
- + movq [r6], mm4
- ADS_END 2
- %macro ADS_SSE2 1
- -cglobal pixel_ads4_%1, 4,7,12
- +cglobal pixel_ads4_%1, 6,7,12
- movdqa xmm4, [r0]
- pshuflw xmm7, xmm4, 0
- pshuflw xmm6, xmm4, 0xAA
- @@ -2273,7 +2257,7 @@ cglobal pixel_ads4_%1, 4,7,12
- %ifdef ARCH_X86_64
- pshuflw xmm8, r6m, 0
- punpcklqdq xmm8, xmm8
- - ADS_START 2
- + ADS_START
- movdqu xmm10, [r1]
- movdqu xmm11, [r1+r2]
- .loop:
- @@ -2299,9 +2283,9 @@ cglobal pixel_ads4_%1, 4,7,12
- movdqa xmm1, xmm8
- psubusw xmm1, xmm0
- packsswb xmm1, xmm1
- - movq [t0], xmm1
- + movq [r6], xmm1
- %else
- - ADS_START 2
- + ADS_START
- .loop:
- movdqu xmm0, [r1]
- movdqu xmm1, [r1+16]
- @@ -2318,18 +2302,18 @@ cglobal pixel_ads4_%1, 4,7,12
- ABS1 xmm3, xmm1
- paddw xmm0, xmm2
- paddw xmm0, xmm3
- - movd xmm1, [ebp+stack_offset+28]
- + movd xmm1, r6m
- movdqu xmm2, [r3]
- pshuflw xmm1, xmm1, 0
- punpcklqdq xmm1, xmm1
- paddusw xmm0, xmm2
- psubusw xmm1, xmm0
- packsswb xmm1, xmm1
- - movq [t0], xmm1
- + movq [r6], xmm1
- %endif ; ARCH
- ADS_END 2
- -cglobal pixel_ads2_%1, 4,7,8
- +cglobal pixel_ads2_%1, 6,7,8
- movq xmm6, [r0]
- movd xmm5, r6m
- pshuflw xmm7, xmm6, 0
- @@ -2338,7 +2322,7 @@ cglobal pixel_ads2_%1, 4,7,8
- punpcklqdq xmm7, xmm7
- punpcklqdq xmm6, xmm6
- punpcklqdq xmm5, xmm5
- - ADS_START 2
- + ADS_START
- .loop:
- movdqu xmm0, [r1]
- movdqu xmm1, [r1+r2]
- @@ -2352,17 +2336,17 @@ cglobal pixel_ads2_%1, 4,7,8
- movdqa xmm1, xmm5
- psubusw xmm1, xmm0
- packsswb xmm1, xmm1
- - movq [t0], xmm1
- + movq [r6], xmm1
- ADS_END 2
- -cglobal pixel_ads1_%1, 4,7,8
- +cglobal pixel_ads1_%1, 6,7,8
- movd xmm7, [r0]
- movd xmm6, r6m
- pshuflw xmm7, xmm7, 0
- pshuflw xmm6, xmm6, 0
- punpcklqdq xmm7, xmm7
- punpcklqdq xmm6, xmm6
- - ADS_START 4
- + ADS_START
- .loop:
- movdqu xmm0, [r1]
- movdqu xmm1, [r1+16]
- @@ -2379,7 +2363,7 @@ cglobal pixel_ads1_%1, 4,7,8
- psubusw xmm4, xmm0
- psubusw xmm5, xmm1
- packsswb xmm4, xmm5
- - movdqa [t0], xmm4
- + movdqa [r6], xmm4
- ADS_END 4
- %endmacro
- @@ -2401,90 +2385,57 @@ ADS_SSE2 ssse3
- ; }
- ; return nmv;
- ; }
- +
- +%macro TEST 1
- + mov [r4+r0*2], r1w
- + test r2d, 0xff<<(%1*8)
- + setne r3b
- + add r0d, r3d
- + inc r1d
- +%endmacro
- +
- cglobal pixel_ads_mvs, 0,7,0
- ads_mvs:
- -%ifdef ARCH_X86_64
- + lea r6, [r4+r5+15]
- + and r6, ~15;
- ; mvs = r4
- - ; masks = rsp
- + ; masks = r6
- ; width = r5
- ; clear last block in case width isn't divisible by 8. (assume divisible by 4, so clearing 4 bytes is enough.)
- -%ifdef WIN64
- - mov r8, r4
- - mov r9, r5
- -%endif
- - xor eax, eax
- - xor esi, esi
- - mov dword [rsp+r9], 0
- + xor r0d, r0d
- + xor r1d, r1d
- + mov [r6+r5], r0d
- jmp .loopi
- +ALIGN 16
- .loopi0:
- - add esi, 8
- - cmp esi, r9d
- + add r1d, 8
- + cmp r1d, r5d
- jge .end
- .loopi:
- - mov rdi, [rsp+rsi]
- - test rdi, rdi
- + mov r2, [r6+r1]
- +%ifdef ARCH_X86_64
- + test r2, r2
- +%else
- + mov r3, r2
- + or r3d, [r6+r1+4]
- +%endif
- jz .loopi0
- - xor ecx, ecx
- -%macro TEST 1
- - mov [r8+rax*2], si
- - test edi, 0xff<<(%1*8)
- - setne cl
- - add eax, ecx
- - inc esi
- -%endmacro
- + xor r3d, r3d
- TEST 0
- TEST 1
- TEST 2
- TEST 3
- - shr rdi, 32
- +%ifdef ARCH_X86_64
- + shr r2, 32
- +%else
- + mov r2d, [r6+r1]
- +%endif
- TEST 0
- TEST 1
- TEST 2
- TEST 3
- - cmp esi, r9d
- - jl .loopi
- -.end:
- - mov rsp, r10
- - RET
- -
- -%else
- - xor eax, eax
- - xor esi, esi
- - mov ebx, [ebp+stack_offset+20] ; mvs
- - mov edi, [ebp+stack_offset+24] ; width
- - mov dword [esp+edi], 0
- - push ebp
- - jmp .loopi
- -.loopi0:
- - add esi, 8
- - cmp esi, edi
- - jge .end
- -.loopi:
- - mov ebp, [esp+esi+4]
- - mov edx, [esp+esi+8]
- - mov ecx, ebp
- - or ecx, edx
- - jz .loopi0
- - xor ecx, ecx
- -%macro TEST 2
- - mov [ebx+eax*2], si
- - test %2, 0xff<<(%1*8)
- - setne cl
- - add eax, ecx
- - inc esi
- -%endmacro
- - TEST 0, ebp
- - TEST 1, ebp
- - TEST 2, ebp
- - TEST 3, ebp
- - TEST 0, edx
- - TEST 1, edx
- - TEST 2, edx
- - TEST 3, edx
- - cmp esi, edi
- + cmp r1d, r5d
- jl .loopi
- .end:
- - pop esp
- + movifnidn eax, r0d
- RET
- -%endif ; ARCH
- -
- diff --git a/encoder/me.c b/encoder/me.c
- index 291104a..19c5b2b 100644
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -609,7 +609,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- if( h->mb.i_me_method == X264_ME_TESA )
- {
- // ADS threshold, then SAD threshold, then keep the best few SADs, then SATD
- - mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
- + mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15) + 4);
- int nmvsad = 0, limit;
- int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
- int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+bmy*stride+bmx, stride )
- --
- 1.7.1
- From 4f74306c2f266bfc671ad99e9027b816dd423ece Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Mon, 28 Jun 2010 15:02:33 -0700
- Subject: [PATCH 2/7] Callback feature for low-latency per-slice output
- Add a callback to allow the calling application to send slices immediately after being encoded.
- Also add some extra information to the x264_nal_t structure to help inform such a calling application how the NAL units should be ordered.
- Full documentation is in x264.h.
- ---
- common/bitstream.c | 7 ++-
- common/bitstream.h | 1 -
- encoder/encoder.c | 26 ++++++++---
- x264.h | 128 +++++++++++++++++++++++++++++++++-------------------
- 4 files changed, 105 insertions(+), 57 deletions(-)
- diff --git a/common/bitstream.c b/common/bitstream.c
- index 0aaac21..ad8c16e 100644
- --- a/common/bitstream.c
- +++ b/common/bitstream.c
- @@ -44,7 +44,7 @@ uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
- /****************************************************************************
- * x264_nal_encode:
- ****************************************************************************/
- -int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode )
- +void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
- {
- uint8_t *src = nal->p_payload;
- uint8_t *end = nal->p_payload + nal->i_payload;
- @@ -52,7 +52,7 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
- if( h->param.b_annexb )
- {
- - if( b_long_startcode )
- + if( nal->b_long_startcode )
- *dst++ = 0x00;
- *dst++ = 0x00;
- *dst++ = 0x00;
- @@ -77,7 +77,8 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
- orig_dst[3] = size>> 0;
- }
- - return size+4;
- + nal->i_payload = size+4;
- + nal->p_payload = orig_dst;
- }
- void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
- diff --git a/common/bitstream.h b/common/bitstream.h
- index 9ce5bd7..dd8118d 100644
- --- a/common/bitstream.h
- +++ b/common/bitstream.h
- @@ -68,7 +68,6 @@ typedef struct
- uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
- } x264_bitstream_function_t;
- -int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode );
- void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
- /* A larger level table size theoretically could help a bit at extremely
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index f54fe85..fe97aef 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -427,6 +427,8 @@ static int x264_validate_parameters( x264_t *h )
- else
- h->param.b_sliced_threads = 0;
- h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
- + if( h->i_thread_frames > 1 )
- + h->param.nalu_process = NULL;
- if( h->param.b_interlaced )
- {
- @@ -1253,8 +1255,9 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
- {
- x264_nal_t *nal = &h->out.nal[h->out.i_nal];
- - nal->i_ref_idc = i_ref_idc;
- - nal->i_type = i_type;
- + nal->i_ref_idc = i_ref_idc;
- + nal->i_type = i_type;
- + nal->b_long_startcode = 1;
- nal->i_payload= 0;
- nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
- @@ -1280,6 +1283,8 @@ static int x264_nal_end( x264_t *h )
- {
- x264_nal_t *nal = &h->out.nal[h->out.i_nal];
- nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload;
- + if( h->param.nalu_process )
- + h->param.nalu_process( h, nal );
- h->out.i_nal++;
- return x264_nal_check_buffer( h );
- @@ -1289,6 +1294,13 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
- {
- int nal_size = 0, previous_nal_size = 0;
- + if( h->param.nalu_process )
- + {
- + for( int i = start; i < h->out.i_nal; i++ )
- + nal_size += h->out.nal[i].i_payload;
- + return nal_size;
- + }
- +
- for( int i = 0; i < start; i++ )
- previous_nal_size += h->out.nal[i].i_payload;
- @@ -1311,11 +1323,9 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
- for( int i = start; i < h->out.i_nal; i++ )
- {
- - int long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
- - int size = x264_nal_encode( h, nal_buffer, &h->out.nal[i], long_startcode );
- - h->out.nal[i].i_payload = size;
- - h->out.nal[i].p_payload = nal_buffer;
- - nal_buffer += size;
- + h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
- + x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
- + nal_buffer += h->out.nal[i].i_payload;
- }
- x264_emms();
- @@ -1805,6 +1815,7 @@ static int x264_slice_write( x264_t *h )
- /* Slice */
- x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
- + h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
- /* Slice header */
- x264_macroblock_thread_init( h );
- @@ -2020,6 +2031,7 @@ static int x264_slice_write( x264_t *h )
- i_mb_x = 0;
- }
- }
- + h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
- if( h->param.b_cabac )
- {
- diff --git a/x264.h b/x264.h
- index 1138a8b..e1ae084 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,13 +35,61 @@
- #include <stdarg.h>
- -#define X264_BUILD 100
- +#define X264_BUILD 101
- /* x264_t:
- * opaque handler for encoder */
- typedef struct x264_t x264_t;
- /****************************************************************************
- + * NAL structure and functions
- + ****************************************************************************/
- +
- +enum nal_unit_type_e
- +{
- + NAL_UNKNOWN = 0,
- + NAL_SLICE = 1,
- + NAL_SLICE_DPA = 2,
- + NAL_SLICE_DPB = 3,
- + NAL_SLICE_DPC = 4,
- + NAL_SLICE_IDR = 5, /* ref_idc != 0 */
- + NAL_SEI = 6, /* ref_idc == 0 */
- + NAL_SPS = 7,
- + NAL_PPS = 8,
- + NAL_AUD = 9,
- + NAL_FILLER = 12,
- + /* ref_idc == 0 for 6,9,10,11,12 */
- +};
- +enum nal_priority_e
- +{
- + NAL_PRIORITY_DISPOSABLE = 0,
- + NAL_PRIORITY_LOW = 1,
- + NAL_PRIORITY_HIGH = 2,
- + NAL_PRIORITY_HIGHEST = 3,
- +};
- +
- +/* The data within the payload is already NAL-encapsulated; the ref_idc and type
- + * are merely in the struct for easy access by the calling application.
- + * All data returned in an x264_nal_t, including the data in p_payload, is no longer
- + * valid after the next call to x264_encoder_encode. Thus it must be used or copied
- + * before calling x264_encoder_encode or x264_encoder_headers again. */
- +typedef struct
- +{
- + int i_ref_idc; /* nal_priority_e */
- + int i_type; /* nal_unit_type_e */
- + int b_long_startcode;
- + int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
- + int i_last_mb; /* If this NAL is a slice, the index of the last MB in the slice. */
- +
- + /* Size of payload in bytes. */
- + int i_payload;
- + /* If param->b_annexb is set, Annex-B bytestream with startcode.
- + * Otherwise, startcode is replaced with a 4-byte size.
- + * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
- + uint8_t *p_payload;
- +} x264_nal_t;
- +
- +/****************************************************************************
- * Encoder parameters
- ****************************************************************************/
- /* CPU flags
- @@ -377,8 +425,41 @@ typedef struct x264_param_t
- * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
- * Not used when x264_encoder_reconfig is called directly. */
- void (*param_free)( void* );
- +
- + /* Optional low-level callback for low-latency encoding. Called for each output NAL unit
- + * immediately after the NAL unit is finished encoding. This allows the calling application
- + * to begin processing video data (e.g. by sending packets over a network) before the frame
- + * is done encoding.
- + *
- + * This callback MUST do the following in order to work correctly:
- + * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 16.
- + * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
- + * After these steps, the content of nal is valid and can be used in the same way as if
- + * the NAL unit were output by x264_encoder_encode.
- + *
- + * This does not need to be synchronous with the encoding process: the data pointed to
- + * by nal (both before and after x264_nal_encode) will remain valid until the next
- + * x264_encoder_encode call. The callback must be re-entrant.
- + *
- + * This callback does not work with frame-based threads; threads must be disabled
- + * or sliced-threads enabled. This callback also does not work as one would expect
- + * with HRD -- since the buffering period SEI cannot be calculated until the frame
- + * is finished encoding, it will not be sent via this callback.
- + *
- + * Note also that the NALs are not necessarily returned in order when sliced threads is
- + * enabled. Accordingly, the variables i_first_mb and i_last_mb are available in
- + * x264_nal_t to help the calling application reorder the slices if necessary.
- + *
- + * When this callback is enabled, x264_encoder_encode does not return valid NALs;
- + * the calling application is expected to acquire all output NALs through the callback.
- + *
- + * It is generally sensible to combine this callback with a use of slice-max-mbs or
- + * slice-max-size. */
- + void (*nalu_process) ( x264_t *h, x264_nal_t *nal );
- } x264_param_t;
- +void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
- +
- /****************************************************************************
- * H.264 level restriction information
- ****************************************************************************/
- @@ -586,51 +667,6 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
- void x264_picture_clean( x264_picture_t *pic );
- /****************************************************************************
- - * NAL structure and functions
- - ****************************************************************************/
- -
- -enum nal_unit_type_e
- -{
- - NAL_UNKNOWN = 0,
- - NAL_SLICE = 1,
- - NAL_SLICE_DPA = 2,
- - NAL_SLICE_DPB = 3,
- - NAL_SLICE_DPC = 4,
- - NAL_SLICE_IDR = 5, /* ref_idc != 0 */
- - NAL_SEI = 6, /* ref_idc == 0 */
- - NAL_SPS = 7,
- - NAL_PPS = 8,
- - NAL_AUD = 9,
- - NAL_FILLER = 12,
- - /* ref_idc == 0 for 6,9,10,11,12 */
- -};
- -enum nal_priority_e
- -{
- - NAL_PRIORITY_DISPOSABLE = 0,
- - NAL_PRIORITY_LOW = 1,
- - NAL_PRIORITY_HIGH = 2,
- - NAL_PRIORITY_HIGHEST = 3,
- -};
- -
- -/* The data within the payload is already NAL-encapsulated; the ref_idc and type
- - * are merely in the struct for easy access by the calling application.
- - * All data returned in an x264_nal_t, including the data in p_payload, is no longer
- - * valid after the next call to x264_encoder_encode. Thus it must be used or copied
- - * before calling x264_encoder_encode or x264_encoder_headers again. */
- -typedef struct
- -{
- - int i_ref_idc; /* nal_priority_e */
- - int i_type; /* nal_unit_type_e */
- -
- - /* Size of payload in bytes. */
- - int i_payload;
- - /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode.
- - * Otherwise, startcode is replaced with a 4-byte size.
- - * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
- - uint8_t *p_payload;
- -} x264_nal_t;
- -
- -/****************************************************************************
- * Encoder functions
- ****************************************************************************/
- --
- 1.7.1
- From 22bf1672adafa4e938a13952b8f71cd7548d31f1 Mon Sep 17 00:00:00 2001
- From: Lamont Alston <wewk584@gmail.com>
- Date: Tue, 29 Jun 2010 10:11:42 -0700
- Subject: [PATCH 3/7] Make open-GOP Blu-ray compatible
- Blu-ray is even more braindamaged than we thought.
- Accordingly, open-gop options are now "normal" and "bluray", as opposed to display and coded.
- Normal should be used in all cases besides Blu-ray authoring.
- ---
- encoder/encoder.c | 2 +-
- encoder/slicetype.c | 28 +++++++---------------------
- x264.c | 8 ++++----
- x264.h | 8 ++++----
- 4 files changed, 16 insertions(+), 30 deletions(-)
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index fe97aef..5cd3307 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -577,7 +577,7 @@ static int x264_validate_parameters( x264_t *h )
- h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
- }
- h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
- - h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
- + h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
- if( h->param.i_keyint_max == 1 )
- h->param.b_intra_refresh = 0;
- h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 2703f02..4ede8cf 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -1233,17 +1233,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
- if( !h->param.b_intra_refresh )
- for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
- {
- - int j = i;
- - if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- - {
- - while( IS_X264_TYPE_B( frames[i]->i_type ) )
- - i++;
- - while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
- - j--;
- - }
- frames[i]->i_type = X264_TYPE_I;
- reset_start = X264_MIN( reset_start, i+1 );
- - i = j;
- + if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
- + while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
- + i--;
- }
- if( vbv_lookahead )
- @@ -1337,16 +1331,8 @@ void x264_slicetype_decide( x264_t *h )
- if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
- frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
- int warn = frm->i_type != X264_TYPE_IDR;
- - if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
- - warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME;
- - if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- - {
- - /* if this minigop ends with i, it's not a violation */
- - int j = bframes;
- - while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
- - j++;
- - warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
- - }
- + if( warn && h->param.i_open_gop )
- + warn &= frm->i_type != X264_TYPE_I;
- if( warn )
- x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
- }
- @@ -1355,8 +1341,8 @@ void x264_slicetype_decide( x264_t *h )
- if( h->param.i_open_gop )
- {
- h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
- - if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
- - h->lookahead->i_last_keyframe -= bframes; // Use coded order
- + if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
- + h->lookahead->i_last_keyframe -= bframes; // Use bluray order
- frm->b_keyframe = 1;
- }
- else
- diff --git a/x264.c b/x264.c
- index df04385..f08ab41 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -382,10 +382,10 @@ static void Help( x264_param_t *defaults, int longhelp )
- " - normal: Non-strict (not Blu-ray compatible)\n",
- strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
- H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
- - " - none: Use standard closed GOPs\n"
- - " - display: Base GOP length on display order\n"
- - " (not Blu-ray compatible)\n"
- - " - coded: Base GOP length on coded order\n"
- + " - none: closed GOPs only\n"
- + " - normal: standard open GOPs\n"
- + " (not Blu-ray compatible)\n"
- + " - bluray: Blu-ray-compatible open GOPs\n"
- " Only available with b-frames\n" );
- H1( " --no-cabac Disable CABAC\n" );
- H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
- diff --git a/x264.h b/x264.h
- index e1ae084..86f7426 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -153,8 +153,8 @@ typedef struct
- #define X264_B_PYRAMID_NORMAL 2
- #define X264_KEYINT_MIN_AUTO 0
- #define X264_OPEN_GOP_NONE 0
- -#define X264_OPEN_GOP_DISPLAY_ORDER 1
- -#define X264_OPEN_GOP_CODED_ORDER 2
- +#define X264_OPEN_GOP_NORMAL 1
- +#define X264_OPEN_GOP_BLURAY 2
- static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
- static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
- @@ -166,7 +166,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
- static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
- static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
- static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
- -static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
- +static const char * const x264_open_gop_names[] = { "none", "normal", "bluray", 0 };
- /* Colorspace type
- * legacy only; nothing other than I420 is really supported. */
- @@ -276,7 +276,7 @@ typedef struct x264_param_t
- int i_bframe_adaptive;
- int i_bframe_bias;
- int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
- - int i_open_gop; /* Open gop: 1=display order, 2=coded order to determine gop size */
- + int i_open_gop; /* Open gop: 1=display order, 2=bluray compatibility braindamage mode */
- int b_deblocking_filter;
- int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
- --
- 1.7.1
- From ae5c32e10d6b500366d1d638c52b75e65aad1d9f Mon Sep 17 00:00:00 2001
- From: Steven Walters <kemuri9@gmail.com>
- Date: Sat, 26 Jun 2010 16:28:49 -0400
- Subject: [PATCH 4/7] Centralize logging within x264cli
- x264cli messages will now respect the log level they pertain to.
- Slightly reduces binary size.
- ---
- input/avs.c | 88 +++++-------------
- input/ffms.c | 58 +++--------
- input/input.h | 2 +
- input/lavf.c | 55 +++--------
- input/thread.c | 9 +-
- input/timecode.c | 111 ++++++----------------
- input/y4m.c | 23 +----
- input/yuv.c | 8 +-
- muxers.h | 61 ------------
- output/flv.c | 10 +-
- output/flv_bytestream.c | 2 +-
- output/matroska.c | 2 +-
- output/matroska_ebml.c | 2 +-
- output/mp4.c | 12 +--
- output/output.h | 2 +
- output/raw.c | 2 +-
- x264.c | 246 +++++++++++++++++++++--------------------------
- x264cli.h | 67 +++++++++++++
- 18 files changed, 289 insertions(+), 471 deletions(-)
- delete mode 100644 muxers.h
- create mode 100644 x264cli.h
- diff --git a/input/avs.c b/input/avs.c
- index 07add40..b83f715 100644
- --- a/input/avs.c
- +++ b/input/avs.c
- @@ -20,8 +20,9 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- #include <windows.h>
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
- /* the AVS interface currently uses __declspec to link function declarations to their definitions in the dll.
- this has a side effect of preventing program execution if the avisynth dll is not found,
- @@ -131,27 +132,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- FILE *fh = fopen( psz_filename, "r" );
- if( !fh )
- return -1;
- - else if( !x264_is_regular_file( fh ) )
- - {
- - fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
- fclose( fh );
- avs_hnd_t *h = malloc( sizeof(avs_hnd_t) );
- if( !h )
- return -1;
- - if( avs_load_library( h ) )
- - {
- - fprintf( stderr, "avs [error]: failed to load avisynth\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( avs_load_library( h ), "failed to load avisynth\n" )
- h->env = h->func.avs_create_script_environment( AVS_INTERFACE_YV12 );
- - if( !h->env )
- - {
- - fprintf( stderr, "avs [error]: failed to initiate avisynth\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h->env, "failed to initiate avisynth\n" )
- AVS_Value arg = avs_new_value_string( psz_filename );
- AVS_Value res;
- char *filename_ext = get_filename_extension( psz_filename );
- @@ -159,11 +148,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- if( !strcasecmp( filename_ext, "avs" ) )
- {
- res = h->func.avs_invoke( h->env, "Import", arg, NULL );
- - if( avs_is_error( res ) )
- - {
- - fprintf( stderr, "avs [error]: %s\n", avs_as_string( res ) );
- - return -1;
- - }
- + FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) )
- /* check if the user is using a multi-threaded script and apply distributor if necessary.
- adapted from avisynth's vfw interface */
- AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
- @@ -184,78 +169,55 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- int i;
- for( i = 0; filter[i]; i++ )
- {
- - fprintf( stderr, "avs [info]: trying %s... ", filter[i] );
- + x264_cli_log( "avs", X264_LOG_INFO, "trying %s... ", filter[i] );
- if( !h->func.avs_function_exists( h->env, filter[i] ) )
- {
- - fprintf( stderr, "not found\n" );
- + x264_cli_printf( X264_LOG_INFO, "not found\n" );
- continue;
- }
- if( !strncasecmp( filter[i], "FFmpegSource", 12 ) )
- {
- - fprintf( stderr, "indexing... " );
- + x264_cli_printf( X264_LOG_INFO, "indexing... " );
- fflush( stderr );
- }
- res = h->func.avs_invoke( h->env, filter[i], arg, NULL );
- if( !avs_is_error( res ) )
- {
- - fprintf( stderr, "succeeded\n" );
- + x264_cli_printf( X264_LOG_INFO, "succeeded\n" );
- break;
- }
- - fprintf( stderr, "failed\n" );
- - }
- - if( !filter[i] )
- - {
- - fprintf( stderr, "avs [error]: unable to find source filter to open `%s'\n", psz_filename );
- - return -1;
- + x264_cli_printf( X264_LOG_INFO, "failed\n" );
- }
- + FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename )
- }
- - if( !avs_is_clip( res ) )
- - {
- - fprintf( stderr, "avs [error]: `%s' didn't return a video clip\n", psz_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename )
- h->clip = h->func.avs_take_clip( res, h->env );
- int avs_version = h->func.avs_get_version( h->clip );
- const AVS_VideoInfo *vi = h->func.avs_get_video_info( h->clip );
- - if( !avs_has_video( vi ) )
- - {
- - fprintf( stderr, "avs [error]: `%s' has no video data\n", psz_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename )
- /* if the clip is made of fields instead of frames, call weave to make them frames */
- if( avs_is_field_based( vi ) )
- {
- - fprintf( stderr, "avs [warning]: detected fieldbased (separated) input, weaving to frames\n" );
- + x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
- AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
- - if( avs_is_error( tmp ) )
- - {
- - fprintf( stderr, "avs [error]: couldn't weave fields into frames\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" )
- res = update_clip( h, &vi, tmp, res );
- info->interlaced = 1;
- info->tff = avs_is_tff( vi );
- }
- - if( vi->width&1 || vi->height&1 )
- - {
- - fprintf( stderr, "avs [error]: input clip width or height not divisible by 2 (%dx%d)\n",
- - vi->width, vi->height );
- - return -1;
- - }
- + FAIL_IF_ERROR( vi->width&1 || vi->height&1, "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
- /* always call ConvertToYV12 to convert non YV12 planar colorspaces to YV12 when user's AVS supports them,
- as all planar colorspaces are flagged as YV12. If it is already YV12 in this case, the call does nothing */
- if( !avs_is_yv12( vi ) || avs_version >= AVS_INTERFACE_OTHER_PLANAR )
- {
- - fprintf( stderr, "avs %s\n", !avs_is_yv12( vi ) ? "[warning]: converting input clip to YV12"
- - : "[info]: avisynth 2.6+ detected, forcing conversion to YV12" );
- + if( !avs_is_yv12( vi ) ) /* x264_cli_log prints fmt verbatim, so each message carries its own newline */
- + x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to YV12\n" );
- + else
- + x264_cli_log( "avs", X264_LOG_INFO, "avisynth 2.6+ detected, forcing conversion to YV12\n" );
- const char *arg_name[2] = { NULL, "interlaced" };
- AVS_Value arg_arr[2] = { res, avs_new_value_bool( info->interlaced ) };
- AVS_Value res2 = h->func.avs_invoke( h->env, "ConvertToYV12", avs_new_value_array( arg_arr, 2 ), arg_name );
- - if( avs_is_error( res2 ) )
- - {
- - fprintf( stderr, "avs [error]: couldn't convert input clip to YV12\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to YV12\n" )
- res = update_clip( h, &vi, res2, res );
- }
- h->func.avs_release_value( res );
- @@ -294,11 +256,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- return -1;
- AVS_VideoFrame *frm = p_pic->opaque = h->func.avs_get_frame( h->clip, i_frame );
- const char *err = h->func.avs_clip_get_error( h->clip );
- - if( err )
- - {
- - fprintf( stderr, "avs [error]: %s occurred while reading frame %d\n", err, i_frame );
- - return -1;
- - }
- + FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame )
- for( int i = 0; i < 3; i++ )
- {
- /* explicitly cast away the const attribute to avoid a warning */
- diff --git a/input/ffms.c b/input/ffms.c
- index b2a253e..fe8bf7e 100644
- --- a/input/ffms.c
- +++ b/input/ffms.c
- @@ -21,8 +21,10 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- #include <ffms.h>
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
- +
- #undef DECLARE_ALIGNED
- #include <libavcodec/avcodec.h>
- #include <libswscale/swscale.h>
- @@ -86,28 +88,16 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- {
- idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, NULL, &e );
- fprintf( stderr, " \r" );
- - if( !idx )
- - {
- - fprintf( stderr, "ffms [error]: could not create index\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !idx, "could not create index\n" )
- if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
- - fprintf( stderr, "ffms [warning]: could not write index file\n" );
- + x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
- }
- int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
- - if( trackno < 0 )
- - {
- - fprintf( stderr, "ffms [error]: could not find video track\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
- h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
- - if( !h->video_source )
- - {
- - fprintf( stderr, "ffms [error]: could not create video source\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
- h->track = FFMS_GetTrackFromVideo( h->video_source );
- @@ -121,11 +111,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- h->vfr_input = info->vfr;
- const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, 0, &e );
- - if( !frame )
- - {
- - fprintf( stderr, "ffms [error]: could not read frame 0\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !frame, "could not read frame 0\n" )
- h->init_width = h->cur_width = info->width = frame->EncodedWidth;
- h->init_height = h->cur_height = info->height = frame->EncodedHeight;
- @@ -134,8 +120,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- info->tff = frame->TopFieldFirst;
- if( h->cur_pix_fmt != PIX_FMT_YUV420P )
- - fprintf( stderr, "ffms [warning]: converting from %s to YV12\n",
- - avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
- + x264_cli_log( "ffms", X264_LOG_WARNING, "converting from %s to YV12\n",
- + avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
- /* ffms timestamps are in milliseconds. ffms also uses int64_ts for timebase,
- * so we need to reduce large timebases to prevent overflow */
- @@ -173,19 +159,15 @@ static int check_swscale( ffms_hnd_t *h, const FFMS_Frame *frame, int i_frame )
- if( h->scaler )
- {
- sws_freeContext( h->scaler );
- - fprintf( stderr, "ffms [warning]: stream properties changed to %dx%d, %s at frame %d \n", frame->EncodedWidth,
- - frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
- + x264_cli_log( "ffms", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d \n", frame->EncodedWidth,
- + frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
- h->cur_width = frame->EncodedWidth;
- h->cur_height = frame->EncodedHeight;
- h->cur_pix_fmt = frame->EncodedPixelFormat;
- }
- h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
- PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
- - if( !h->scaler )
- - {
- - fprintf( stderr, "ffms [error]: could not open swscale context\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
- return 0;
- }
- @@ -195,11 +177,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- FFMS_ErrorInfo e;
- e.BufferSize = 0;
- const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, i_frame, &e );
- - if( !frame )
- - {
- - fprintf( stderr, "ffms [error]: could not read frame %d\n", i_frame );
- - return -1;
- - }
- + FAIL_IF_ERROR( !frame, "could not read frame %d\n", i_frame )
- if( check_swscale( h, frame, i_frame ) )
- return -1;
- @@ -214,12 +192,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- if( h->vfr_input )
- {
- - if( info->PTS == AV_NOPTS_VALUE )
- - {
- - fprintf( stderr, "ffms [error]: invalid timestamp. "
- - "Use --force-cfr and specify a framerate with --fps\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( info->PTS == AV_NOPTS_VALUE, "invalid timestamp. "
- + "Use --force-cfr and specify a framerate with --fps\n" )
- if( !h->pts_offset_flag )
- {
- diff --git a/input/input.h b/input/input.h
- index f89b13b..f588f3c 100644
- --- a/input/input.h
- +++ b/input/input.h
- @@ -25,6 +25,8 @@
- #ifndef X264_INPUT_H
- #define X264_INPUT_H
- +#include "x264cli.h"
- +
- /* options that are used by only some demuxers */
- typedef struct
- {
- diff --git a/input/lavf.c b/input/lavf.c
- index 4b0375f..54a275f 100644
- --- a/input/lavf.c
- +++ b/input/lavf.c
- @@ -21,7 +21,8 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
- #undef DECLARE_ALIGNED
- #include <libavformat/avformat.h>
- #include <libswscale/swscale.h>
- @@ -59,19 +60,15 @@ static int check_swscale( lavf_hnd_t *h, AVCodecContext *c, int i_frame )
- if( h->scaler )
- {
- sws_freeContext( h->scaler );
- - fprintf( stderr, "lavf [warning]: stream properties changed to %dx%d, %s at frame %d \n",
- - c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
- + x264_cli_log( "lavf", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d \n",
- + c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
- h->cur_width = c->width;
- h->cur_height = c->height;
- h->cur_pix_fmt = c->pix_fmt;
- }
- h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
- PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
- - if( !h->scaler )
- - {
- - fprintf( stderr, "lavf [error]: could not open swscale context\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
- return 0;
- }
- @@ -106,12 +103,12 @@ static int read_frame_internal( x264_picture_t *p_pic, lavf_hnd_t *h, int i_fram
- {
- c->reordered_opaque = pkt->pts;
- if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
- - fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
- + x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
- }
- if( !finished )
- {
- if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
- - fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
- + x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
- if( !finished )
- return -1;
- }
- @@ -166,26 +163,13 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- if( !strcmp( psz_filename, "-" ) )
- psz_filename = "pipe:";
- - if( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ) )
- - {
- - fprintf( stderr, "lavf [error]: could not open input file\n" );
- - return -1;
- - }
- -
- - if( av_find_stream_info( h->lavf ) < 0 )
- - {
- - fprintf( stderr, "lavf [error]: could not find input stream info\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ), "could not open input file\n" )
- + FAIL_IF_ERROR( av_find_stream_info( h->lavf ) < 0, "could not find input stream info\n" )
- int i = 0;
- while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != CODEC_TYPE_VIDEO )
- i++;
- - if( i == h->lavf->nb_streams )
- - {
- - fprintf( stderr, "lavf [error]: could not find video stream\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
- h->stream_id = i;
- h->next_frame = 0;
- h->pts_offset_flag = 0;
- @@ -207,22 +191,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- info->csp |= X264_CSP_VFLIP;
- if( h->cur_pix_fmt != PIX_FMT_YUV420P )
- - fprintf( stderr, "lavf [warning]: converting from %s to YV12\n",
- - avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
- -
- - if( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ) )
- - {
- - fprintf( stderr, "lavf [error]: could not find decoder for video stream\n" );
- - return -1;
- - }
- + x264_cli_log( "lavf", X264_LOG_WARNING, "converting from %s to YV12\n",
- + avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
- + FAIL_IF_ERROR( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ),
- + "could not find decoder for video stream\n" )
- /* prefetch the first frame and set/confirm flags */
- h->first_pic = malloc( sizeof(x264_picture_t) );
- - if( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ) )
- - {
- - fprintf( stderr, "lavf [error]: malloc failed\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ),
- + "malloc failed\n" )
- else if( read_frame_internal( h->first_pic, h, 0, info ) )
- return -1;
- diff --git a/input/thread.c b/input/thread.c
- index c4b07fa..98af22b 100644
- --- a/input/thread.c
- +++ b/input/thread.c
- @@ -21,7 +21,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- extern cli_input_t input;
- @@ -47,11 +47,8 @@ typedef struct thread_input_arg_t
- static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
- {
- thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
- - if( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ) )
- - {
- - fprintf( stderr, "x264 [error]: malloc failed\n" );
- - return -1;
- - }
- + FAIL_IF_ERR( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ),
- + "x264", "malloc failed\n" )
- h->input = input;
- h->p_handle = *p_handle;
- h->next_frame = -1;
- diff --git a/input/timecode.c b/input/timecode.c
- index a307327..7821e76 100644
- --- a/input/timecode.c
- +++ b/input/timecode.c
- @@ -20,7 +20,8 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
- #include <math.h>
- extern cli_input_t input;
- @@ -61,12 +62,8 @@ static double correct_fps( double fps, timecode_hnd_t *h )
- {
- fps_den = i * h->timebase_num;
- fps_num = round( fps_den * fps_sig ) * exponent;
- - if( fps_num > UINT32_MAX )
- - {
- - fprintf( stderr, "timecode [error]: tcfile fps correction failed.\n"
- - " Specify an appropriate timebase manually or remake tcfile.\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( fps_num > UINT32_MAX, "tcfile fps correction failed.\n"
- + " Specify an appropriate timebase manually or remake tcfile.\n" )
- if( fabs( ((double)fps_num / fps_den) / exponent - fps_sig ) < DOUBLE_EPSILON )
- break;
- ++i;
- @@ -91,12 +88,8 @@ static int try_mkv_timebase_den( double *fpss, timecode_hnd_t *h, int loop_num )
- double fps_sig = sigexp10( fpss[num], &exponent );
- fps_den = round( MKV_TIMEBASE_DEN / fps_sig ) / exponent;
- h->timebase_num = fps_den && h->timebase_num ? gcd( h->timebase_num, fps_den ) : fps_den;
- - if( h->timebase_num > UINT32_MAX || !h->timebase_num )
- - {
- - fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
- - " Specify timebase manually.\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || !h->timebase_num, "automatic timebase generation failed.\n"
- + " Specify timebase manually.\n" )
- }
- return 0;
- }
- @@ -110,11 +103,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- double *fpss = NULL;
- ret = fscanf( tcfile_in, "# timecode format v%d", &tcfv );
- - if( ret != 1 || (tcfv != 1 && tcfv != 2) )
- - {
- - fprintf( stderr, "timecode [error]: unsupported timecode format\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" )
- if( tcfv == 1 )
- {
- @@ -128,18 +117,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- {
- if( buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r' )
- continue;
- - if( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1 )
- - {
- - fprintf( stderr, "timecode [error]: tcfile parsing error: assumed fps not found\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1,
- + "tcfile parsing error: assumed fps not found\n" )
- break;
- }
- - if( h->assume_fps <= 0 )
- - {
- - fprintf( stderr, "timecode [error]: invalid assumed fps %.6f\n", h->assume_fps );
- - return -1;
- - }
- + FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps )
- file_pos = ftell( tcfile_in );
- h->stored_pts_num = 0;
- @@ -152,16 +134,9 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- continue;
- }
- ret = sscanf( buff, "%d,%d,%lf", &start, &end, &seq_fps );
- - if( ret != 3 && ret != EOF )
- - {
- - fprintf( stderr, "timecode [error]: invalid input tcfile\n" );
- - return -1;
- - }
- - if( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0 )
- - {
- - fprintf( stderr, "timecode [error]: invalid input tcfile at line %d: %s\n", num, buff );
- - return -1;
- - }
- + FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" )
- + FAIL_IF_ERROR( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0,
- + "invalid input tcfile at line %d: %s\n", num, buff )
- prev_start = start;
- prev_end = end;
- if( h->auto_timebase_den || h->auto_timebase_num )
- @@ -259,11 +234,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- ++num;
- }
- timecodes_num = h->stored_pts_num + h->seek;
- - if( !timecodes_num )
- - {
- - fprintf( stderr, "timecode [error]: input tcfile doesn't have any timecodes!\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" )
- fseek( tcfile_in, file_pos, SEEK_SET );
- timecodes = malloc( timecodes_num * sizeof(double) );
- @@ -272,11 +243,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- fgets( buff, sizeof(buff), tcfile_in );
- ret = sscanf( buff, "%lf", &timecodes[0] );
- - if( ret != 1 )
- - {
- - fprintf( stderr, "timecode [error]: invalid input tcfile for frame 0\n" );
- - goto fail;
- - }
- + FAIL_IF_ERROR( ret != 1, "invalid input tcfile for frame 0\n" )
- for( num = 1; num < timecodes_num; )
- {
- fgets( buff, sizeof(buff), tcfile_in );
- @@ -284,11 +251,8 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- continue;
- ret = sscanf( buff, "%lf", &timecodes[num] );
- timecodes[num] *= 1e-3; /* Timecode format v2 is expressed in milliseconds. */
- - if( ret != 1 || timecodes[num] <= timecodes[num - 1] )
- - {
- - fprintf( stderr, "timecode [error]: invalid input tcfile for frame %d\n", num );
- - goto fail;
- - }
- + FAIL_IF_ERROR( ret != 1 || timecodes[num] <= timecodes[num - 1],
- + "invalid input tcfile for frame %d\n", num )
- ++num;
- }
- @@ -342,14 +306,10 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- uint64_t i = gcd( h->timebase_num, h->timebase_den );
- h->timebase_num /= i;
- h->timebase_den /= i;
- - fprintf( stderr, "timecode [info]: automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
- - }
- - else if( h->timebase_den > UINT32_MAX || !h->timebase_den )
- - {
- - fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
- - " Specify an appropriate timebase manually.\n" );
- - goto fail;
- + x264_cli_log( "timecode", X264_LOG_INFO, "automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
- }
- + else FAIL_IF_ERROR( h->timebase_den > UINT32_MAX || !h->timebase_den, "automatic timebase generation failed.\n"
- + " Specify an appropriate timebase manually.\n" )
- h->pts = malloc( h->stored_pts_num * sizeof(int64_t) );
- if( !h->pts )
- @@ -360,11 +320,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
- {
- h->pts[num] = (int64_t)( timecodes[h->seek + num] * ((double)h->timebase_den / h->timebase_num) + 0.5 );
- h->pts[num] -= pts_seek_offset;
- - if( h->pts[num] <= h->pts[num - 1] )
- - {
- - fprintf( stderr, "timecode [error]: invalid timebase or timecode for frame %d\n", num );
- - goto fail;
- - }
- + FAIL_IF_ERROR( h->pts[num] <= h->pts[num - 1], "invalid timebase or timecode for frame %d\n", num )
- }
- free( timecodes );
- @@ -386,11 +342,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- int ret = 0;
- FILE *tcfile_in;
- timecode_hnd_t *h = malloc( sizeof(timecode_hnd_t) );
- - if( !h )
- - {
- - fprintf( stderr, "timecode [error]: malloc failed\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( !h, "malloc failed\n" )
- h->input = input;
- h->p_handle = *p_handle;
- h->frame_total = input.get_frame_total( h->p_handle );
- @@ -400,11 +352,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- ret = sscanf( opt->timebase, "%"SCNu64"/%"SCNu64, &h->timebase_num, &h->timebase_den );
- if( ret == 1 )
- h->timebase_num = strtoul( opt->timebase, NULL, 10 );
- - if( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX )
- - {
- - fprintf( stderr, "timecode [error]: timebase you specified exceeds H.264 maximum\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX,
- + "timebase you specified exceeds H.264 maximum\n" )
- }
- h->auto_timebase_num = !ret;
- h->auto_timebase_den = ret < 2;
- @@ -418,14 +367,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- *p_handle = h;
- tcfile_in = fopen( psz_filename, "rb" );
- - if( !tcfile_in )
- - {
- - fprintf( stderr, "timecode [error]: can't open `%s'\n", psz_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
- else if( !x264_is_regular_file( tcfile_in ) )
- {
- - fprintf( stderr, "timecode [error]: tcfile input incompatible with non-regular file `%s'\n", psz_filename );
- + x264_cli_log( "timecode", X264_LOG_ERROR, "tcfile input incompatible with non-regular file `%s'\n", psz_filename );
- fclose( tcfile_in );
- return -1;
- }
- @@ -466,8 +411,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
- {
- if( h->pts )
- {
- - fprintf( stderr, "timecode [info]: input timecode file missing data for frame %d and later\n"
- - " assuming constant fps %.6f\n", i_frame, h->assume_fps );
- + x264_cli_log( "timecode", X264_LOG_INFO, "input timecode file missing data for frame %d and later\n"
- + " assuming constant fps %.6f\n", i_frame, h->assume_fps );
- free( h->pts );
- h->pts = NULL;
- }
- diff --git a/input/y4m.c b/input/y4m.c
- index fd42140..9b39d2f 100644
- --- a/input/y4m.c
- +++ b/input/y4m.c
- @@ -21,7 +21,8 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ )
- typedef struct
- {
- @@ -162,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- if( colorspace == X264_CSP_NONE )
- colorspace = X264_CSP_I420;
- - if( colorspace != X264_CSP_I420 )
- - {
- - fprintf( stderr, "y4m [error]: colorspace unhandled\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( colorspace != X264_CSP_I420, "colorspace unhandled\n" )
- *p_handle = h;
- return 0;
- @@ -202,21 +199,13 @@ static int read_frame_internal( x264_picture_t *p_pic, y4m_hnd_t *h )
- return -1;
- header[slen] = 0;
- - if( strncmp( header, Y4M_FRAME_MAGIC, slen ) )
- - {
- - fprintf( stderr, "y4m [error]: bad header magic (%"PRIx32" <=> %s)\n",
- - M32(header), header );
- - return -1;
- - }
- + FAIL_IF_ERROR( strncmp( header, Y4M_FRAME_MAGIC, slen ), "bad header magic (%"PRIx32" <=> %s)\n",
- + M32(header), header )
- /* Skip most of it */
- while( i < MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
- i++;
- - if( i == MAX_FRAME_HEADER )
- - {
- - fprintf( stderr, "y4m [error]: bad frame header!\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( i == MAX_FRAME_HEADER, "bad frame header!\n" )
- h->frame_header_len = i+slen+1;
- if( fread( p_pic->img.plane[0], h->width * h->height, 1, h->fh ) <= 0
- diff --git a/input/yuv.c b/input/yuv.c
- index cbed7fc..613662c 100644
- --- a/input/yuv.c
- +++ b/input/yuv.c
- @@ -21,7 +21,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "input.h"
- typedef struct
- {
- @@ -45,11 +45,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
- }
- else
- sscanf( opt->resolution, "%ux%u", &info->width, &info->height );
- - if( !info->width || !info->height )
- - {
- - fprintf( stderr, "yuv [error]: rawyuv input requires a resolution.\n" );
- - return -1;
- - }
- + FAIL_IF_ERR( !info->width || !info->height, "yuv", "rawyuv input requires a resolution.\n" )
- h->next_frame = 0;
- info->vfr = 0;
- diff --git a/muxers.h b/muxers.h
- deleted file mode 100644
- index b309320..0000000
- --- a/muxers.h
- +++ /dev/null
- @@ -1,61 +0,0 @@
- -/*****************************************************************************
- - * muxers.h: h264 file i/o modules
- - *****************************************************************************
- - * Copyright (C) 2003-2009 x264 project
- - *
- - * Authors: Laurent Aimar <fenrir@via.ecp.fr>
- - * Loren Merritt <lorenm@u.washington.edu>
- - *
- - * This program is free software; you can redistribute it and/or modify
- - * it under the terms of the GNU General Public License as published by
- - * the Free Software Foundation; either version 2 of the License, or
- - * (at your option) any later version.
- - *
- - * This program is distributed in the hope that it will be useful,
- - * but WITHOUT ANY WARRANTY; without even the implied warranty of
- - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- - * GNU General Public License for more details.
- - *
- - * You should have received a copy of the GNU General Public License
- - * along with this program; if not, write to the Free Software
- - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- - *****************************************************************************/
- -
- -#ifndef X264_MUXERS_H
- -#define X264_MUXERS_H
- -
- -#include "common/common.h"
- -#include "x264.h"
- -
- -typedef void *hnd_t;
- -
- -static inline int64_t gcd( int64_t a, int64_t b )
- -{
- - while( 1 )
- - {
- - int64_t c = a % b;
- - if( !c )
- - return b;
- - a = b;
- - b = c;
- - }
- -}
- -
- -static inline int64_t lcm( int64_t a, int64_t b )
- -{
- - return ( a / gcd( a, b ) ) * b;
- -}
- -
- -static inline char *get_filename_extension( char *filename )
- -{
- - char *ext = filename + strlen( filename );
- - while( *ext != '.' && ext > filename )
- - ext--;
- - ext += *ext == '.';
- - return ext;
- -}
- -
- -#include "input/input.h"
- -#include "output/output.h"
- -
- -#endif
- diff --git a/output/flv.c b/output/flv.c
- index e441b6d..9831a5b 100644
- --- a/output/flv.c
- +++ b/output/flv.c
- @@ -18,7 +18,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- #include "flv_bytestream.h"
- #define CHECK(x)\
- @@ -223,14 +223,14 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
- if( prev_dts == dts )
- {
- double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
- - fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
- - " current internal decoding framerate: %.6f fps\n", dts, fps );
- + x264_cli_log( "flv", X264_LOG_WARNING, "duplicate DTS %"PRId64" generated by rounding\n"
- + " current internal decoding framerate: %.6f fps\n", dts, fps );
- }
- if( prev_cts == cts )
- {
- double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
- - fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" generated by rounding\n"
- - " current internal composition framerate: %.6f fps\n", cts, fps );
- + x264_cli_log( "flv", X264_LOG_WARNING, "duplicate CTS %"PRId64" generated by rounding\n"
- + " current internal composition framerate: %.6f fps\n", cts, fps );
- }
- }
- p_flv->i_prev_dts = p_picture->i_dts;
- diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
- index 316114c..e02476c 100644
- --- a/output/flv_bytestream.c
- +++ b/output/flv_bytestream.c
- @@ -18,7 +18,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- #include "flv_bytestream.h"
- uint64_t dbl2int( double value )
- diff --git a/output/matroska.c b/output/matroska.c
- index 0304c84..a1219d0 100644
- --- a/output/matroska.c
- +++ b/output/matroska.c
- @@ -18,7 +18,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- #include "matroska_ebml.h"
- typedef struct
- diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
- index 31b62f8..adfcaa8 100644
- --- a/output/matroska_ebml.c
- +++ b/output/matroska_ebml.c
- @@ -18,7 +18,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- #include "matroska_ebml.h"
- #define CLSIZE 1048576
- diff --git a/output/mp4.c b/output/mp4.c
- index 0e3c2fc..f2fc5f5 100644
- --- a/output/mp4.c
- +++ b/output/mp4.c
- @@ -21,7 +21,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- #include <gpac/isomedia.h>
- #if HAVE_GF_MALLOC
- @@ -61,12 +61,12 @@ static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
- timescale = gf_isom_get_media_timescale( p_file, i_track );
- count = gf_isom_get_sample_count( p_file, i_track );
- - for( int i = 0; i < count; i++ )
- + for( u32 i = 0; i < count; i++ )
- {
- GF_ISOSample *samp = gf_isom_get_sample_info( p_file, i_track, i+1, &di, &offset );
- if( !samp )
- {
- - fprintf( stderr, "mp4 [error]: failure reading back frame %u\n", i );
- + x264_cli_log( "mp4", X264_LOG_ERROR, "failure reading back frame %u\n", i );
- break;
- }
- @@ -163,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
- FILE *fh = fopen( psz_filename, "w" );
- if( !fh )
- return -1;
- - else if( !x264_is_regular_file( fh ) )
- - {
- - fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
- - return -1;
- - }
- + FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
- fclose( fh );
- if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
- diff --git a/output/output.h b/output/output.h
- index c79b48e..094fefc 100644
- --- a/output/output.h
- +++ b/output/output.h
- @@ -24,6 +24,8 @@
- #ifndef X264_OUTPUT_H
- #define X264_OUTPUT_H
- +#include "x264cli.h"
- +
- typedef struct
- {
- int (*open_file)( char *psz_filename, hnd_t *p_handle );
- diff --git a/output/raw.c b/output/raw.c
- index 02e4c56..fc418fb 100644
- --- a/output/raw.c
- +++ b/output/raw.c
- @@ -21,7 +21,7 @@
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *****************************************************************************/
- -#include "muxers.h"
- +#include "output.h"
- static int open_file( char *psz_filename, hnd_t *p_handle )
- {
- diff --git a/x264.c b/x264.c
- index f08ab41..741570c 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -31,9 +31,11 @@
- #include <getopt.h>
- #include "common/common.h"
- -#include "common/cpu.h"
- -#include "x264.h"
- -#include "muxers.h"
- +#include "x264cli.h"
- +#include "input/input.h"
- +#include "output/output.h"
- +
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
- #ifdef _WIN32
- #include <windows.h>
- @@ -96,6 +98,7 @@ static const char * const muxer_names[] =
- };
- static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
- +static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
- typedef struct{
- int mod;
- @@ -141,6 +144,48 @@ static void Help( x264_param_t *defaults, int longhelp );
- static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt );
- static int Encode( x264_param_t *param, cli_opt_t *opt );
- +/* logging and printing for within the cli system */
- +static int cli_log_level;
- +void x264_cli_log( const char *name, int i_level, const char *fmt, ... )
- +{
- + if( i_level > cli_log_level )
- + return;
- + char *s_level;
- + switch( i_level )
- + {
- + case X264_LOG_ERROR:
- + s_level = "error";
- + break;
- + case X264_LOG_WARNING:
- + s_level = "warning";
- + break;
- + case X264_LOG_INFO:
- + s_level = "info";
- + break;
- + case X264_LOG_DEBUG:
- + s_level = "debug";
- + break;
- + default:
- + s_level = "unknown";
- + break;
- + }
- + fprintf( stderr, "%s [%s]: ", name, s_level );
- + va_list arg;
- + va_start( arg, fmt );
- + vfprintf( stderr, fmt, arg );
- + va_end( arg );
- +}
- +
- +void x264_cli_printf( int i_level, const char *fmt, ... )
- +{
- + if( i_level > cli_log_level )
- + return;
- + va_list arg;
- + va_start( arg, fmt );
- + vfprintf( stderr, fmt, arg );
- + va_end( arg );
- +}
- +
- /****************************************************************************
- * main:
- ****************************************************************************/
- @@ -571,6 +616,9 @@ static void Help( x264_param_t *defaults, int longhelp )
- H1( " -v, --verbose Print stats for each frame\n" );
- H1( " --no-progress Don't show the progress indicator while encoding\n" );
- H0( " --quiet Quiet Mode\n" );
- + H1( " --log-level <string> Specify the maximum level of logging [\"%s\"]\n"
- + " - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ),
- + stringify_names( buf, log_level_names ) );
- H1( " --psnr Enable PSNR computation\n" );
- H1( " --ssim Enable SSIM computation\n" );
- H1( " --threads <integer> Force a specific number of threads\n" );
- @@ -616,6 +664,7 @@ enum {
- OPT_TCFILE_OUT,
- OPT_TIMEBASE,
- OPT_PULLDOWN,
- + OPT_LOG_LEVEL
- } OptionsOPT;
- static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
- @@ -729,6 +778,7 @@ static struct option long_options[] =
- { "ssim", no_argument, NULL, 0 },
- { "quiet", no_argument, NULL, OPT_QUIET },
- { "verbose", no_argument, NULL, 'v' },
- + { "log-level", required_argument, NULL, OPT_LOG_LEVEL },
- { "no-progress", no_argument, NULL, OPT_NOPROGRESS },
- { "visualize", no_argument, NULL, OPT_VISUALIZE },
- { "dump-yuv", required_argument, NULL, 0 },
- @@ -780,11 +830,11 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
- param->b_repeat_headers = 0;
- if( param->i_nal_hrd == X264_NAL_HRD_CBR )
- {
- - fprintf( stderr, "x264 [warning]: cbr nal-hrd is not compatible with mp4\n" );
- + x264_cli_log( "x264", X264_LOG_WARNING, "cbr nal-hrd is not compatible with mp4\n" );
- param->i_nal_hrd = X264_NAL_HRD_VBR;
- }
- #else
- - fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
- + x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with MP4 output support\n" );
- return -1;
- #endif
- }
- @@ -833,7 +883,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
- input = avs_input;
- module = "avs";
- #else
- - fprintf( stderr, "x264 [error]: not compiled with AVS input support\n" );
- + x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with AVS input support\n" );
- return -1;
- #endif
- }
- @@ -877,11 +927,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
- input = yuv_input;
- }
- - if( !(*p_handle) )
- - {
- - fprintf( stderr, "x264 [error]: could not open input file `%s' via any method!\n", filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename )
- }
- strcpy( used_demuxer, module );
- @@ -932,6 +978,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- char *tune = NULL;
- x264_param_default( &defaults );
- + cli_log_level = defaults.i_log_level;
- memset( opt, 0, sizeof(cli_opt_t) );
- memset( &input_opt, 0, sizeof(cli_input_opt_t) );
- @@ -1004,32 +1051,20 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- output_filename = optarg;
- break;
- case OPT_MUXER:
- - if( parse_enum_name( optarg, muxer_names, &muxer ) < 0 )
- - {
- - fprintf( stderr, "x264 [error]: Unknown muxer `%s'\n", optarg );
- - return -1;
- - }
- + FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg )
- break;
- case OPT_DEMUXER:
- - if( parse_enum_name( optarg, demuxer_names, &demuxer ) < 0 )
- - {
- - fprintf( stderr, "x264 [error]: Unknown demuxer `%s'\n", optarg );
- - return -1;
- - }
- + FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg )
- break;
- case OPT_INDEX:
- input_opt.index_file = optarg;
- break;
- case OPT_QPFILE:
- opt->qpfile = fopen( optarg, "rb" );
- - if( !opt->qpfile )
- - {
- - fprintf( stderr, "x264 [error]: can't open qpfile `%s'\n", optarg );
- - return -1;
- - }
- - else if( !x264_is_regular_file( opt->qpfile ) )
- + FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
- + if( !x264_is_regular_file( opt->qpfile ) )
- {
- - fprintf( stderr, "x264 [error]: qpfile incompatible with non-regular file `%s'\n", optarg );
- + x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg );
- fclose( opt->qpfile );
- return -1;
- }
- @@ -1038,11 +1073,17 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- b_thread_input = 1;
- break;
- case OPT_QUIET:
- - param->i_log_level = X264_LOG_NONE;
- + cli_log_level = param->i_log_level = X264_LOG_NONE;
- break;
- case 'v':
- - param->i_log_level = X264_LOG_DEBUG;
- + cli_log_level = param->i_log_level = X264_LOG_DEBUG;
- break;
- + case OPT_LOG_LEVEL:
- + if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) )
- + cli_log_level += X264_LOG_NONE;
- + else
- + cli_log_level = atoi( optarg );
- + param->i_log_level = cli_log_level;
- case OPT_NOPROGRESS:
- opt->b_progress = 0;
- break;
- @@ -1051,7 +1092,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- param->b_visualize = 1;
- b_exit_on_ctrl_c = 1;
- #else
- - fprintf( stderr, "x264 [warning]: not compiled with visualization support\n" );
- + x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
- #endif
- break;
- case OPT_TUNE:
- @@ -1078,18 +1119,13 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- break;
- case OPT_TCFILE_OUT:
- opt->tcfile_out = fopen( optarg, "wb" );
- - if( !opt->tcfile_out )
- - {
- - fprintf( stderr, "x264 [error]: can't open `%s'\n", optarg );
- - return -1;
- - }
- + FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
- break;
- case OPT_TIMEBASE:
- input_opt.timebase = optarg;
- break;
- case OPT_PULLDOWN:
- - if( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ) < 0 )
- - return -1;
- + FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg )
- break;
- default:
- generic_option:
- @@ -1116,7 +1152,7 @@ generic_option:
- if( b_error )
- {
- const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind-2];
- - fprintf( stderr, "x264 [error]: invalid argument: %s = %s\n", name, optarg );
- + x264_cli_log( "x264", X264_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg );
- return -1;
- }
- }
- @@ -1130,20 +1166,12 @@ generic_option:
- return -1;
- /* Get the file name */
- - if( optind > argc - 1 || !output_filename )
- - {
- - fprintf( stderr, "x264 [error]: No %s file. Run x264 --help for a list of options.\n",
- - optind > argc - 1 ? "input" : "output" );
- - return -1;
- - }
- + FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n",
- + optind > argc - 1 ? "input" : "output" )
- if( select_output( muxer, output_filename, param ) )
- return -1;
- - if( output.open_file( output_filename, &opt->hout ) )
- - {
- - fprintf( stderr, "x264 [error]: could not open output file `%s'\n", output_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( output.open_file( output_filename, &opt->hout ), "could not open output file `%s'\n", output_filename )
- input_filename = argv[optind++];
- input_opt.resolution = optind < argc ? argv[optind++] : NULL;
- @@ -1163,39 +1191,22 @@ generic_option:
- if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
- return -1;
- - if( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ) )
- - {
- - fprintf( stderr, "x264 [error]: could not open input file `%s'\n", input_filename );
- - return -1;
- - }
- + FAIL_IF_ERROR( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ),
- + "could not open input file `%s'\n", input_filename )
- x264_reduce_fraction( &info.sar_width, &info.sar_height );
- x264_reduce_fraction( &info.fps_num, &info.fps_den );
- - if( param->i_log_level >= X264_LOG_INFO )
- - fprintf( stderr, "%s [info]: %dx%d%c %d:%d @ %d/%d fps (%cfr)\n", demuxername, info.width,
- - info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
- - info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
- + x264_cli_log( demuxername, X264_LOG_INFO, "%dx%d%c %d:%d @ %d/%d fps (%cfr)\n", info.width,
- + info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
- + info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
- if( tcfile_name )
- {
- - if( b_user_fps )
- - {
- - fprintf( stderr, "x264 [error]: --fps + --tcfile-in is incompatible.\n" );
- - return -1;
- - }
- - if( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ) )
- - {
- - fprintf( stderr, "x264 [error]: timecode input failed\n" );
- - return -1;
- - }
- - else
- - input = timecode_input;
- - }
- - else if( !info.vfr && input_opt.timebase )
- - {
- - fprintf( stderr, "x264 [error]: --timebase is incompatible with cfr input\n" );
- - return -1;
- + FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" )
- + FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" )
- + input = timecode_input;
- }
- + else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" )
- /* set param flags from the info flags as necessary */
- param->i_csp = info.csp;
- @@ -1204,9 +1215,9 @@ generic_option:
- param->i_width = info.width;
- if( !b_user_interlaced && info.interlaced )
- {
- - fprintf( stderr, "x264 [warning]: input appears to be interlaced, enabling %cff interlaced mode.\n"
- - " If you want otherwise, use --no-interlaced or --%cff\n",
- - info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
- + x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n"
- + " If you want otherwise, use --no-interlaced or --%cff\n",
- + info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
- param->b_interlaced = 1;
- param->b_tff = !!info.tff;
- }
- @@ -1230,21 +1241,14 @@ generic_option:
- uint64_t i_user_timebase_num;
- uint64_t i_user_timebase_den;
- int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den );
- - if( !ret )
- - {
- - fprintf( stderr, "x264 [error]: invalid argument: timebase = %s\n", input_opt.timebase );
- - return -1;
- - }
- + FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase )
- else if( ret == 1 )
- {
- i_user_timebase_num = param->i_timebase_num;
- i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 );
- }
- - if( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX )
- - {
- - fprintf( stderr, "x264 [error]: timebase you specified exceeds H.264 maximum\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX,
- + "timebase you specified exceeds H.264 maximum\n" )
- opt->timebase_convert_multiplier = ((double)i_user_timebase_den / param->i_timebase_den)
- * ((double)param->i_timebase_num / i_user_timebase_num);
- param->i_timebase_num = i_user_timebase_num;
- @@ -1261,13 +1265,8 @@ generic_option:
- if( b_thread_input || param->i_threads > 1
- || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) )
- {
- - if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) )
- - {
- - fprintf( stderr, "x264 [error]: threaded input failed\n" );
- - return -1;
- - }
- - else
- - input = thread_input;
- + FAIL_IF_ERROR( thread_input.open_file( NULL, &opt->hin, &info, NULL ), "threaded input failed\n" )
- + input = thread_input;
- }
- #endif
- @@ -1321,7 +1320,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
- else ret = 0;
- if( ret != 3 || qp < -1 || qp > 51 )
- {
- - fprintf( stderr, "x264 [error]: can't parse qpfile for frame %d\n", i_frame );
- + x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
- fclose( opt->qpfile );
- opt->qpfile = NULL;
- pic->i_type = X264_TYPE_AUTO;
- @@ -1344,11 +1343,7 @@ static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
- i_frame_size = x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out );
- - if( i_frame_size < 0 )
- - {
- - fprintf( stderr, "x264 [error]: x264_encoder_encode failed\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( i_frame_size < 0, "x264_encoder_encode failed\n" );
- if( i_frame_size )
- {
- @@ -1424,17 +1419,14 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- param->b_pic_struct = 1;
- pulldown = &pulldown_values[opt->i_pulldown];
- param->i_timebase_num = param->i_fps_den;
- - if( fmod( param->i_fps_num * pulldown->fps_factor, 1 ) )
- - {
- - fprintf( stderr, "x264 [error]: unsupported framerate for chosen pulldown\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
- + "unsupported framerate for chosen pulldown\n" )
- param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
- }
- if( ( h = x264_encoder_open( param ) ) == NULL )
- {
- - fprintf( stderr, "x264 [error]: x264_encoder_open failed\n" );
- + x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
- input.close_file( opt->hin );
- return -1;
- }
- @@ -1445,27 +1437,19 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- if( output.set_param( opt->hout, param ) )
- {
- - fprintf( stderr, "x264 [error]: can't set outfile param\n" );
- + x264_cli_log( "x264", X264_LOG_ERROR, "can't set outfile param\n" );
- input.close_file( opt->hin );
- output.close_file( opt->hout, largest_pts, second_largest_pts );
- return -1;
- }
- /* Create a new pic */
- - if( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ) )
- - {
- - fprintf( stderr, "x264 [error]: malloc failed\n" );
- - return -1;
- - }
- + FAIL_IF_ERROR( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ), "malloc failed\n" )
- i_start = x264_mdate();
- /* ticks/frame = ticks/second / frames/second */
- ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
- - if( ticks_per_frame < 1 )
- - {
- - fprintf( stderr, "x264 [error]: ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
- - return -1;
- - }
- + FAIL_IF_ERROR( ticks_per_frame < 1, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame )
- if( !param->b_repeat_headers )
- {
- @@ -1473,12 +1457,7 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- x264_nal_t *headers;
- int i_nal;
- - if( x264_encoder_headers( h, &headers, &i_nal ) < 0 )
- - {
- - fprintf( stderr, "x264 [error]: x264_encoder_headers failed\n" );
- - return -1;
- - }
- -
- + FAIL_IF_ERROR( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" )
- if( (i_file = output.write_headers( opt->hout, headers )) < 0 )
- return -1;
- }
- @@ -1508,15 +1487,12 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- if( pic.i_pts <= largest_pts )
- {
- - if( param->i_log_level >= X264_LOG_WARNING )
- - {
- - if( param->i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
- - fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
- + if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
- + x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
- i_frame, output_pts, largest_pts * dts_compress_multiplier );
- - else if( pts_warning_cnt == MAX_PTS_WARNING )
- - fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
- - pts_warning_cnt++;
- - }
- + else if( pts_warning_cnt == MAX_PTS_WARNING )
- + x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
- + pts_warning_cnt++;
- pic.i_pts = largest_pts + ticks_per_frame;
- output_pts = pic.i_pts * dts_compress_multiplier;
- }
- @@ -1573,8 +1549,8 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
- if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
- Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
- }
- - if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
- - fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
- + if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
- + x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
- /* duration algorithm fails when only 1 frame is output */
- if( i_frame_output == 1 )
- diff --git a/x264cli.h b/x264cli.h
- new file mode 100644
- index 0000000..1acca56
- --- /dev/null
- +++ b/x264cli.h
- @@ -0,0 +1,67 @@
- +/*****************************************************************************
- + * x264cli.h: x264cli common
- + *****************************************************************************
- + * Copyright (C) 2003-2010 x264 project
- + *
- + * Authors: Laurent Aimar <fenrir@via.ecp.fr>
- + * Loren Merritt <lorenm@u.washington.edu>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#ifndef X264_CLI_H
- +#define X264_CLI_H
- +
- +#include "common/common.h"
- +
- +typedef void *hnd_t;
- +
- +static inline int64_t gcd( int64_t a, int64_t b )
- +{
- + while( 1 )
- + {
- + int64_t c = a % b;
- + if( !c )
- + return b;
- + a = b;
- + b = c;
- + }
- +}
- +
- +static inline int64_t lcm( int64_t a, int64_t b )
- +{
- + return ( a / gcd( a, b ) ) * b;
- +}
- +
- +static inline char *get_filename_extension( char *filename )
- +{
- + char *ext = filename + strlen( filename );
- + while( *ext != '.' && ext > filename )
- + ext--;
- + ext += *ext == '.';
- + return ext;
- +}
- +
- +void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
- +void x264_cli_printf( int i_level, const char *fmt, ... );
- +
- +#define FAIL_IF_ERR( cond, name, ... )\
- +if( cond )\
- +{\
- + x264_cli_log( name, X264_LOG_ERROR, __VA_ARGS__ );\
- + return -1;\
- +}
- +
- +#endif
- --
- 1.7.1
- From e9d3c46276c11f98f3819d8faf1fd0402bcb6a08 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 30 Jun 2010 13:06:22 -0700
- Subject: [PATCH 5/7] Don't check i16x16 planar mode unless previous modes were useful
- Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on Core i7).
- Negligible effect on compression.
- Also make a few more arrays static.
- ---
- encoder/analyse.c | 29 +++++++++++++++++++----------
- encoder/set.c | 3 ++-
- 2 files changed, 21 insertions(+), 11 deletions(-)
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index 696c78f..cdbdd1e 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
- /* 16x16 prediction selection */
- const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
- + /* Not heavily tuned */
- + static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
- + int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
- +
- if( !h->mb.b_lossless && predict_mode[3] >= 0 )
- {
- h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
- - h->predict_16x16[I_PRED_16x16_P]( p_dst );
- - a->i_satd_i16x16_dir[I_PRED_16x16_P] =
- - h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
- - for( int i = 0; i < 4; i++ )
- + a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
- + a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
- + a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
- + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
- + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
- + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
- +
- + /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
- + if( a->i_satd_i16x16 <= i16x16_thresh )
- {
- - int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
- - COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
- + h->predict_16x16[I_PRED_16x16_P]( p_dst );
- + a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
- + a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
- + COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
- }
- }
- else
- @@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
- /* cavlc mb type prefix */
- a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
- - /* Not heavily tuned */
- - const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
- - if( a->b_fast_intra && a->i_satd_i16x16 > (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
- + if( a->i_satd_i16x16 > i16x16_thresh )
- return;
- /* 8x8 prediction selection */
- @@ -784,7 +793,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
- i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
- }
- /* Not heavily tuned */
- - const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
- + static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
- if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
- return;
- }
- diff --git a/encoder/set.c b/encoder/set.c
- index 8d007aa..8ea6eac 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -534,7 +534,8 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
- {
- int i;
- // random ID number generated according to ISO-11578
- - const uint8_t uuid[16] = {
- + static const uint8_t uuid[16] =
- + {
- 0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
- 0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
- };
- --
- 1.7.1
- From 470c853a3c0817573139ab387b1c3fe207d62a17 Mon Sep 17 00:00:00 2001
- From: Jason Garrett-Glaser <darkshikari@gmail.com>
- Date: Wed, 30 Jun 2010 13:55:46 -0700
- Subject: [PATCH 6/7] Support infinite keyint (--keyint infinite).
- This just means x264 won't insert non-scenecut keyframes.
- Useful for streaming when using interactive error recovery or some other mechanism that makes keyframes unnecessary.
- Also change POC logic to limit POC/framenum LSB size (to save bits per slice).
- Also fix a bug in the CPB underflow detection code (didn't affect the bitstream, just resulted in the failure to print certain warning messages).
- ---
- common/common.c | 7 ++++---
- encoder/encoder.c | 8 ++++----
- encoder/ratecontrol.c | 10 +++++-----
- encoder/set.c | 29 +++++++++++++++++------------
- encoder/slicetype.c | 2 +-
- x264.c | 2 +-
- x264.h | 3 ++-
- 7 files changed, 34 insertions(+), 27 deletions(-)
- diff --git a/common/common.c b/common/common.c
- index 8c7cf3c..14dd716 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -638,9 +638,10 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- p->i_dpb_size = atoi(value);
- OPT("keyint")
- {
- - p->i_keyint_max = atoi(value);
- - if( p->i_keyint_min > p->i_keyint_max )
- - p->i_keyint_min = p->i_keyint_max;
- + if( strstr( value, "infinite" ) )
- + p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
- + else
- + p->i_keyint_max = atoi(value);
- }
- OPT2("min-keyint", "keyint-min")
- {
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 5cd3307..31cb84a 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -567,8 +567,7 @@ static int x264_validate_parameters( x264_t *h )
- h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
- h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
- - if( h->param.i_keyint_max <= 0 )
- - h->param.i_keyint_max = 1;
- + h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
- if( h->param.i_scenecut_threshold < 0 )
- h->param.i_scenecut_threshold = 0;
- if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
- @@ -627,9 +626,10 @@ static int x264_validate_parameters( x264_t *h )
- h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
- if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
- h->param.rc.b_mb_tree = 0;
- - if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
- + if( (!h->param.b_intra_refresh && h->param.i_keyint_max != X264_KEYINT_MAX_INFINITE) &&
- + !h->param.rc.i_lookahead && h->param.rc.b_mb_tree )
- {
- - x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
- + x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh or infinite keyint\n" );
- h->param.rc.b_mb_tree = 0;
- }
- if( h->param.rc.b_stat_read )
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 1030ef2..6fdaa98 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -492,13 +492,13 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
- // arbitrary
- #define MAX_DURATION 0.5
- - int max_cpb_output_delay = h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
- + int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX );
- int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
- int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5);
- h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 );
- - h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 32 );
- - h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 32 );
- + h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 );
- + h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 );
- #undef MAX_DURATION
- @@ -1781,10 +1781,10 @@ void x264_hrd_fullness( x264_t *h )
- uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
- uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
- - if( cpb_state < 0 || cpb_state > cpb_size )
- + if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size )
- {
- x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
- - cpb_state < 0 ? "underflow" : "overflow", (float)cpb_state/denom, (float)cpb_size/denom );
- + rct->buffer_fill_final < 0 ? "underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom );
- }
- h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
- diff --git a/encoder/set.c b/encoder/set.c
- index 8ea6eac..9e6e736 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -99,6 +99,7 @@ static void x264_sei_write( bs_t *s, uint8_t *p_start )
- void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
- {
- sps->i_id = i_id;
- + int max_frame_num;
- sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
- if( sps->b_qpprime_y_zero_transform_bypass )
- @@ -118,15 +119,27 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
- /* Never set constraint_set2, it is not necessary and not used in real world. */
- sps->b_constraint_set2 = 0;
- - sps->i_log2_max_frame_num = 4; /* at least 4 */
- - while( (1 << sps->i_log2_max_frame_num) <= param->i_keyint_max && sps->i_log2_max_frame_num < 10 )
- + sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
- + /* extra slot with pyramid so that we don't have to override the
- + * order of forgetting old pictures */
- + sps->vui.i_max_dec_frame_buffering =
- + sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
- + param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
- + sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
- +
- + /* number of refs + current frame */
- + max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
- + sps->i_log2_max_frame_num = 4;
- + while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
- sps->i_log2_max_frame_num++;
- - sps->i_log2_max_frame_num++;
- sps->i_poc_type = 0;
- if( sps->i_poc_type == 0 )
- {
- - sps->i_log2_max_poc_lsb = sps->i_log2_max_frame_num + 1; /* max poc = 2*frame_num */
- + int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
- + sps->i_log2_max_poc_lsb = 4;
- + while( (1 << sps->i_log2_max_poc_lsb) <= max_delta_poc * 2 )
- + sps->i_log2_max_poc_lsb++;
- }
- else if( sps->i_poc_type == 1 )
- {
- @@ -219,14 +232,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
- // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
- - sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
- - /* extra slot with pyramid so that we don't have to override the
- - * order of forgetting old pictures */
- - sps->vui.i_max_dec_frame_buffering =
- - sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
- - param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
- - sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
- -
- sps->vui.b_bitstream_restriction = 1;
- if( sps->vui.b_bitstream_restriction )
- {
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 4ede8cf..7d69b71 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -1009,7 +1009,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
- float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
- /* magic numbers pulled out of thin air */
- float f_thresh_min = f_thresh_max * h->param.i_keyint_min
- - / ( h->param.i_keyint_max * 4 );
- + / ( h->param.i_keyint_max * 4. );
- int res;
- if( h->param.i_keyint_min == h->param.i_keyint_max )
- diff --git a/x264.c b/x264.c
- index 741570c..0bede93 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -409,7 +409,7 @@ static void Help( x264_param_t *defaults, int longhelp )
- H0( "\n" );
- H0( "Frame-type options:\n" );
- H0( "\n" );
- - H0( " -I, --keyint <integer> Maximum GOP size [%d]\n", defaults->i_keyint_max );
- + H0( " -I, --keyint <integer or \"infinite\"> Maximum GOP size [%d]\n", defaults->i_keyint_max );
- H2( " -i, --min-keyint <integer> Minimum GOP size [auto]\n" );
- H2( " --no-scenecut Disable adaptive I-frame decision\n" );
- H2( " --scenecut <integer> How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
- diff --git a/x264.h b/x264.h
- index 86f7426..097365a 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -35,7 +35,7 @@
- #include <stdarg.h>
- -#define X264_BUILD 101
- +#define X264_BUILD 102
- /* x264_t:
- * opaque handler for encoder */
- @@ -152,6 +152,7 @@ typedef struct
- #define X264_B_PYRAMID_STRICT 1
- #define X264_B_PYRAMID_NORMAL 2
- #define X264_KEYINT_MIN_AUTO 0
- +#define X264_KEYINT_MAX_INFINITE (1<<30)
- #define X264_OPEN_GOP_NONE 0
- #define X264_OPEN_GOP_NORMAL 1
- #define X264_OPEN_GOP_BLURAY 2
- --
- 1.7.1
- From 1b4e95140832b569f81984dcc36fea50452380f9 Mon Sep 17 00:00:00 2001
- From: Oskar Arvidsson <oskar@irock.se>
- Date: Fri, 2 Jul 2010 04:06:08 +0200
- Subject: [PATCH 7/7] Support for 9 and 10-bit encoding
- Output bit depth is specified at compile time via --bit-depth.
- There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow.
- Input is still 8-bit only; this will change in the future.
- Note that very few H.264 decoders currently support bit depths above 8.
- ---
- common/arm/mc-c.c | 42 +++++++-----
- common/arm/predict-c.c | 8 ++
- common/bitstream.h | 2 +-
- common/common.c | 17 ++++-
- common/common.h | 39 +++++++----
- common/dct.c | 15 +++-
- common/deblock.c | 27 +++++---
- common/macroblock.c | 2 +-
- common/macroblock.h | 66 ++++++++++++-------
- common/mc.c | 33 ++++-----
- common/mc.h | 2 +-
- common/pixel.c | 14 +++-
- common/ppc/dct.c | 2 +
- common/ppc/deblock.c | 2 +
- common/ppc/mc.c | 4 +
- common/ppc/pixel.c | 4 +
- common/ppc/predict.c | 6 ++
- common/ppc/quant.c | 2 +
- common/predict.c | 63 ++++++++++--------
- common/quant.c | 14 ++---
- common/set.c | 25 ++++++-
- common/x86/mc-c.c | 12 +++-
- common/x86/predict-c.c | 10 +++
- configure | 16 +++++
- encoder/analyse.c | 80 ++++++++++++-----------
- encoder/cabac.c | 25 ++++---
- encoder/cavlc.c | 24 +++----
- encoder/encoder.c | 45 +++++++++----
- encoder/macroblock.h | 4 +-
- encoder/me.h | 2 +-
- encoder/ratecontrol.c | 20 +++---
- encoder/rdo.c | 10 +--
- encoder/set.c | 8 ++-
- encoder/slicetype.c | 10 ++--
- tools/checkasm.c | 169 +++++++++++++++++++++++++----------------------
- x264.c | 24 ++++---
- x264.h | 4 +-
- 37 files changed, 516 insertions(+), 336 deletions(-)
- diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
- index d294eff..b1106dd 100644
- --- a/common/arm/mc-c.c
- +++ b/common/arm/mc-c.c
- @@ -64,6 +64,19 @@ MC_WEIGHT(_nodenom)
- MC_WEIGHT(_offsetadd)
- MC_WEIGHT(_offsetsub)
- +void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
- +void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
- +void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
- +void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
- +
- +void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
- +void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
- +
- +void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
- +void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
- +void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
- +
- +#if !X264_HIGH_BIT_DEPTH
- static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
- {
- if( w->i_scale == 1<<w->i_denom )
- @@ -85,14 +98,6 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
- w->weightfn = x264_mc_wtab_neon;
- }
- -void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
- -void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
- -void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
- -void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
- -
- -void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
- -void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
- -
- static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
- {
- NULL,
- @@ -174,10 +179,6 @@ static uint8_t *get_ref_neon( uint8_t *dst, int *i_dst_stride,
- }
- }
- -void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
- -void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
- -void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
- -
- static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
- int stride, int width, int height, int16_t *buf )
- {
- @@ -198,18 +199,22 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
- src += stride;
- }
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
- {
- if( !(cpu&X264_CPU_ARMV6) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf->prefetch_fenc = x264_prefetch_fenc_arm;
- pf->prefetch_ref = x264_prefetch_ref_arm;
- +#endif // !X264_HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_NEON) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
- pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
- pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon;
- @@ -229,15 +234,16 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
- pf->offsetsub = x264_mc_offsetsub_wtab_neon;
- pf->weight_cache = x264_weight_cache_neon;
- -// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
- -#ifndef SYS_MACOSX
- - pf->memcpy_aligned = x264_memcpy_aligned_neon;
- -#endif
- - pf->memzero_aligned = x264_memzero_aligned_neon;
- -
- pf->mc_chroma = x264_mc_chroma_neon;
- pf->mc_luma = mc_luma_neon;
- pf->get_ref = get_ref_neon;
- pf->hpel_filter = hpel_filter_neon;
- pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
- +#endif // !X264_HIGH_BIT_DEPTH
- +
- +// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
- +#ifndef SYS_MACOSX
- + pf->memcpy_aligned = x264_memcpy_aligned_neon;
- +#endif
- + pf->memzero_aligned = x264_memzero_aligned_neon;
- }
- diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
- index fa7b9f7..b40dc9a 100644
- --- a/common/arm/predict-c.c
- +++ b/common/arm/predict-c.c
- @@ -51,6 +51,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
- if (!(cpu&X264_CPU_ARMV6))
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_4x4_H] = x264_predict_4x4_h_armv6;
- pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_armv6;
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
- @@ -59,6 +60,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
- return;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
- @@ -66,12 +68,14 @@ void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
- if (!(cpu&X264_CPU_NEON))
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_neon;
- pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_neon;
- pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
- pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
- pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
- pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
- @@ -79,8 +83,10 @@ void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_
- if (!(cpu&X264_CPU_NEON))
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_neon;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_neon;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
- @@ -88,10 +94,12 @@ void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
- if (!(cpu&X264_CPU_NEON))
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_16x16_DC ] = x264_predict_16x16_dc_neon;
- pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
- pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
- pf[I_PRED_16x16_H ] = x264_predict_16x16_h_neon;
- pf[I_PRED_16x16_V ] = x264_predict_16x16_v_neon;
- pf[I_PRED_16x16_P ] = x264_predict_16x16_p_neon;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/bitstream.h b/common/bitstream.h
- index dd8118d..318c790 100644
- --- a/common/bitstream.h
- +++ b/common/bitstream.h
- @@ -53,7 +53,7 @@ typedef struct bs_s
- typedef struct
- {
- int last;
- - int16_t level[16];
- + dctcoef level[16];
- uint8_t run[16];
- } x264_run_level_t;
- diff --git a/common/common.c b/common/common.c
- index 14dd716..728dfab 100644
- --- a/common/common.c
- +++ b/common/common.c
- @@ -91,10 +91,10 @@ void x264_param_default( x264_param_t *param )
- param->rc.i_vbv_max_bitrate = 0;
- param->rc.i_vbv_buffer_size = 0;
- param->rc.f_vbv_buffer_init = 0.9;
- - param->rc.i_qp_constant = 23;
- - param->rc.f_rf_constant = 23;
- + param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
- + param->rc.f_rf_constant = 23 + QP_BD_OFFSET;
- param->rc.i_qp_min = 10;
- - param->rc.i_qp_max = 51;
- + param->rc.i_qp_max = QP_MAX;
- param->rc.i_qp_step = 4;
- param->rc.f_ip_factor = 1.4;
- param->rc.f_pb_factor = 1.3;
- @@ -418,6 +418,15 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
- if( !profile )
- return 0;
- +#if BIT_DEPTH > 8
- + if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
- + !strcasecmp( profile, "high" ) )
- + {
- + x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
- + return -1;
- + }
- +#endif
- +
- if( !strcasecmp( profile, "baseline" ) )
- {
- param->analyse.b_transform_8x8 = 0;
- @@ -441,7 +450,7 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
- param->analyse.b_transform_8x8 = 0;
- param->i_cqm_preset = X264_CQM_FLAT;
- }
- - else if( !strcasecmp( profile, "high" ) )
- + else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
- {
- /* Default */
- }
- diff --git a/common/common.h b/common/common.h
- index 7b60811..a218d35 100644
- --- a/common/common.h
- +++ b/common/common.h
- @@ -54,8 +54,13 @@ do {\
- #define X264_THREAD_MAX 128
- #define X264_PCM_COST (386*8)
- #define X264_LOOKAHEAD_MAX 250
- +#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
- +#define QP_MAX (51+QP_BD_OFFSET)
- +#define QP_MAX_MAX (51+2*6)
- +#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
- +#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
- // arbitrary, but low because SATD scores are 1/4 normal
- -#define X264_LOOKAHEAD_QP 12
- +#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
- // number of pixels (per thread) in progress at any given time.
- // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
- @@ -101,17 +106,23 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
- #define CP64(dst,src) M64(dst) = M64(src)
- #define CP128(dst,src) M128(dst) = M128(src)
- -typedef uint8_t pixel;
- -typedef uint32_t pixel4;
- -typedef int16_t dctcoef;
- +#if X264_HIGH_BIT_DEPTH
- + typedef uint16_t pixel;
- + typedef uint64_t pixel4;
- + typedef int32_t dctcoef;
- -#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
- -#define MPIXEL_X4(src) M32(src)
- -#define CPPIXEL_X4(dst,src) CP32(dst,src)
- -#define CPPIXEL_X8(dst,src) CP64(dst,src)
- -#define MDCT_X2(dct) M32(dct)
- -#define CPDCT_X2(dst,src) CP32(dst,src)
- -#define CPDCT_X4(dst,src) CP64(dst,src)
- +# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
- +# define MPIXEL_X4(src) M64(src)
- +#else
- + typedef uint8_t pixel;
- + typedef uint32_t pixel4;
- + typedef int16_t dctcoef;
- +
- +# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
- +# define MPIXEL_X4(src) M32(src)
- +#endif
- +
- +#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
- #define X264_SCAN8_SIZE (6*8)
- #define X264_SCAN8_LUMA_SIZE (5*8)
- @@ -189,7 +200,7 @@ void x264_init_vlc_tables();
- static ALWAYS_INLINE pixel x264_clip_pixel( int x )
- {
- - return x&(~255) ? (-x)>>31 : x;
- + return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
- }
- static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
- @@ -449,8 +460,8 @@ struct x264_t
- /* mv/ref cost arrays. Indexed by lambda instead of
- * qp because, due to rounding, some quantizers share
- * lambdas. This saves memory. */
- - uint16_t *cost_mv[92];
- - uint16_t *cost_mv_fpel[92][4];
- + uint16_t *cost_mv[LAMBDA_MAX+1];
- + uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
- const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
- diff --git a/common/dct.c b/common/dct.c
- index 60dbd55..cd27363 100644
- --- a/common/dct.c
- +++ b/common/dct.c
- @@ -418,6 +418,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
- dctf->dct4x4dc = dct4x4dc;
- dctf->idct4x4dc = idct4x4dc;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- {
- @@ -515,6 +516,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
- dctf->add16x16_idct8= x264_add16x16_idct8_neon;
- }
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_dct_init_weights( void )
- @@ -599,11 +601,9 @@ static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
- static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
- {
- - CPDCT_X2( level, dct );
- + memcpy( level, dct, 2 * sizeof(dctcoef) );
- ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
- - CPDCT_X2( level+6, dct+6 );
- - CPDCT_X4( level+8, dct+8 );
- - CPDCT_X4( level+12, dct+12 );
- + memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
- }
- #undef ZIG
- @@ -618,6 +618,7 @@ static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
- CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
- CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
- CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
- +#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
- #define COPY8x8\
- CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
- CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
- @@ -709,6 +710,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- pf->sub_8x8 = zigzag_sub_8x8_field;
- pf->sub_4x4 = zigzag_sub_4x4_field;
- pf->sub_4x4ac = zigzag_sub_4x4ac_field;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- @@ -726,6 +728,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- if( cpu&X264_CPU_ALTIVEC )
- pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- else
- {
- @@ -734,6 +737,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- pf->sub_8x8 = zigzag_sub_8x8_frame;
- pf->sub_4x4 = zigzag_sub_4x4_frame;
- pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
- @@ -759,13 +763,16 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
- if( cpu&X264_CPU_NEON )
- pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
- if( cpu&X264_CPU_SHUFFLE_IS_FAST )
- pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/deblock.c b/common/deblock.c
- index db9c95d..0b3b6df 100644
- --- a/common/deblock.c
- +++ b/common/deblock.c
- @@ -265,18 +265,19 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
- static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
- {
- - int index_a = i_qp + h->sh.i_alpha_c0_offset;
- - int alpha = alpha_table(index_a);
- - int beta = beta_table(i_qp + h->sh.i_beta_offset);
- + int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
- + int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
- + int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
- + int beta = beta_table(index_b) << (BIT_DEPTH-8);
- int8_t tc[4];
- if( !M32(bS) || !alpha || !beta )
- return;
- - tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
- - tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
- - tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
- - tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
- + tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
- + tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
- + tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
- + tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
- pf_inter( pix1, i_stride, alpha, beta, tc );
- if( b_chroma )
- @@ -285,8 +286,10 @@ static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stri
- static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
- {
- - int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
- - int beta = beta_table(i_qp + h->sh.i_beta_offset);
- + int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
- + int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
- + int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
- + int beta = beta_table(index_b) << (BIT_DEPTH-8);
- if( !alpha || !beta )
- return;
- @@ -450,6 +453,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
- #if HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- +#if !X264_HIGH_BIT_DEPTH
- pf->deblock_chroma[1] = x264_deblock_v_chroma_mmxext;
- pf->deblock_chroma[0] = x264_deblock_h_chroma_mmxext;
- pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmxext;
- @@ -460,10 +464,12 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
- pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmxext;
- pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmxext;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- pf->deblock_strength = x264_deblock_strength_mmxext;
- if( cpu&X264_CPU_SSE2 )
- {
- pf->deblock_strength = x264_deblock_strength_sse2;
- +#if !X264_HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_STACK_MOD4) )
- {
- pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
- @@ -471,12 +477,14 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
- pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
- pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- if( cpu&X264_CPU_SSSE3 )
- pf->deblock_strength = x264_deblock_strength_ssse3;
- }
- #endif
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_ALTIVEC
- if( cpu&X264_CPU_ALTIVEC )
- {
- @@ -494,4 +502,5 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
- pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
- }
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/macroblock.c b/common/macroblock.c
- index 4561d8a..f0a624f 100644
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -337,7 +337,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- int scratch_size = 0;
- if( !b_lookahead )
- {
- - int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
- + int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(dctcoef);
- int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
- int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
- int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
- diff --git a/common/macroblock.h b/common/macroblock.h
- index 1a4992f..e09cd55 100644
- --- a/common/macroblock.h
- +++ b/common/macroblock.h
- @@ -238,17 +238,30 @@ static const uint16_t block_idx_xy_fdec[16] =
- 2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE
- };
- -static const uint8_t i_chroma_qp_table[52+12*2] =
- +#define QP(qP) ( (qP)+QP_BD_OFFSET )
- +static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] =
- {
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
- - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
- - 29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
- - 36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
- - 39, 39,
- - 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
- + 0, 0, 0, 0, 0, 0,
- + 0, 0, 0, 0, 0, 0,
- +#if BIT_DEPTH > 9
- + QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7),
- +#endif
- +#if BIT_DEPTH > 8
- + QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1),
- +#endif
- + QP(0), QP(1), QP(2), QP(3), QP(4), QP(5),
- + QP(6), QP(7), QP(8), QP(9), QP(10), QP(11),
- + QP(12), QP(13), QP(14), QP(15), QP(16), QP(17),
- + QP(18), QP(19), QP(20), QP(21), QP(22), QP(23),
- + QP(24), QP(25), QP(26), QP(27), QP(28), QP(29),
- + QP(29), QP(30), QP(31), QP(32), QP(32), QP(33),
- + QP(34), QP(34), QP(35), QP(35), QP(36), QP(36),
- + QP(37), QP(37), QP(37), QP(38), QP(38), QP(38),
- + QP(39), QP(39), QP(39), QP(39),
- + QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
- + QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
- };
- +#undef QP
- enum cabac_ctx_block_cat_e
- {
- @@ -340,26 +353,31 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
- return (a&0xFFFF) + (b<<16);
- #endif
- }
- +static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
- +{
- +#ifdef WORDS_BIGENDIAN
- + return b + ((uint64_t)a<<32);
- +#else
- + return a + ((uint64_t)b<<32);
- +#endif
- +}
- -#define pack_pixel_1to2 pack8to16
- -#define pack_pixel_2to4 pack16to32
- +#if X264_HIGH_BIT_DEPTH
- +# define pack_pixel_1to2 pack16to32
- +# define pack_pixel_2to4 pack32to64
- +#else
- +# define pack_pixel_1to2 pack8to16
- +# define pack_pixel_2to4 pack16to32
- +#endif
- -#define array_non_zero(a) array_non_zero_int(a, sizeof(a))
- +#define array_non_zero(a) array_non_zero_int(a, sizeof(a)/sizeof(dctcoef))
- #define array_non_zero_int array_non_zero_int
- static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
- {
- - if(i_count == 8)
- - return !!M64( &v[0] );
- - else if(i_count == 16)
- - return !!(M64( &v[0] ) | M64( &v[4] ));
- - else if(i_count == 32)
- - return !!(M64( &v[0] ) | M64( &v[4] ) | M64( &v[8] ) | M64( &v[12] ));
- - else
- - {
- - for( int i = 0; i < i_count; i+=4 )
- - if( M64( &v[i] ) ) return 1;
- - return 0;
- - }
- + for( int i = 0; i < i_count; i++ )
- + if( v[i] )
- + return 1;
- + return 0;
- }
- static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
- {
- diff --git a/common/mc.c b/common/mc.c
- index 9776bec..5ef0682 100644
- --- a/common/mc.c
- +++ b/common/mc.c
- @@ -117,11 +117,14 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
- {
- w->weightfn = h->mc.weight;
- }
- -#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
- -#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
- -static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
- +#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
- +#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
- +static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
- {
- - if( weight->i_denom >= 1 )
- + int offset = weight->i_offset << (BIT_DEPTH-8);
- + int scale = weight->i_scale;
- + int denom = weight->i_denom;
- + if( denom >= 1 )
- {
- for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
- for( int x = 0; x < i_width; x++ )
- @@ -135,21 +138,10 @@ static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_sr
- }
- }
- -#define MC_WEIGHT_C( name, lx ) \
- +#define MC_WEIGHT_C( name, width ) \
- static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
- { \
- - if( weight->i_denom >= 1 ) \
- - { \
- - for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
- - for( int x = 0; x < lx; x++ ) \
- - opscale( x ); \
- - } \
- - else \
- - { \
- - for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
- - for( int x = 0; x < lx; x++ ) \
- - opscale_noden( x ); \
- - } \
- + mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
- }
- MC_WEIGHT_C( mc_weight_w20, 20 )
- @@ -182,7 +174,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride,
- #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
- static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
- - int stride, int width, int height, int16_t *buf )
- + int stride, int width, int height, dctcoef *buf )
- {
- for( int y = 0; y < height; y++ )
- {
- @@ -301,7 +293,12 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
- {
- while( h-- )
- {
- +#if X264_HIGH_BIT_DEPTH
- + for( int i = 0; i < w; i++ )
- + dst[i] = src[i] << (BIT_DEPTH-8);
- +#else
- memcpy( dst, src, w );
- +#endif
- dst += i_dst;
- src += i_src;
- }
- diff --git a/common/mc.h b/common/mc.h
- index bb16d13..cbdf1a6 100644
- --- a/common/mc.h
- +++ b/common/mc.h
- @@ -82,7 +82,7 @@ typedef struct
- uint8_t *src, int i_src, int w, int h);
- void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
- - int i_stride, int i_width, int i_height, int16_t *buf );
- + int i_stride, int i_width, int i_height, dctcoef *buf );
- /* prefetch the next few macroblocks of fenc or fdec */
- void (*prefetch_fenc)( pixel *pix_y, int stride_y,
- diff --git a/common/pixel.c b/common/pixel.c
- index 8441c7a..069589f 100644
- --- a/common/pixel.c
- +++ b/common/pixel.c
- @@ -177,7 +177,7 @@ static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride
- pix2 += i_stride2;
- }
- sum = abs(sum);
- - var = sqr - (sum * sum >> 6);
- + var = sqr - ((uint64_t)sum * sum >> 6);
- *ssd = sqr;
- return var;
- }
- @@ -406,12 +406,14 @@ SAD_X( 8x4 )
- SAD_X( 4x8 )
- SAD_X( 4x4 )
- +#if !X264_HIGH_BIT_DEPTH
- #if ARCH_UltraSparc
- SAD_X( 16x16_vis )
- SAD_X( 16x8_vis )
- SAD_X( 8x16_vis )
- SAD_X( 8x8_vis )
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- /****************************************************************************
- * pixel_satd_x4
- @@ -444,6 +446,7 @@ SATD_X_DECL6( cpu )\
- SATD_X( 4x4, cpu )
- SATD_X_DECL7()
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- SATD_X_DECL7( _mmxext )
- SATD_X_DECL6( _sse2 )
- @@ -454,6 +457,7 @@ SATD_X_DECL7( _sse4 )
- #if HAVE_ARMV6
- SATD_X_DECL7( _neon )
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- #define INTRA_MBCMP_8x8( mbcmp )\
- void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
- @@ -520,8 +524,8 @@ static void ssim_4x4x2_core( const pixel *pix1, int stride1,
- static float ssim_end1( int s1, int s2, int ss, int s12 )
- {
- - static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
- - static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
- + static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
- + static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
- int vars = ss*64 - s1*s1 - s2*s2;
- int covar = s12*64 - s1*s2;
- return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
- @@ -678,6 +682,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
- pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16;
- pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- {
- @@ -903,17 +908,20 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
- }
- }
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- #if HAVE_ALTIVEC
- if( cpu&X264_CPU_ALTIVEC )
- {
- x264_pixel_altivec_init( pixf );
- }
- #endif
- +#if !X264_HIGH_BIT_DEPTH
- #if ARCH_UltraSparc
- INIT4( sad, _vis );
- INIT4( sad_x3, _vis );
- INIT4( sad_x4, _vis );
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- pixf->ads[PIXEL_8x16] =
- pixf->ads[PIXEL_8x4] =
- diff --git a/common/ppc/dct.c b/common/ppc/dct.c
- index eb223ae..85d5ce7 100644
- --- a/common/ppc/dct.c
- +++ b/common/ppc/dct.c
- @@ -24,6 +24,7 @@
- #include "common/common.h"
- #include "ppccommon.h"
- +#if !X264_HIGH_BIT_DEPTH
- #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
- b1 = vec_add( a0, a3 ); \
- b3 = vec_add( a1, a2 ); \
- @@ -482,4 +483,5 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
- vec_st( tmp0v, 0x00, level );
- vec_st( tmp1v, 0x10, level );
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
- index 0c8d2d4..986710d 100644
- --- a/common/ppc/deblock.c
- +++ b/common/ppc/deblock.c
- @@ -21,6 +21,7 @@
- #include "common/common.h"
- #include "ppccommon.h"
- +#if !X264_HIGH_BIT_DEPTH
- #define transpose4x16(r0, r1, r2, r3) \
- { \
- register vec_u8_t r4; \
- @@ -292,3 +293,4 @@ void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta,
- transpose4x16(line1, line2, line3, line4);
- write16x4(pix-2, stride, line1, line2, line3, line4);
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- diff --git a/common/ppc/mc.c b/common/ppc/mc.c
- index 7ad8050..744a804 100644
- --- a/common/ppc/mc.c
- +++ b/common/ppc/mc.c
- @@ -33,6 +33,7 @@
- #include "mc.h"
- #include "ppccommon.h"
- +#if !X264_HIGH_BIT_DEPTH
- typedef void (*pf_mc_t)( uint8_t *src, int i_src,
- uint8_t *dst, int i_dst, int i_height );
- @@ -792,9 +793,11 @@ static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_
- dstc += dst_stride;
- }
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- void x264_mc_altivec_init( x264_mc_functions_t *pf )
- {
- +#if !X264_HIGH_BIT_DEPTH
- pf->mc_luma = mc_luma_altivec;
- pf->get_ref = get_ref_altivec;
- pf->mc_chroma = mc_chroma_altivec;
- @@ -804,4 +807,5 @@ void x264_mc_altivec_init( x264_mc_functions_t *pf )
- pf->hpel_filter = x264_hpel_filter_altivec;
- pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
- index 3f99606..bd5f547 100644
- --- a/common/ppc/pixel.c
- +++ b/common/ppc/pixel.c
- @@ -24,6 +24,7 @@
- #include "common/common.h"
- #include "ppccommon.h"
- +#if !X264_HIGH_BIT_DEPTH
- /***********************************************************************
- * SAD routines
- **********************************************************************/
- @@ -1979,12 +1980,14 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
- sums[0][3] = temp[0];
- sums[1][3] = temp[1];
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- /****************************************************************************
- * x264_pixel_init:
- ****************************************************************************/
- void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
- {
- +#if !X264_HIGH_BIT_DEPTH
- pixf->sad[PIXEL_16x16] = pixel_sad_16x16_altivec;
- pixf->sad[PIXEL_8x16] = pixel_sad_8x16_altivec;
- pixf->sad[PIXEL_16x8] = pixel_sad_16x8_altivec;
- @@ -2023,4 +2026,5 @@ void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
- pixf->hadamard_ac[PIXEL_8x8] = x264_pixel_hadamard_ac_8x8_altivec;
- pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/ppc/predict.c b/common/ppc/predict.c
- index 3fb1a2b..c71dbb5 100644
- --- a/common/ppc/predict.c
- +++ b/common/ppc/predict.c
- @@ -23,6 +23,7 @@
- #include "pixel.h"
- #include "ppccommon.h"
- +#if !X264_HIGH_BIT_DEPTH
- static void predict_8x8c_p_altivec( uint8_t *src )
- {
- int H = 0, V = 0;
- @@ -194,6 +195,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
- src += FDEC_STRIDE;
- }
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- /****************************************************************************
- @@ -201,6 +203,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
- ****************************************************************************/
- void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
- {
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_16x16_V ] = predict_16x16_v_altivec;
- pf[I_PRED_16x16_H ] = predict_16x16_h_altivec;
- pf[I_PRED_16x16_DC] = predict_16x16_dc_altivec;
- @@ -208,9 +211,12 @@ void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
- pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
- pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
- pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
- {
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_CHROMA_P] = predict_8x8c_p_altivec;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/ppc/quant.c b/common/ppc/quant.c
- index 6f41a06..ffd6a1b 100644
- --- a/common/ppc/quant.c
- +++ b/common/ppc/quant.c
- @@ -22,6 +22,7 @@
- #include "ppccommon.h"
- #include "quant.h"
- +#if !X264_HIGH_BIT_DEPTH
- // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
- #define QUANT_16_U( idx0, idx1 ) \
- { \
- @@ -360,4 +361,5 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
- DEQUANT_SHR();
- }
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- diff --git a/common/predict.c b/common/predict.c
- index 79ec1fc..dc92083 100644
- --- a/common/predict.c
- +++ b/common/predict.c
- @@ -53,40 +53,40 @@
- void x264_predict_16x16_dc_c( pixel *src )
- {
- - pixel4 dc = 0;
- + int dc = 0;
- for( int i = 0; i < 16; i++ )
- {
- dc += src[-1 + i * FDEC_STRIDE];
- dc += src[i - FDEC_STRIDE];
- }
- - dc = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
- + pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
- - PREDICT_16x16_DC( dc );
- + PREDICT_16x16_DC( dcsplat );
- }
- static void x264_predict_16x16_dc_left_c( pixel *src )
- {
- - pixel4 dc = 0;
- + int dc = 0;
- for( int i = 0; i < 16; i++ )
- dc += src[-1 + i * FDEC_STRIDE];
- - dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
- + pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
- - PREDICT_16x16_DC( dc );
- + PREDICT_16x16_DC( dcsplat );
- }
- static void x264_predict_16x16_dc_top_c( pixel *src )
- {
- - pixel4 dc = 0;
- + int dc = 0;
- for( int i = 0; i < 16; i++ )
- dc += src[i - FDEC_STRIDE];
- - dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
- + pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
- - PREDICT_16x16_DC( dc );
- + PREDICT_16x16_DC( dcsplat );
- }
- static void x264_predict_16x16_dc_128_c( pixel *src )
- {
- - PREDICT_16x16_DC( PIXEL_SPLAT_X4( 0x80 ) );
- + PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
- }
- void x264_predict_16x16_h_c( pixel *src )
- {
- @@ -155,53 +155,53 @@ static void x264_predict_8x8c_dc_128_c( pixel *src )
- {
- for( int y = 0; y < 8; y++ )
- {
- - MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
- - MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
- + MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
- + MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
- src += FDEC_STRIDE;
- }
- }
- static void x264_predict_8x8c_dc_left_c( pixel *src )
- {
- - pixel4 dc0 = 0, dc1 = 0;
- + int dc0 = 0, dc1 = 0;
- for( int y = 0; y < 4; y++ )
- {
- dc0 += src[y * FDEC_STRIDE - 1];
- dc1 += src[(y+4) * FDEC_STRIDE - 1];
- }
- - dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
- - dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
- + pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
- + pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
- for( int y = 0; y < 4; y++ )
- {
- - MPIXEL_X4( src+0 ) = dc0;
- - MPIXEL_X4( src+4 ) = dc0;
- + MPIXEL_X4( src+0 ) = dc0splat;
- + MPIXEL_X4( src+4 ) = dc0splat;
- src += FDEC_STRIDE;
- }
- for( int y = 0; y < 4; y++ )
- {
- - MPIXEL_X4( src+0 ) = dc1;
- - MPIXEL_X4( src+4 ) = dc1;
- + MPIXEL_X4( src+0 ) = dc1splat;
- + MPIXEL_X4( src+4 ) = dc1splat;
- src += FDEC_STRIDE;
- }
- }
- static void x264_predict_8x8c_dc_top_c( pixel *src )
- {
- - pixel4 dc0 = 0, dc1 = 0;
- + int dc0 = 0, dc1 = 0;
- for( int x = 0; x < 4; x++ )
- {
- dc0 += src[x - FDEC_STRIDE];
- dc1 += src[x + 4 - FDEC_STRIDE];
- }
- - dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
- - dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
- + pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
- + pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
- for( int y = 0; y < 8; y++ )
- {
- - MPIXEL_X4( src+0 ) = dc0;
- - MPIXEL_X4( src+4 ) = dc1;
- + MPIXEL_X4( src+0 ) = dc0splat;
- + MPIXEL_X4( src+4 ) = dc1splat;
- src += FDEC_STRIDE;
- }
- }
- @@ -306,7 +306,7 @@ static void x264_predict_8x8c_p_c( pixel *src )
- static void x264_predict_4x4_dc_128_c( pixel *src )
- {
- - PREDICT_4x4_DC( PIXEL_SPLAT_X4( 0x80 ) );
- + PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
- }
- static void x264_predict_4x4_dc_left_c( pixel *src )
- {
- @@ -491,7 +491,8 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
- }
- else
- {
- - M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
- + MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
- + MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
- edge[32] = SRC(7,-1);
- }
- }
- @@ -523,7 +524,7 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
- static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
- {
- - PREDICT_8x8_DC( PIXEL_SPLAT_X4( 0x80 ) );
- + PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
- }
- static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
- {
- @@ -554,9 +555,13 @@ void x264_predict_8x8_h_c( pixel *src, pixel edge[33] )
- }
- void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
- {
- - uint64_t top = M64( edge+16 );
- + pixel4 top[2] = { MPIXEL_X4( edge+16 ),
- + MPIXEL_X4( edge+20 ) };
- for( int y = 0; y < 8; y++ )
- - M64( src+y*FDEC_STRIDE ) = top;
- + {
- + MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0];
- + MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
- + }
- }
- static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
- {
- diff --git a/common/quant.c b/common/quant.c
- index ece52f9..a7b72cf 100644
- --- a/common/quant.c
- +++ b/common/quant.c
- @@ -142,7 +142,7 @@ static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int
- for( int i = 1; i < size; i++ )
- {
- int level = dct[i];
- - int sign = level>>15;
- + int sign = level>>31;
- level = (level+sign)^sign;
- sum[i] += level;
- level -= offset[i];
- @@ -177,10 +177,7 @@ static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
- int i_score = 0;
- int idx = i_max - 1;
- - /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
- - while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
- - idx -= 2;
- - if( idx >= 0 && dct[idx] == 0 )
- + while( idx >= 0 && dct[idx] == 0 )
- idx--;
- while( idx >= 0 )
- {
- @@ -216,10 +213,7 @@ static int x264_decimate_score64( dctcoef *dct )
- static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
- {
- - int i_last;
- - for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
- - if( M64( l+i_last-3 ) )
- - break;
- + int i_last = i_count-1;
- while( i_last >= 0 && l[i_last] == 0 )
- i_last--;
- return i_last;
- @@ -287,6 +281,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
- pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15;
- pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
- +#if !X264_HIGH_BIT_DEPTH
- #if HAVE_MMX
- if( cpu&X264_CPU_MMX )
- {
- @@ -425,6 +420,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
- pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
- }
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
- pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
- pf->coeff_level_run[ DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
- diff --git a/common/set.c b/common/set.c
- index 16cff8e..86f3854 100644
- --- a/common/set.c
- +++ b/common/set.c
- @@ -78,6 +78,7 @@ int x264_cqm_init( x264_t *h )
- 32 - 11, 32 - 21 };
- int max_qp_err = -1;
- int max_chroma_qp_err = -1;
- + int min_qp_err = QP_MAX+1;
- for( int i = 0; i < 6; i++ )
- {
- @@ -94,9 +95,9 @@ int x264_cqm_init( x264_t *h )
- }
- else
- {
- - CHECKED_MALLOC( h-> quant4_mf[i], 52*size*sizeof(uint16_t) );
- + CHECKED_MALLOC( h-> quant4_mf[i], (QP_MAX+1)*size*sizeof(uint16_t) );
- CHECKED_MALLOC( h->dequant4_mf[i], 6*size*sizeof(int) );
- - CHECKED_MALLOC( h->unquant4_mf[i], 52*size*sizeof(int) );
- + CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
- }
- for( j = (i<4 ? 0 : 4); j < i; j++ )
- @@ -106,7 +107,7 @@ int x264_cqm_init( x264_t *h )
- if( j < i )
- h->quant4_bias[i] = h->quant4_bias[j];
- else
- - CHECKED_MALLOC( h->quant4_bias[i], 52*size*sizeof(uint16_t) );
- + CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(uint16_t) );
- }
- for( int q = 0; q < 6; q++ )
- @@ -140,7 +141,7 @@ int x264_cqm_init( x264_t *h )
- quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
- }
- }
- - for( int q = 0; q < 52; q++ )
- + for( int q = 0; q < QP_MAX+1; q++ )
- {
- int j;
- for( int i_list = 0; i_list < 4; i_list++ )
- @@ -148,6 +149,11 @@ int x264_cqm_init( x264_t *h )
- {
- h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
- h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
- + if( !j )
- + {
- + min_qp_err = X264_MIN( min_qp_err, q );
- + continue;
- + }
- // round to nearest, unless that would cause the deadzone to be negative
- h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
- @@ -161,6 +167,11 @@ int x264_cqm_init( x264_t *h )
- {
- h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
- h->quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
- + if( !j )
- + {
- + min_qp_err = X264_MIN( min_qp_err, q );
- + continue;
- + }
- h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
- if( j > 0xffff && q > max_qp_err )
- max_qp_err = q;
- @@ -179,6 +190,12 @@ int x264_cqm_init( x264_t *h )
- x264_log( h, X264_LOG_ERROR, "but min chroma QP is implied to be %d.\n", h->chroma_qp_table[h->param.rc.i_qp_min] );
- return -1;
- }
- + if( !h->mb.b_lossless && min_qp_err <= h->param.rc.i_qp_max )
- + {
- + x264_log( h, X264_LOG_ERROR, "Quantization underflow. Your CQM is incompatible with QP > %d,\n", min_qp_err-1 );
- + x264_log( h, X264_LOG_ERROR, "but max QP is implied to be %d.\n", h->param.rc.i_qp_max );
- + return -1;
- + }
- return 0;
- fail:
- x264_cqm_delete( h );
- diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
- index 2dcd671..4bb5f33 100644
- --- a/common/x86/mc-c.c
- +++ b/common/x86/mc-c.c
- @@ -125,6 +125,7 @@ PIXEL_AVG_WALL(sse2)
- PIXEL_AVG_WALL(sse2_misalign)
- PIXEL_AVG_WALL(cache64_ssse3)
- +#if !X264_HIGH_BIT_DEPTH
- #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
- static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
- {\
- @@ -355,24 +356,28 @@ static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i
- x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
- }
- }
- +#endif // !X264_HIGH_BIT_DEPTH
- void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
- {
- if( !(cpu&X264_CPU_MMX) )
- return;
- + pf->memcpy_aligned = x264_memcpy_aligned_mmx;
- + pf->memzero_aligned = x264_memzero_aligned_mmx;
- +#if !X264_HIGH_BIT_DEPTH
- pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
- pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
- pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
- pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
- - pf->memcpy_aligned = x264_memcpy_aligned_mmx;
- - pf->memzero_aligned = x264_memzero_aligned_mmx;
- pf->integral_init4v = x264_integral_init4v_mmx;
- pf->integral_init8v = x264_integral_init8v_mmx;
- +#endif // !X264_HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf->mc_luma = mc_luma_mmxext;
- pf->get_ref = get_ref_mmxext;
- pf->mc_chroma = x264_mc_chroma_mmxext;
- @@ -412,12 +417,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
- pf->frame_init_lowres_core = x264_frame_init_lowres_core_cache32_mmxext;
- }
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- if( !(cpu&X264_CPU_SSE2) )
- return;
- pf->memcpy_aligned = x264_memcpy_aligned_sse2;
- pf->memzero_aligned = x264_memzero_aligned_sse2;
- +#if !X264_HIGH_BIT_DEPTH
- pf->integral_init4v = x264_integral_init4v_sse2;
- pf->integral_init8v = x264_integral_init8v_sse2;
- pf->hpel_filter = x264_hpel_filter_sse2_amd;
- @@ -492,4 +499,5 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
- pf->integral_init4h = x264_integral_init4h_sse4;
- pf->integral_init8h = x264_integral_init8h_sse4;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- index e771431..4004265 100644
- --- a/common/x86/predict-c.c
- +++ b/common/x86/predict-c.c
- @@ -75,6 +75,7 @@
- void x264_predict_16x16_v_sse2( uint8_t *src );
- void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
- +#if !X264_HIGH_BIT_DEPTH
- ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
- ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
- ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
- @@ -364,6 +365,7 @@ INTRA_SA8D_X3(ssse3)
- #else
- INTRA_SA8D_X3(mmxext)
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- /****************************************************************************
- * Exported functions:
- @@ -372,6 +374,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
- {
- if( !(cpu&X264_CPU_MMX) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- @@ -397,12 +400,14 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
- #ifdef __GNUC__
- pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
- {
- if( !(cpu&X264_CPU_MMX) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- #if ARCH_X86_64
- pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
- #endif
- @@ -424,12 +429,14 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
- #ifdef __GNUC__
- pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
- #endif
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
- pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
- pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
- @@ -456,12 +463,14 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
- pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
- pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
- *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- {
- if( !(cpu&X264_CPU_MMXEXT) )
- return;
- +#if !X264_HIGH_BIT_DEPTH
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
- pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
- pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
- @@ -474,4 +483,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
- pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
- pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
- pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
- +#endif // !X264_HIGH_BIT_DEPTH
- }
- diff --git a/configure b/configure
- index 24d15ad..5cdd82d 100755
- --- a/configure
- +++ b/configure
- @@ -18,6 +18,7 @@ echo " --enable-gprof adds -pg, doesn't strip"
- echo " --enable-visualize enables visualization (X11 only)"
- echo " --enable-pic build position-independent code"
- echo " --enable-shared build libx264.so"
- +echo " --bit-depth=BIT_DEPTH sets output bit depth (8-10), default 8"
- echo " --extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS"
- echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
- echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
- @@ -124,6 +125,7 @@ gprof="no"
- pic="no"
- vis="no"
- shared="no"
- +bit_depth="8"
- CFLAGS="$CFLAGS -Wall -I."
- LDFLAGS="$LDFLAGS"
- @@ -208,6 +210,13 @@ for opt do
- CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
- LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
- ;;
- + --bit-depth=*)
- + bit_depth="${opt#--bit-depth=}"
- + if [ "$bit_depth" -lt "8" ] || [ "$bit_depth" -gt "10" ]; then
- + echo "Supplied bit depth must be in range [8,10]."
- + exit 1
- + fi
- + ;;
- *)
- echo "Unknown option $opt, ignored"
- ;;
- @@ -644,6 +653,12 @@ if cc_check '' -Wshadow ; then
- CFLAGS="-Wshadow $CFLAGS"
- fi
- +if [ "$bit_depth" -gt "8" ]; then
- + define X264_HIGH_BIT_DEPTH
- +fi
- +
- +define BIT_DEPTH $bit_depth
- +
- rm -f conftest*
- # generate config files
- @@ -724,6 +739,7 @@ gprof: $gprof
- PIC: $pic
- shared: $shared
- visualize: $vis
- +bit depth: $bit_depth
- EOF
- echo >> config.log
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- index cdbdd1e..93f7eed 100644
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -134,25 +134,27 @@ typedef struct
- } x264_mb_analysis_t;
- /* lambda = pow(2,qp/6-2) */
- -const uint8_t x264_lambda_tab[52] = {
- - 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
- - 1, 1, 1, 1, /* 8-11 */
- - 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
- - 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
- - 6, 7, 8, 9,10,11,13,14, /* 28-35 */
- - 16,18,20,23,25,29,32,36, /* 36-43 */
- - 40,45,51,57,64,72,81,91 /* 44-51 */
- +const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
- + 1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */
- + 1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */
- + 2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */
- + 4, 4, 5, 6, 6, 7, 8, 9, /* 24-31 */
- + 10, 11, 13, 14, 16, 18, 20, 23, /* 32-39 */
- + 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
- + 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
- + 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
- };
- /* lambda2 = pow(lambda,2) * .9 * 256 */
- -const int x264_lambda2_tab[52] = {
- - 14, 18, 22, 28, 36, 45, 57, 72, /* 0 - 7 */
- - 91, 115, 145, 182, 230, 290, 365, 460, /* 8 - 15 */
- - 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16 - 23 */
- - 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24 - 31 */
- - 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32 - 39 */
- -148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
- -943718, 1189010, 1498059, 1887436 /* 48 - 51 */
- +const int x264_lambda2_tab[QP_MAX_MAX+1] = {
- + 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
- + 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
- + 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
- + 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
- + 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
- + 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
- + 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
- +5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
- };
- const uint8_t x264_exp2_lut[64] = {
- @@ -188,27 +190,31 @@ const float x264_log2_lz_lut[32] = {
- // should the intra and inter lambdas be different?
- // I'm just matching the behaviour of deadzone quant.
- -static const int x264_trellis_lambda2_tab[2][52] = {
- +static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
- // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
- - { 46, 58, 73, 92, 117, 147,
- - 185, 233, 294, 370, 466, 587,
- - 740, 932, 1174, 1480, 1864, 2349,
- - 2959, 3728, 4697, 5918, 7457, 9395,
- - 11837, 14914, 18790, 23674, 29828, 37581,
- - 47349, 59656, 75163, 94699, 119313, 150326,
- - 189399, 238627, 300652, 378798, 477255, 601304,
- - 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- - 3030384, 3818045, 4810435, 6060769 },
- + { 46, 58, 73, 92, 117, 147,
- + 185, 233, 294, 370, 466, 587,
- + 740, 932, 1174, 1480, 1864, 2349,
- + 2959, 3728, 4697, 5918, 7457, 9395,
- + 11837, 14914, 18790, 23674, 29828, 37581,
- + 47349, 59656, 75163, 94699, 119313, 150326,
- + 189399, 238627, 300652, 378798, 477255, 601304,
- + 757596, 954511, 1202608, 1515192, 1909022, 2405217,
- + 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
- + 12121539,15272182,19241743,24243077,30544363,38483486,
- + 48486154,61088726,76966972,96972308 },
- // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
- - { 27, 34, 43, 54, 68, 86,
- - 108, 136, 172, 216, 273, 343,
- - 433, 545, 687, 865, 1090, 1374,
- - 1731, 2180, 2747, 3461, 4361, 5494,
- - 6922, 8721, 10988, 13844, 17442, 21976,
- - 27688, 34885, 43953, 55377, 69771, 87906,
- - 110755, 139543, 175813, 221511, 279087, 351627,
- - 443023, 558174, 703255, 886046, 1116348, 1406511,
- - 1772093, 2232697, 2813022, 3544186 }
- + { 27, 34, 43, 54, 68, 86,
- + 108, 136, 172, 216, 273, 343,
- + 433, 545, 687, 865, 1090, 1374,
- + 1731, 2180, 2747, 3461, 4361, 5494,
- + 6922, 8721, 10988, 13844, 17442, 21976,
- + 27688, 34885, 43953, 55377, 69771, 87906,
- + 110755, 139543, 175813, 221511, 279087, 351627,
- + 443023, 558174, 703255, 886046, 1116348, 1406511,
- + 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
- + 7088374, 8930791,11252092,14176748,17861583,22504184,
- + 28353495,35723165,45008368,56706990 }
- };
- static const uint16_t x264_chroma_lambda2_offset_tab[] = {
- @@ -237,7 +243,7 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
- static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
- -static uint16_t x264_cost_ref[92][3][33];
- +static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
- static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
- int x264_analyse_init_costs( x264_t *h, int qp )
- @@ -275,7 +281,7 @@ fail:
- void x264_analyse_free_costs( x264_t *h )
- {
- - for( int i = 0; i < 92; i++ )
- + for( int i = 0; i < LAMBDA_MAX+1; i++ )
- {
- if( h->cost_mv[i] )
- x264_free( h->cost_mv[i] - 2*4*2048 );
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- index 8bd40f1..e82d7e9 100644
- --- a/encoder/cabac.c
- +++ b/encoder/cabac.c
- @@ -262,9 +262,9 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
- if( i_dqp != 0 )
- {
- int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
- - /* dqp is interpreted modulo 52 */
- - if( val >= 51 && val != 52 )
- - val = 103 - val;
- + /* dqp is interpreted modulo (QP_MAX+1) */
- + if( val >= QP_MAX && val != QP_MAX+1 )
- + val = 2*QP_MAX+1 - val;
- do
- {
- x264_cabac_encode_decision( cb, 60 + ctx, 1 );
- @@ -767,15 +767,18 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
- i_mb_pos_tex = x264_cabac_pos( cb );
- h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
- - memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
- - cb->p += 256;
- - for( int i = 0; i < 8; i++ )
- - memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- - cb->p += 64;
- - for( int i = 0; i < 8; i++ )
- - memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- - cb->p += 64;
- + bs_t s;
- + bs_init( &s, cb->p, cb->p_end - cb->p );
- + for( int i = 0; i < 256; i++ ) /* 256 samples of plane 0 */
- + bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
- + for( int ch = 1; ch < 3; ch++ ) /* planes 1 and 2, as the replaced memcpy code read (not 0 and 1) */
- + for( int i = 0; i < 8; i++ )
- + for( int j = 0; j < 8; j++ )
- + bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
- +
- + bs_flush( &s );
- + cb->p = s.p;
- x264_cabac_encode_init_core( cb );
- h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- index e2f60b1..632ed41 100644
- --- a/encoder/cavlc.c
- +++ b/encoder/cavlc.c
- @@ -66,7 +66,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
- bs_t *s = &h->out.bs;
- static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
- int i_level_prefix = 15;
- - int mask = level >> 15;
- + int mask = level >> 31;
- int abs_level = (level^mask)-mask;
- int i_level_code = abs_level*2-mask-2;
- if( ( i_level_code >> i_suffix_length ) < 15 )
- @@ -219,10 +219,10 @@ static void cavlc_qp_delta( x264_t *h )
- if( i_dqp )
- {
- - if( i_dqp < -26 )
- - i_dqp += 52;
- - else if( i_dqp > 25 )
- - i_dqp -= 52;
- + if( i_dqp < -(QP_MAX+1)/2 )
- + i_dqp += QP_MAX+1;
- + else if( i_dqp > QP_MAX/2 )
- + i_dqp -= QP_MAX+1;
- }
- bs_write_se( s, i_dqp );
- }
- @@ -309,14 +309,12 @@ void x264_macroblock_write_cavlc( x264_t *h )
- bs_align_0( s );
- - memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
- - s->p += 256;
- - for( int i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- - for( int i = 0; i < 8; i++ )
- - memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
- - s->p += 64;
- + for( int i = 0; i < 256; i++ ) /* 256 samples of plane 0 */
- + bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
- + for( int ch = 1; ch < 3; ch++ ) /* planes 1 and 2, as the replaced memcpy code read (not 0 and 1) */
- + for( int i = 0; i < 8; i++ )
- + for( int j = 0; j < 8; j++ )
- + bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
- bs_init( s, s->p, s->p_end - s->p );
- s->p_start = p_start;
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- index 31cb84a..f7e0e38 100644
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -51,7 +51,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- ****************************************************************************/
- static float x264_psnr( int64_t i_sqe, int64_t i_size )
- {
- - double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
- + double f_mse = (double)i_sqe / (PIXEL_MAX*PIXEL_MAX * (double)i_size);
- if( f_mse <= 0.0000000001 ) /* Max 100dB */
- return 100;
- @@ -68,11 +68,13 @@ static void x264_frame_dump( x264_t *h )
- FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
- if( !f )
- return;
- + int bytes_per_pixel = (BIT_DEPTH+7)/8;
- /* Write the frame in display order */
- - fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET );
- + fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * bytes_per_pixel, SEEK_SET );
- for( int i = 0; i < h->fdec->i_plane; i++ )
- for( int y = 0; y < h->param.i_height >> !!i; y++ )
- - fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
- + for( int j = 0; j < h->param.i_width >> !!i; j++ )
- + fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]]+j, bytes_per_pixel, 1, f );
- fclose( f );
- }
- @@ -469,8 +471,8 @@ static int x264_validate_parameters( x264_t *h )
- x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
- return -1;
- }
- - h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
- - h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
- + h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, QP_MAX );
- + h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
- if( h->param.rc.i_rc_method == X264_RC_CRF )
- {
- h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
- @@ -502,12 +504,12 @@ static int x264_validate_parameters( x264_t *h )
- float qp_p = h->param.rc.i_qp_constant;
- float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
- float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
- - h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
- - h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
- + h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
- + h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
- h->param.rc.i_aq_mode = 0;
- h->param.rc.b_mb_tree = 0;
- }
- - h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
- + h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
- h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
- if( h->param.rc.i_vbv_buffer_size )
- {
- @@ -1054,8 +1056,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
- if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
- goto fail;
- + static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
- /* Checks for known miscompilation issues. */
- - if( h->cost_mv[1][2013] != 24 )
- + if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
- {
- x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
- goto fail;
- @@ -1147,11 +1150,22 @@ x264_t *x264_encoder_open( x264_param_t *param )
- fclose( f );
- }
- - x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
- - h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
- - h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
- - h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
- - "High 4:4:4 Predictive", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
- + const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
- + h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
- + h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
- + h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
- + "High 4:4:4 Predictive";
- +
- + if( h->sps->i_profile_idc < PROFILE_HIGH10 )
- + {
- + x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
- + profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
- + }
- + else
- + {
- + x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
- + profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
- + }
- return h;
- fail:
- @@ -1836,7 +1850,7 @@ static int x264_slice_write( x264_t *h )
- bs_align_1( &h->out.bs );
- /* init cabac */
- - x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
- + x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
- x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
- }
- h->mb.i_last_qp = h->sh.i_qp;
- @@ -2705,6 +2719,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
- for( int i = 0; i < 3; i++ )
- {
- pic_out->img.i_stride[i] = h->fdec->i_stride[i];
- + // FIXME This breaks the API when pixel != uint8_t.
- pic_out->img.plane[i] = h->fdec->plane[i];
- }
- diff --git a/encoder/macroblock.h b/encoder/macroblock.h
- index b1b02fa..7c83344 100644
- --- a/encoder/macroblock.h
- +++ b/encoder/macroblock.h
- @@ -26,8 +26,8 @@
- #include "common/macroblock.h"
- -extern const int x264_lambda2_tab[52];
- -extern const uint8_t x264_lambda_tab[52];
- +extern const int x264_lambda2_tab[QP_MAX_MAX+1];
- +extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
- void x264_rdo_init( void );
- diff --git a/encoder/me.h b/encoder/me.h
- index 912b05d..b125f3d 100644
- --- a/encoder/me.h
- +++ b/encoder/me.h
- @@ -68,7 +68,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
- void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
- uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
- -extern uint16_t *x264_cost_mv_fpel[92][4];
- +extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
- #define COPY1_IF_LT(x,y)\
- if((y)<(x))\
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- index 6fdaa98..bdf44dc 100644
- --- a/encoder/ratecontrol.c
- +++ b/encoder/ratecontrol.c
- @@ -219,7 +219,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
- uint32_t ssd = res >> 32;
- frame->i_pixel_sum[i] += sum;
- frame->i_pixel_ssd[i] += ssd;
- - return ssd - (sum * sum >> shift);
- + return ssd - ((uint64_t)sum * sum >> shift);
- }
- // Find the total AC energy of the block in all planes.
- @@ -300,7 +300,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
- avg_adj /= h->mb.i_mb_count;
- avg_adj_pow2 /= h->mb.i_mb_count;
- strength = h->param.rc.f_aq_strength * avg_adj;
- - avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
- + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f + 2*(BIT_DEPTH-8))) / avg_adj;
- }
- else
- strength = h->param.rc.f_aq_strength * 1.0397f;
- @@ -318,7 +318,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
- else
- {
- uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
- - qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
- + qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
- }
- if( quant_offsets )
- qp_adj += quant_offsets[mb_xy];
- @@ -620,8 +620,8 @@ int x264_ratecontrol_new( x264_t *h )
- rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor );
- rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor );
- rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
- - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
- - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
- + rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX );
- + rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX );
- h->mb.ip_offset = rc->ip_offset + 0.5;
- rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
- @@ -1231,7 +1231,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
- rc->qpa_rc =
- rc->qpa_aq = 0;
- - rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
- + rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
- h->fdec->f_qp_avg_rc =
- h->fdec->f_qp_avg_aq =
- rc->qpm = q;
- @@ -1416,9 +1416,9 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
- * So just calculate the average QP used so far. */
- h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
- : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
- - rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
- - rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
- - rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
- + rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
- + rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
- + rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
- x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
- x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
- @@ -2652,7 +2652,7 @@ static int init_pass2( x264_t *h )
- }
- else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 )
- {
- - if( h->param.rc.i_qp_max < 51 )
- + if( h->param.rc.i_qp_max < QP_MAX )
- x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max );
- else
- x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n");
- diff --git a/encoder/rdo.c b/encoder/rdo.c
- index afaa894..4fae811 100644
- --- a/encoder/rdo.c
- +++ b/encoder/rdo.c
- @@ -443,10 +443,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
- /* We only need to zero an empty 4x4 block. 8x8 can be
- implicitly emptied via zero nnz, as can dc. */
- if( i_coefs == 16 && !dc )
- - {
- - M128( &dct[0] ) = M128_ZERO;
- - M128( &dct[8] ) = M128_ZERO;
- - }
- + memset( dct, 0, 16 * sizeof(dctcoef) );
- return 0;
- }
- @@ -613,10 +610,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
- if( bnode == &nodes_cur[0] )
- {
- if( i_coefs == 16 && !dc )
- - {
- - M128( &dct[0] ) = M128_ZERO;
- - M128( &dct[8] ) = M128_ZERO;
- - }
- + memset( dct, 0, 16 * sizeof(dctcoef) );
- return 0;
- }
- diff --git a/encoder/set.c b/encoder/set.c
- index 9e6e736..55d6df7 100644
- --- a/encoder/set.c
- +++ b/encoder/set.c
- @@ -104,6 +104,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
- sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
- if( sps->b_qpprime_y_zero_transform_bypass )
- sps->i_profile_idc = PROFILE_HIGH444_PREDICTIVE;
- + else if( BIT_DEPTH > 8 )
- + sps->i_profile_idc = PROFILE_HIGH10;
- else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
- sps->i_profile_idc = PROFILE_HIGH;
- else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
- @@ -260,8 +262,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
- if( sps->i_profile_idc >= PROFILE_HIGH )
- {
- bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
- - bs_write_ue( s, 0 ); // bit_depth_luma_minus8
- - bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
- + bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
- + bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
- bs_write( s, 1, sps->b_qpprime_y_zero_transform_bypass );
- bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
- }
- @@ -488,7 +490,7 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
- bs_write( s, 1, pps->b_weighted_pred );
- bs_write( s, 2, pps->b_weighted_bipred );
- - bs_write_se( s, pps->i_pic_init_qp - 26 );
- + bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
- bs_write_se( s, pps->i_pic_init_qs - 26 );
- bs_write_se( s, pps->i_chroma_qp_index_offset );
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- index 7d69b71..ad2a8c2 100644
- --- a/encoder/slicetype.c
- +++ b/encoder/slicetype.c
- @@ -303,7 +303,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
- (mv1)[0], (mv1)[1], 8, 8, w ); \
- h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
- } \
- - i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
- + i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
- m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
- COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
- }
- @@ -393,9 +393,9 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
- }
- x264_me_search( h, &m[l], mvc, i_mvc );
- - m[l].cost -= 2; // remove mvcost from skip mbs
- + m[l].cost -= 2 * a->i_lambda; // remove mvcost from skip mbs
- if( M32( m[l].mv ) )
- - m[l].cost += 5;
- + m[l].cost += 5 * a->i_lambda;
- skip_motionest:
- CP32( fenc_mvs[l], m[l].mv );
- @@ -418,7 +418,7 @@ lowres_intra_mb:
- ALIGNED_ARRAY_16( pixel, edge,[33] );
- pixel *pix = &pix1[8+FDEC_STRIDE - 1];
- pixel *src = &fenc->lowres[0][i_pel_offset - 1];
- - const int intra_penalty = 5;
- + const int intra_penalty = 5 * a->i_lambda;
- int satds[3];
- memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
- @@ -496,7 +496,7 @@ lowres_intra_mb:
- }
- }
- - fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost + (list_used << LOWRES_COST_SHIFT);
- + fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
- }
- #undef TRY_BIDIR
- diff --git a/tools/checkasm.c b/tools/checkasm.c
- index 7fa2c0c..a5ffa17 100644
- --- a/tools/checkasm.c
- +++ b/tools/checkasm.c
- @@ -40,8 +40,10 @@
- uint8_t *buf1, *buf2;
- /* buf3, buf4: used to store output */
- uint8_t *buf3, *buf4;
- -/* pbuf*: point to the same memory as above, just for type convenience */
- -pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
- +/* pbuf1, pbuf2: initialised to random pixel data; tests shouldn't write into them. */
- +pixel *pbuf1, *pbuf2;
- +/* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
- +pixel *pbuf3, *pbuf4;
- int quiet = 0;
- @@ -256,11 +258,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
- int z = i|(i>>4);
- z ^= z>>2;
- z ^= z>>1;
- - buf3[i] = ~(buf4[i] = -(z&1));
- + pbuf4[i] = -(z&1) & PIXEL_MAX;
- + pbuf3[i] = ~pbuf4[i] & PIXEL_MAX;
- }
- // random pattern made of maxed pixel differences, in case an intermediate value overflows
- for( int i = 256; i < 0x1000; i++ )
- - buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
- + {
- + pbuf4[i] = -(pbuf1[i&~0x88]&1) & PIXEL_MAX;
- + pbuf3[i] = ~(pbuf4[i]) & PIXEL_MAX;
- + }
- #define TEST_PIXEL( name, align ) \
- ok = 1, used_asm = 0; \
- @@ -535,22 +541,22 @@ static int check_dct( int cpu_ref, int cpu_new )
- used_asm = 1; \
- call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
- call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
- - if( memcmp( t1, t2, size ) ) \
- + if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- } \
- }
- ok = 1; used_asm = 0;
- - TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
- - TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
- - TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4*2 );
- - TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
- + TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16 );
- + TEST_DCT( sub8x8_dct, dct1, dct2, 16*4 );
- + TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4 );
- + TEST_DCT( sub16x16_dct, dct1, dct2, 16*16 );
- report( "sub_dct4 :" );
- ok = 1; used_asm = 0;
- - TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
- - TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
- + TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64 );
- + TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*4 );
- report( "sub_dct8 :" );
- #undef TEST_DCT
- @@ -574,13 +580,13 @@ static int check_dct( int cpu_ref, int cpu_new )
- { \
- set_func_name( #name ); \
- used_asm = 1; \
- - memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
- - memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
- - memcpy( dct1, src, 512 * sizeof(pixel) ); \
- - memcpy( dct2, src, 512 * sizeof(pixel) ); \
- + memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \
- + memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \
- + memcpy( dct1, src, 256 * sizeof(dctcoef) ); \
- + memcpy( dct2, src, 256 * sizeof(dctcoef) ); \
- call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
- call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
- - if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
- + if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- @@ -615,10 +621,10 @@ static int check_dct( int cpu_ref, int cpu_new )
- dct1[0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
- : i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
- : ((*p++)&0x1fff)-0x1000; /* general case */\
- - memcpy( dct2, dct1, 32 );\
- + memcpy( dct2, dct1, 16 * sizeof(dctcoef) );\
- call_c1( dct_c.name, dct1[0] );\
- call_a1( dct_asm.name, dct2[0] );\
- - if( memcmp( dct1, dct2, 32 ) )\
- + if( memcmp( dct1, dct2, 16 * sizeof(dctcoef) ) )\
- ok = 0;\
- }\
- call_c2( dct_c.name, dct1[0] );\
- @@ -658,11 +664,11 @@ static int check_dct( int cpu_ref, int cpu_new )
- int nz_a, nz_c; \
- set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
- used_asm = 1; \
- - memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- - memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- + memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- + memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
- nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
- nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
- - if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
- + if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- @@ -680,8 +686,8 @@ static int check_dct( int cpu_ref, int cpu_new )
- used_asm = 1; \
- for( int i = 0; i < 2; i++ ) \
- { \
- - memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
- - memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
- + memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
- + memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
- for( int j = 0; j < 4; j++ ) \
- { \
- memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
- @@ -689,7 +695,7 @@ static int check_dct( int cpu_ref, int cpu_new )
- } \
- nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
- nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
- - if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
- + if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
- { \
- ok = 0; \
- fprintf( stderr, #name " [FAILED]\n" ); \
- @@ -779,11 +785,11 @@ static int check_mc( int cpu_ref, int cpu_new )
- const x264_weight_t *weight = weight_none; \
- set_func_name( "mc_luma_%dx%d", w, h ); \
- used_asm = 1; \
- - memset( buf3, 0xCD, 1024 ); \
- - memset( buf4, 0xCD, 1024 ); \
- + for( int i = 0; i < 1024; i++ ) \
- + pbuf3[i] = pbuf4[i] = 0xCD; \
- call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
- call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
- - if( memcmp( buf3, buf4, 1024 ) ) \
- + if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
- { \
- fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
- ok = 0; \
- @@ -796,8 +802,8 @@ static int check_mc( int cpu_ref, int cpu_new )
- const x264_weight_t *weight = weight_none; \
- set_func_name( "get_ref_%dx%d", w, h ); \
- used_asm = 1; \
- - memset( buf3, 0xCD, 1024 ); \
- - memset( buf4, 0xCD, 1024 ); \
- + for( int i = 0; i < 1024; i++ ) \
- + pbuf3[i] = pbuf4[i] = 0xCD; \
- call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
- ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
- for( int i = 0; i < h; i++ ) \
- @@ -814,15 +820,15 @@ static int check_mc( int cpu_ref, int cpu_new )
- { \
- set_func_name( "mc_chroma_%dx%d", w, h ); \
- used_asm = 1; \
- - memset( buf3, 0xCD, 1024 ); \
- - memset( buf4, 0xCD, 1024 ); \
- + for( int i = 0; i < 1024; i++ ) \
- + pbuf3[i] = pbuf4[i] = 0xCD; \
- call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
- call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
- /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
- for( int j = 0; j < h; j++ ) \
- for( int i = w; i < 4; i++ ) \
- dst2[i+j*16] = dst1[i+j*16]; \
- - if( memcmp( buf3, buf4, 1024 ) ) \
- + if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
- { \
- fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
- ok = 0; \
- @@ -867,15 +873,15 @@ static int check_mc( int cpu_ref, int cpu_new )
- ok = 1, used_asm = 0; \
- for( int i = 0; i < 10; i++ ) \
- { \
- - memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
- - memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
- + memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \
- + memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \
- if( mc_a.name[i] != mc_ref.name[i] ) \
- { \
- set_func_name( "%s_%s", #name, pixel_names[i] ); \
- used_asm = 1; \
- call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
- call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
- - if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
- + if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
- { \
- ok = 0; \
- fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
- @@ -971,8 +977,8 @@ static int check_mc( int cpu_ref, int cpu_new )
- void *tmp = pbuf3+49*64;
- set_func_name( "hpel_filter" );
- ok = 1; used_asm = 1;
- - memset( buf3, 0, 4096 * sizeof(pixel) );
- - memset( buf4, 0, 4096 * sizeof(pixel) );
- + memset( pbuf3, 0, 4096 * sizeof(pixel) );
- + memset( pbuf4, 0, 4096 * sizeof(pixel) );
- call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
- call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
- for( int i = 0; i < 3; i++ )
- @@ -1030,13 +1036,13 @@ static int check_mc( int cpu_ref, int cpu_new )
- int stride = 80;\
- set_func_name( #name );\
- used_asm = 1;\
- - memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
- - memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
- - uint16_t *sum = (uint16_t*)buf3;\
- + memcpy( pbuf3, pbuf1, size*2*stride * sizeof(pixel) );\
- + memcpy( pbuf4, pbuf1, size*2*stride * sizeof(pixel) );\
- + uint16_t *sum = (uint16_t*)pbuf3;\
- call_c1( mc_c.name, __VA_ARGS__ );\
- - sum = (uint16_t*)buf4;\
- + sum = (uint16_t*)pbuf4;\
- call_a1( mc_a.name, __VA_ARGS__ );\
- - if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
- + if( memcmp( pbuf3, pbuf4, (stride-8)*2 * sizeof(pixel) )\
- || (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
- ok = 0;\
- call_c2( mc_c.name, __VA_ARGS__ );\
- @@ -1096,11 +1102,11 @@ static int check_deblock( int cpu_ref, int cpu_new )
- /* not exactly the real values of a,b,tc but close enough */
- for( int i = 35, a = 255, c = 250; i >= 0; i-- )
- {
- - alphas[i] = a;
- - betas[i] = (i+1)/2;
- - tcs[i][0] = tcs[i][3] = (c+6)/10;
- - tcs[i][1] = (c+7)/15;
- - tcs[i][2] = (c+9)/20;
- + alphas[i] = a << (BIT_DEPTH-8);
- + betas[i] = (i+1)/2 << (BIT_DEPTH-8);
- + tcs[i][0] = tcs[i][3] = (c+6)/10 << (BIT_DEPTH-8);
- + tcs[i][1] = (c+7)/15 << (BIT_DEPTH-8);
- + tcs[i][2] = (c+9)/20 << (BIT_DEPTH-8);
- a = a*9/10;
- c = c*9/10;
- }
- @@ -1111,15 +1117,15 @@ static int check_deblock( int cpu_ref, int cpu_new )
- int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
- for( int j = 0; j < 1024; j++ ) \
- /* two distributions of random to excersize different failure modes */ \
- - buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
- - memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
- + pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
- + memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \
- if( db_a.name != db_ref.name ) \
- { \
- set_func_name( #name ); \
- used_asm = 1; \
- call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
- - if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
- + if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
- { \
- ok = 0; \
- fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
- @@ -1200,7 +1206,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- h->pps = h->pps_array;
- x264_param_default( &h->param );
- h->chroma_qp_table = i_chroma_qp_table + 12;
- - h->param.rc.i_qp_min = 26;
- + h->param.rc.i_qp_min = 26 + QP_BD_OFFSET;
- h->param.analyse.b_transform_8x8 = 1;
- for( int i_cqm = 0; i_cqm < 4; i_cqm++ )
- @@ -1219,9 +1225,10 @@ static int check_quant( int cpu_ref, int cpu_new )
- }
- else
- {
- + int max_scale = BIT_DEPTH < 10 ? 255 : 228;
- if( i_cqm == 2 )
- for( int i = 0; i < 64; i++ )
- - cqm_buf[i] = 10 + rand() % 246;
- + cqm_buf[i] = 10 + rand() % (max_scale - 9);
- else
- for( int i = 0; i < 64; i++ )
- cqm_buf[i] = 1;
- @@ -1260,7 +1267,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- { \
- set_func_name( #name ); \
- used_asms[0] = 1; \
- - for( int qp = 51; qp > 0; qp-- ) \
- + for( int qp = QP_MAX; qp > 0; qp-- ) \
- { \
- for( int j = 0; j < 2; j++ ) \
- { \
- @@ -1269,7 +1276,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
- result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
- - if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
- + if( memcmp( dct1, dct2, 16*sizeof(dctcoef) ) || result_c != result_a ) \
- { \
- oks[0] = 0; \
- fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
- @@ -1286,14 +1293,14 @@ static int check_quant( int cpu_ref, int cpu_new )
- { \
- set_func_name( #qname ); \
- used_asms[0] = 1; \
- - for( int qp = 51; qp > 0; qp-- ) \
- + for( int qp = QP_MAX; qp > 0; qp-- ) \
- { \
- for( int j = 0; j < 2; j++ ) \
- { \
- INIT_QUANT##w(j) \
- int result_c = call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- int result_a = call_a1( qf_a.qname, dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- - if( memcmp( dct1, dct2, w*w*2 ) || result_c != result_a ) \
- + if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) || result_c != result_a ) \
- { \
- oks[0] = 0; \
- fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
- @@ -1317,14 +1324,14 @@ static int check_quant( int cpu_ref, int cpu_new )
- { \
- set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
- used_asms[1] = 1; \
- - for( int qp = 51; qp > 0; qp-- ) \
- + for( int qp = QP_MAX; qp > 0; qp-- ) \
- { \
- INIT_QUANT##w(1) \
- call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
- - memcpy( dct2, dct1, w*w*2 ); \
- + memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
- call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
- call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
- - if( memcmp( dct1, dct2, w*w*2 ) ) \
- + if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
- { \
- oks[1] = 0; \
- fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
- @@ -1345,15 +1352,15 @@ static int check_quant( int cpu_ref, int cpu_new )
- { \
- set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
- used_asms[1] = 1; \
- - for( int qp = 51; qp > 0; qp-- ) \
- + for( int qp = QP_MAX; qp > 0; qp-- ) \
- { \
- for( int i = 0; i < 16; i++ ) \
- dct1[i] = rand(); \
- call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); \
- - memcpy( dct2, dct1, w*w*2 ); \
- + memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
- call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
- call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
- - if( memcmp( dct1, dct2, w*w*2 ) ) \
- + if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
- { \
- oks[1] = 0; \
- fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
- @@ -1381,12 +1388,12 @@ static int check_quant( int cpu_ref, int cpu_new )
- for( int size = 16; size <= 64; size += 48 )
- {
- set_func_name( "denoise_dct" );
- - memcpy( dct1, buf1, size*2 );
- - memcpy( dct2, buf1, size*2 );
- + memcpy( dct1, buf1, size*sizeof(dctcoef) );
- + memcpy( dct2, buf1, size*sizeof(dctcoef) );
- memcpy( buf3+256, buf3, 256 );
- call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
- call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
- - if( memcmp( dct1, dct2, size*2 ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
- + if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
- ok = 0;
- call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
- call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
- @@ -1431,7 +1438,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- { \
- int nnz = 0; \
- int max = rand() & (w*w-1); \
- - memset( dct1, 0, w*w*2 ); \
- + memset( dct1, 0, w*w*sizeof(dctcoef) ); \
- for( int idx = ac; idx < max; idx++ ) \
- nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
- if( !nnz ) \
- @@ -1464,7 +1471,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- x264_run_level_t runlevel_c, runlevel_a; \
- int nnz = 0; \
- int max = rand() & (w*w-1); \
- - memset( dct1, 0, w*w*2 ); \
- + memset( dct1, 0, w*w*sizeof(dctcoef) ); \
- memcpy( &runlevel_a, buf1+i, sizeof(x264_run_level_t) ); \
- memcpy( &runlevel_c, buf1+i, sizeof(x264_run_level_t) ); \
- for( int idx = ac; idx < max; idx++ ) \
- @@ -1474,7 +1481,7 @@ static int check_quant( int cpu_ref, int cpu_new )
- int result_c = call_c( qf_c.lastname, dct1+ac, &runlevel_c ); \
- int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
- if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
- - memcmp(runlevel_c.level, runlevel_a.level, sizeof(int16_t)*result_c) || \
- + memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
- memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
- { \
- ok = 0; \
- @@ -1529,11 +1536,11 @@ static int check_intra( int cpu_ref, int cpu_new )
- {\
- set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
- used_asm = 1;\
- - memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
- - memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
- + memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\
- + memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\
- call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
- call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
- - if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
- + if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\
- {\
- fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
- ok = 0;\
- @@ -1544,7 +1551,7 @@ static int check_intra( int cpu_ref, int cpu_new )
- {\
- printf( "%2x ", edge[14-j] );\
- for( int k = 0; k < w; k++ )\
- - printf( "%2x ", buf4[48+k+j*32] );\
- + printf( "%2x ", pbuf4[48+k+j*32] );\
- printf( "\n" );\
- }\
- printf( "\n" );\
- @@ -1552,7 +1559,7 @@ static int check_intra( int cpu_ref, int cpu_new )
- {\
- printf( " " );\
- for( int k = 0; k < w; k++ )\
- - printf( "%2x ", buf3[48+k+j*32] );\
- + printf( "%2x ", pbuf3[48+k+j*32] );\
- printf( "\n" );\
- }\
- }\
- @@ -1831,8 +1838,9 @@ int main(int argc, char *argv[])
- fprintf( stderr, "x264: using random seed %u\n", seed );
- srand( seed );
- - buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
- - if( !buf1 )
- + buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) + 16*BENCH_ALIGNS );
- + pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) + 16*BENCH_ALIGNS );
- + if( !buf1 || !pbuf1 )
- {
- fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
- return -1;
- @@ -1840,15 +1848,17 @@ int main(int argc, char *argv[])
- #define INIT_POINTER_OFFSETS\
- buf2 = buf1 + 0xf00;\
- buf3 = buf2 + 0xf00;\
- - buf4 = buf3 + 0x1000;\
- - pbuf1 = (pixel*)buf1;\
- - pbuf2 = (pixel*)buf2;\
- + buf4 = buf3 + 0x1000*sizeof(pixel);\
- + pbuf2 = pbuf1 + 0xf00;\
- pbuf3 = (pixel*)buf3;\
- pbuf4 = (pixel*)buf4;
- INIT_POINTER_OFFSETS;
- for( int i = 0; i < 0x1e00; i++ )
- + {
- buf1[i] = rand() & 0xFF;
- - memset( buf1+0x1e00, 0, 0x2000 );
- + pbuf1[i] = rand() & PIXEL_MAX;
- + }
- + memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) );
- /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
- if( do_bench )
- @@ -1857,6 +1867,7 @@ int main(int argc, char *argv[])
- INIT_POINTER_OFFSETS;
- ret |= x264_stack_pagealign( check_all_flags, i*16 );
- buf1 += 16;
- + pbuf1 += 16;
- quiet = 1;
- fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
- }
- diff --git a/x264.c b/x264.c
- index 0bede93..c1141ab 100644
- --- a/x264.c
- +++ b/x264.c
- @@ -262,6 +262,7 @@ static void Help( x264_param_t *defaults, int longhelp )
- " .mkv -> Matroska\n"
- " .flv -> Flash Video\n"
- " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
- + "Output bit depth: %d (configured at compile time)\n"
- "\n"
- "Options:\n"
- "\n"
- @@ -286,10 +287,11 @@ static void Help( x264_param_t *defaults, int longhelp )
- "no",
- #endif
- #if HAVE_GPAC
- - "yes"
- + "yes",
- #else
- - "no"
- + "no",
- #endif
- + BIT_DEPTH
- );
- H0( "Example usage:\n" );
- H0( "\n" );
- @@ -311,8 +313,8 @@ static void Help( x264_param_t *defaults, int longhelp )
- H0( "\n" );
- H0( "Presets:\n" );
- H0( "\n" );
- - H0( " --profile Force the limits of an H.264 profile [high]\n"
- - " Overrides all settings.\n" );
- + H0( " --profile Force the limits of an H.264 profile [%s]\n"
- + " Overrides all settings.\n", BIT_DEPTH > 8 ? "high10" : "high" );
- H2( " - baseline:\n"
- " --no-8x8dct --bframes 0 --no-cabac\n"
- " --cqm flat --weightp 0\n"
- @@ -322,8 +324,11 @@ static void Help( x264_param_t *defaults, int longhelp )
- " --no-8x8dct --cqm flat\n"
- " No lossless.\n"
- " - high:\n"
- - " No lossless.\n" );
- - else H0( " - baseline,main,high\n" );
- + " No lossless.\n"
- + " - high10:\n"
- + " No lossless.\n"
- + " Support for bit depth 8-10.\n" );
- + else H0( " - baseline,main,high,high10\n" );
- H0( " --preset Use a preset to select encoding settings [medium]\n"
- " Overridden by user settings.\n" );
- H2( " - ultrafast:\n"
- @@ -453,9 +458,9 @@ static void Help( x264_param_t *defaults, int longhelp )
- H0( "\n" );
- H0( "Ratecontrol:\n" );
- H0( "\n" );
- - H1( " -q, --qp <integer> Force constant QP (0-51, 0=lossless)\n" );
- + H1( " -q, --qp <integer> Force constant QP (0-%d, 0=lossless)\n", QP_MAX );
- H0( " -B, --bitrate <integer> Set bitrate (kbit/s)\n" );
- - H0( " --crf <float> Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
- + H0( " --crf <float> Quality-based VBR (0-%d, 0=lossless) [%.1f]\n", QP_MAX, defaults->rc.f_rf_constant );
- H1( " --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
- H0( " --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
- H0( " --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
- @@ -1040,6 +1045,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
- #else
- printf( "using a non-gcc compiler\n" );
- #endif
- + printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
- exit(0);
- case OPT_FRAMES:
- param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
- @@ -1318,7 +1324,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
- else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
- else if( type == 'b' ) pic->i_type = X264_TYPE_B;
- else ret = 0;
- - if( ret != 3 || qp < -1 || qp > 51 )
- + if( ret != 3 || qp < -1 || qp > QP_MAX )
- {
- x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
- fclose( opt->qpfile );
- diff --git a/x264.h b/x264.h
- index 097365a..4d9b9ca 100644
- --- a/x264.h
- +++ b/x264.h
- @@ -344,7 +344,7 @@ typedef struct x264_param_t
- {
- int i_rc_method; /* X264_RC_* */
- - int i_qp_constant; /* 0-51 */
- + int i_qp_constant; /* 0 to (51 + 6*(BIT_DEPTH-8)) */
- int i_qp_min; /* min allowed QP value */
- int i_qp_max; /* max allowed QP value */
- int i_qp_step; /* max QP step between frames */
- @@ -550,7 +550,7 @@ void x264_param_apply_fastfirstpass( x264_param_t * );
- /* x264_param_apply_profile:
- * Applies the restrictions of the given profile.
- * Currently available profiles are, from most to least restrictive: */
- -static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
- +static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", 0 };
- /* (can be NULL, in which case the function will do nothing)
- *
- --
- 1.7.1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement