Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/.gitignore b/.gitignore
- old mode 100644
- new mode 100755
- diff --git a/AUTHORS b/AUTHORS
- old mode 100644
- new mode 100755
- diff --git a/COPYING b/COPYING
- old mode 100644
- new mode 100755
- diff --git a/Makefile b/Makefile
- old mode 100644
- new mode 100755
- index 5831091..18f2d66
- --- a/Makefile
- +++ b/Makefile
- @@ -11,7 +11,8 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
- common/mvpred.c common/bitstream.c \
- encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
- encoder/set.c encoder/macroblock.c encoder/cabac.c \
- - encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
- + encoder/cavlc.c encoder/encoder.c encoder/lookahead.c \
- + encoder/trellis.c
- SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
- output/raw.c output/matroska.c output/matroska_ebml.c \
- diff --git a/common/arm/asm.S b/common/arm/asm.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/dct.h b/common/arm/dct.h
- old mode 100644
- new mode 100755
- diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
- old mode 100644
- new mode 100755
- diff --git a/common/arm/mc.h b/common/arm/mc.h
- old mode 100644
- new mode 100755
- diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/pixel.h b/common/arm/pixel.h
- old mode 100644
- new mode 100755
- diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
- old mode 100644
- new mode 100755
- diff --git a/common/arm/predict.h b/common/arm/predict.h
- old mode 100644
- new mode 100755
- diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
- old mode 100644
- new mode 100755
- diff --git a/common/arm/quant.h b/common/arm/quant.h
- old mode 100644
- new mode 100755
- diff --git a/common/bitstream.c b/common/bitstream.c
- old mode 100644
- new mode 100755
- diff --git a/common/bitstream.h b/common/bitstream.h
- old mode 100644
- new mode 100755
- diff --git a/common/cabac.c b/common/cabac.c
- old mode 100644
- new mode 100755
- diff --git a/common/cabac.h b/common/cabac.h
- old mode 100644
- new mode 100755
- diff --git a/common/common.c b/common/common.c
- old mode 100644
- new mode 100755
- index a30d0d8..6818a01
- --- a/common/common.c
- +++ b/common/common.c
- @@ -976,6 +976,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
- p->b_fake_interlaced = atobool(value);
- OPT("frame-packing")
- p->i_frame_packing = atoi(value);
- + OPT("me-trellis")
- + p->analyse.i_me_trellis = atoi(value);
- else
- return X264_PARAM_BAD_NAME;
- #undef OPT
- diff --git a/common/common.h b/common/common.h
- old mode 100644
- new mode 100755
- index 7768571..fee8853
- --- a/common/common.h
- +++ b/common/common.h
- @@ -191,6 +191,7 @@ static const unsigned x264_scan8[16+2*4+3] =
- #include "quant.h"
- #include "cpu.h"
- #include "threadpool.h"
- +#include "trellis.h"
- /****************************************************************************
- * General functions
- @@ -771,6 +772,8 @@ struct x264_t
- int8_t topright_ref[2][3];
- } cache;
- + me_trellis_t trellis;
- +
- /* */
- int i_qp; /* current qp */
- int i_chroma_qp;
- diff --git a/common/cpu.c b/common/cpu.c
- old mode 100644
- new mode 100755
- diff --git a/common/cpu.h b/common/cpu.h
- old mode 100644
- new mode 100755
- diff --git a/common/dct.c b/common/dct.c
- old mode 100644
- new mode 100755
- diff --git a/common/dct.h b/common/dct.h
- old mode 100644
- new mode 100755
- diff --git a/common/deblock.c b/common/deblock.c
- old mode 100644
- new mode 100755
- diff --git a/common/display-x11.c b/common/display-x11.c
- old mode 100644
- new mode 100755
- diff --git a/common/display.h b/common/display.h
- old mode 100644
- new mode 100755
- diff --git a/common/frame.c b/common/frame.c
- old mode 100644
- new mode 100755
- diff --git a/common/frame.h b/common/frame.h
- old mode 100644
- new mode 100755
- diff --git a/common/macroblock.c b/common/macroblock.c
- old mode 100644
- new mode 100755
- index 9eefc0a..61cbbb2
- --- a/common/macroblock.c
- +++ b/common/macroblock.c
- @@ -34,7 +34,7 @@ static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int h
- int i_ref = h->mb.cache.ref[0][i8];
- int mvx = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
- int mvy = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
- -
- +
- h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
- h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
- mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
- @@ -283,6 +283,34 @@ int x264_macroblock_cache_allocate( x264_t *h )
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
- }
- + if ( h->param.analyse.i_me_trellis ) {
- + h->mb.trellis.max_length = X264_MAX( h->mb.i_mb_width, h->mb.i_mb_height );
- + h->mb.trellis.max_breadth = SQUARE2_CANDIDATES;
- + h->mb.trellis.mb_stride_col = T_P_TOTAL+1;
- + h->mb.trellis.mb_stride_row = h->mb.trellis.mb_stride_col * h->mb.i_mb_width;
- + h->mb.trellis.t_stride = h->mb.trellis.max_breadth;
- + h->mb.trellis.gen = 0;
- +
- + CHECKED_MALLOC( h->mb.trellis.t, h->mb.trellis.max_length * h->mb.trellis.max_breadth * sizeof( me_trellis_node_t ) );
- + CHECKED_MALLOC( h->mb.trellis.mb, h->mb.i_mb_count * h->mb.trellis.mb_stride_col * sizeof( mb_t ) );
- + CHECKED_MALLOC( h->mb.trellis.backup, h->mb.trellis.max_length * h->mb.trellis.mb_stride_col * sizeof( mb_t ) );
- +
- + CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_gen[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
- + CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_gen[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
- + CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_tag[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
- + CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_tag[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
- + CHECKED_MALLOC( h->mb.trellis.cache_sad_score[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.cache_sad_score[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.cache_me_t[0], h->mb.i_mb_count * 9 * sizeof(x264_me_t) );
- + CHECKED_MALLOC( h->mb.trellis.cache_me_t[1], h->mb.i_mb_count * 9 * sizeof(x264_me_t) );
- + CHECKED_MALLOC( h->mb.trellis.cache_mode_intra, h->mb.i_mb_count * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.cache_sad_intra, h->mb.i_mb_count * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.best_type[0], h->mb.i_mb_count * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.best_type[1], h->mb.i_mb_count * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.best_part[0], h->mb.i_mb_count * sizeof(int) );
- + CHECKED_MALLOC( h->mb.trellis.best_part[1], h->mb.i_mb_count * sizeof(int) );
- + }
- +
- return 0;
- fail:
- return -1;
- @@ -309,6 +337,27 @@ void x264_macroblock_cache_free( x264_t *h )
- x264_free( h->mb.skipbp );
- x264_free( h->mb.cbp );
- x264_free( h->mb.qp );
- +
- + if ( h->param.analyse.i_me_trellis ) {
- + x264_free( h->mb.trellis.t );
- + x264_free( h->mb.trellis.mb );
- + x264_free( h->mb.trellis.backup );
- +
- + x264_free( h->mb.trellis.cache_sad_gen[0] );
- + x264_free( h->mb.trellis.cache_sad_gen[1] );
- + x264_free( h->mb.trellis.cache_sad_tag[0] );
- + x264_free( h->mb.trellis.cache_sad_tag[1] );
- + x264_free( h->mb.trellis.cache_sad_score[0] );
- + x264_free( h->mb.trellis.cache_sad_score[1] );
- + x264_free( h->mb.trellis.cache_me_t[0] );
- + x264_free( h->mb.trellis.cache_me_t[1] );
- + x264_free( h->mb.trellis.cache_mode_intra );
- + x264_free( h->mb.trellis.cache_sad_intra );
- + x264_free( h->mb.trellis.best_type[0] );
- + x264_free( h->mb.trellis.best_type[1] );
- + x264_free( h->mb.trellis.best_part[0] );
- + x264_free( h->mb.trellis.best_part[1] );
- + }
- }
- int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
- diff --git a/common/macroblock.h b/common/macroblock.h
- old mode 100644
- new mode 100755
- diff --git a/common/mc.c b/common/mc.c
- old mode 100644
- new mode 100755
- diff --git a/common/mc.h b/common/mc.h
- old mode 100644
- new mode 100755
- diff --git a/common/mvpred.c b/common/mvpred.c
- old mode 100644
- new mode 100755
- diff --git a/common/osdep.c b/common/osdep.c
- old mode 100644
- new mode 100755
- diff --git a/common/osdep.h b/common/osdep.h
- old mode 100644
- new mode 100755
- diff --git a/common/pixel.c b/common/pixel.c
- old mode 100644
- new mode 100755
- diff --git a/common/pixel.h b/common/pixel.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/dct.c b/common/ppc/dct.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/dct.h b/common/ppc/dct.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/mc.c b/common/ppc/mc.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/mc.h b/common/ppc/mc.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/pixel.h b/common/ppc/pixel.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/predict.c b/common/ppc/predict.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/predict.h b/common/ppc/predict.h
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/quant.c b/common/ppc/quant.c
- old mode 100644
- new mode 100755
- diff --git a/common/ppc/quant.h b/common/ppc/quant.h
- old mode 100644
- new mode 100755
- diff --git a/common/predict.c b/common/predict.c
- old mode 100644
- new mode 100755
- diff --git a/common/predict.h b/common/predict.h
- old mode 100644
- new mode 100755
- diff --git a/common/quant.c b/common/quant.c
- old mode 100644
- new mode 100755
- diff --git a/common/quant.h b/common/quant.h
- old mode 100644
- new mode 100755
- diff --git a/common/rectangle.c b/common/rectangle.c
- old mode 100644
- new mode 100755
- diff --git a/common/rectangle.h b/common/rectangle.h
- old mode 100644
- new mode 100755
- diff --git a/common/set.c b/common/set.c
- old mode 100644
- new mode 100755
- diff --git a/common/set.h b/common/set.h
- old mode 100644
- new mode 100755
- diff --git a/common/sparc/pixel.asm b/common/sparc/pixel.asm
- old mode 100644
- new mode 100755
- diff --git a/common/sparc/pixel.h b/common/sparc/pixel.h
- old mode 100644
- new mode 100755
- diff --git a/common/threadpool.c b/common/threadpool.c
- old mode 100644
- new mode 100755
- diff --git a/common/threadpool.h b/common/threadpool.h
- old mode 100644
- new mode 100755
- diff --git a/common/trellis.h b/common/trellis.h
- new file mode 100755
- index 0000000..e807e35
- --- /dev/null
- +++ b/common/trellis.h
- @@ -0,0 +1,179 @@
- +#ifndef TRELLIS_H
- +#define TRELLIS_H
- +
- +#define QP_FAKE 24 // fixed index used by T_BITS_MVD / T_BITS_MODE when reading h->cost_mv and x264_lambda_tab
- +#define ME_RANGE h->param.analyse.i_me_range // expands to a field read through `h`, so a variable named h must be in scope at every use
- +
- +#include "common.h"
- +#include "encoder/me.h"
- +
- +// trellis mode identifiers; each mode other than MODE_NONE has a same-named *_CANDIDATES entry in mv_candidates_t
- +enum trellis_mode_t {
- + MODE_NONE = 0,
- + MODE_COMBINED_DIAMOND = 1,
- + MODE_DECIDE = 2,
- + MODE_DIAMOND = 3,
- + MODE_SQUARE2 = 4
- +};
- +
- +// trellis direction (presumably the values passed as `direction` to fill_me_trellis_params — confirm)
- +enum trellis_direction_t {
- + ROW_TRELLIS = 0,
- + COL_TRELLIS = 1
- +};
- +
- +// trellis mb type identifiers
- +enum trellis_p_mb_type_t {
- + T_P_16x16 = 0,
- + T_P_8x8 = 1,
- + T_P_16x8 = 2,
- + T_P_8x16 = 3,
- + T_P_INTRA = 4,
- +
- + T_P_BEST = 5, // shares the value 5 with T_P_TOTAL
- + T_P_TOTAL = 5 // equals the number of types listed above; the macroblock.c hunk sets mb_stride_col to T_P_TOTAL+1 (presumably the extra slot is T_P_BEST — confirm)
- +};
- +
- +// mv direction totals: one candidate count per trellis mode name (see trellis_mode_t)
- +enum mv_candidates_t {
- + COMBINED_DIAMOND_CANDIDATES = 21,
- + DECIDE_CANDIDATES = 5,
- + DIAMOND_CANDIDATES = 5,
- + SQUARE2_CANDIDATES = 25 // largest count; matches the 25 entries of mv_candidate_index_t and is used as the trellis max_breadth
- +};
- +
- +// mv direction indices, 0..24 (SQUARE2_CANDIDATES entries); letters presumably mean T=top, B=bottom, L=left, R=right, C=center — confirm
- +enum mv_candidate_index_t {
- + T___ = 0,
- + B___ = 1,
- + L___ = 2,
- + R___ = 3,
- + C___ = 4,
- + TL__ = 5,
- + TR__ = 6,
- + BL__ = 7,
- + BR__ = 8,
- + LL__ = 9,
- + RR__ = 10,
- + TT__ = 11,
- + BB__ = 12,
- + TTL_ = 13,
- + TTLL = 14,
- + TTR_ = 15,
- + TTRR = 16,
- + TLL_ = 17,
- + TRR_ = 18,
- + BBL_ = 19,
- + BBLL = 20,
- + BBR_ = 21,
- + BBRR = 22,
- + BLL_ = 23,
- + BRR_ = 24
- +};
- +
- +// median of three values; each argument can be evaluated more than once, so pass side-effect-free expressions only
- +#define X264_MED(a,b,c) ( ((a)<=(b)) ? ( ((c)<=(a)) ? (a) : ( ((c)<=(b)) ? (c) : (b) ) ) : ( ((c)>=(a)) ? (a) : ( ((c)>=(b)) ? (c) : (b) ) ) )
- +
- +// copy memory between two records; arguments are forwarded to memcpy, so a is the destination and b the source
- +#define CP_MV(a,b) memcpy( a, b, 2*sizeof( int16_t ) ) // copies two int16_t values
- +#define CP_MB(a,b) memcpy( a, b, sizeof( mb_t ) ) // copies one whole mb_t
- +
- +// cost macros; every one of them reads through a variable named h taken from the calling scope
- +// NOTE(review): T_SAD_SCORE reads h->mb.trellis.sad_p8x8 / sad_p16x16 / sad_p16x8 / sad_p8x16 / sad_intra, none of which are declared in me_trellis_t in this header — confirm whether it is still used
- +#define T_SAD_SCORE( class, partition, i_mv, row, col, x, y )\
- + ((class==P_8x8) ? (h->mb.trellis.sad_p8x8[i_mv][row*h->mb.i_mb_stride+col][(2*ME_RANGE*(y+ME_RANGE))+(x+ME_RANGE)]) : \
- + (((class==P_L0)&&(partition==D_16x16)) ? (h->mb.trellis.sad_p16x16[row*h->mb.i_mb_stride+col][(2*ME_RANGE*(y+ME_RANGE))+(x+ME_RANGE)]) : \
- + (((class==P_L0)&&(partition==D_16x8)) ? (h->mb.trellis.sad_p16x8[i_mv][row*h->mb.i_mb_stride+col][((2*ME_RANGE)*(y+ME_RANGE))+(x+ME_RANGE)]) : \
- + (((class==P_L0)&&(partition==D_8x16)) ? (h->mb.trellis.sad_p8x16[i_mv][row*h->mb.i_mb_stride+col][((2*ME_RANGE)*(y+ME_RANGE))+(x+ME_RANGE)]) : \
- + (h->mb.trellis.sad_intra[row*h->mb.i_mb_stride+col])))))
- +// T_BITS_MVD: sum of two h->cost_mv[QP_FAKE] lookups, indexed by mx<<2 and my<<2
- +#define T_BITS_MVD( mx, my )\
- + (h->cost_mv[QP_FAKE][(mx)<<2] + h->cost_mv[QP_FAKE][(my)<<2])
- +// T_BITS_MODE: x264_lambda_tab[QP_FAKE] times 0 (intra class), 4 (D_16x16), 12 (D_8x8) or 6 (any other partition)
- +#define T_BITS_MODE( class, partition )\
- + (x264_lambda_tab[QP_FAKE] * ( ( IS_INTRA( class ) ) ? ( 0 ) : ( ( partition == D_16x16 ) ? ( 4 ) : ( ( partition == D_8x8 ) ? ( 12 ) : ( 6 ) ) ) ))
- +// T_COST_SAD: calls h->pixf.fpelcmp[i_pixel]; also needs i_pixel, p_fenc, p_fref_w and stride in the calling scope
- +#define T_COST_SAD( mx, my )\
- + h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[(my)*stride+(mx)], stride )
- +
- +// map x264 mb types to trellis mb types: intra class -> T_P_INTRA, P_8x8 -> T_P_8x8, otherwise part[0]
- +// selects T_P_16x16 / T_P_16x8 / T_P_8x16. `block` is an mb_t expression; it is only read, never modified.
- +#define TRELLIS_TYPE( block )\
- + ( IS_INTRA( (block).class ) ? ( T_P_INTRA ) : \
- + ( ( (block).class == P_8x8 ) ? ( T_P_8x8 ) : \
- + ( ( (block).part[0] == D_16x16 ) ? ( T_P_16x16 ) : \
- + ( ( (block).part[0] == D_16x8 ) ? ( T_P_16x8 ) : ( T_P_8x16 ) \
- + ) ) ) )
- +
- +// holds mvs for partitioned block. unused mvs are zero by default. NOTE(review): the mb/backup arrays are allocated with CHECKED_MALLOC rather than CHECKED_MALLOCZERO in the macroblock.c hunk — confirm where the zeroing happens
- +typedef struct mb_t {
- + int class; // tested with IS_INTRA and compared against P_8x8 by TRELLIS_TYPE
- + int costSAD;
- + int costMV;
- + int16_t mv_min[2];
- + int16_t mv_max[2];
- + int8_t part[4]; // part[0] is the element TRELLIS_TYPE uses to tell 16x16 / 16x8 / 8x16 apart
- + int16_t ((mv[2])[4])[2]; // same type as int16_t mv[2][4][2]
- + uint8_t (ref[4])[2]; // same type as uint8_t ref[4][2]; NOTE(review): dimension order differs from mv — confirm the indexing is intended
- +} mb_t;
- +
- +typedef struct me_trellis_node_t { // element type of me_trellis_t.t
- + mb_t block;
- + int8_t parent; // presumably the index of the predecessor node in the previous trellis column — confirm
- + int accCost; // presumably the accumulated path cost up to this node — confirm
- + int costCC;
- + int costDC;
- + int costDL;
- + int costDR;
- + int costCR;
- +} me_trellis_node_t;
- +
- +
- +typedef struct me_trellis_t { // trellis state reached as h->mb.trellis; buffers are allocated/freed in the common/macroblock.c hunks when i_me_trellis is set
- + me_trellis_node_t *t; // max_length * max_breadth nodes
- + mb_t *mb; // i_mb_count * mb_stride_col entries
- + mb_t *backup; // max_length * mb_stride_col entries
- +
- + int mode;
- + int direction;
- + int breadth;
- + int max_breadth; // set to SQUARE2_CANDIDATES at allocation
- + int length;
- + int max_length; // set to the larger of i_mb_width and i_mb_height at allocation
- + uint32_t gen; // cache generation: zeroed at allocation, incremented in x264_slice_write for non-I slices while b_collected is 0
- +
- + int b_collected; // while 0 on a non-I slice, encoder.c skips its b_deblock / b_hpel / b_measure_quality branches and mb stats; once set, write_back_trellis_mv runs per macroblock
- +
- + int mb_stride_col; // T_P_TOTAL+1
- + int mb_stride_row; // mb_stride_col * i_mb_width
- + int t_stride; // set equal to max_breadth
- + // per-macroblock caches indexed [i_mb_xy][slot]; analyse.c uses slot 0 for 16x16, 1+i for 8x8, 5+i for 16x8, 7+i for 8x16 (only element [0] of each pair is used in the visible hunks)
- + uint32_t (*cache_sad_gen[2])[9][8][8];
- + uint32_t (*cache_sad_tag[2])[9][8][8];
- + int (*cache_sad_score[2])[9][8][8];
- + x264_me_t (*cache_me_t[2])[9]; // copies of the x264_me_t search results, one per slot
- + int *cache_mode_intra; // I_16x16 / I_8x8 / I_4x4, whichever matched the minimum intra SATD
- + int *cache_sad_intra; // the minimum intra SATD (i_satd_intra) per macroblock
- + int (*best_type[2]); // [0][i_mb_xy] receives h->mb.i_type
- + int (*best_part[2]); // [0][i_mb_xy] receives h->mb.i_partition
- +} me_trellis_t;
- +// trellis ME interface; the definitions presumably live in encoder/trellis.c, which this patch adds to SRCS in the Makefile — confirm
- +int get_cached_fpel_sad( x264_t *h, int class, int partition, int i_list, int i_mv, int row, int col, int x, int y );
- +void fill_me_trellis_params( x264_t *h, int mode, int direction );
- +void get_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
- +void get_top_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
- +void get_top_right_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
- +void get_top_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
- +void choose_predictors( x264_t *h, mb_t *pred, int row, int col, mb_t *current, mb_t *left, mb_t *top, mb_t *diag );
- +int cost_mb( x264_t *h, int row, int col, int i_mv_max, mb_t *block, mb_t *left, mb_t *top, mb_t *diag );
- +void collect_candidates( x264_t *h );
- +int sad_cost( x264_t *h );
- +int mv_cost( x264_t *h );
- +void fill_trellis( x264_t *h, int i_frame );
- +void adjust_trellis( x264_t *h, int sub );
- +void generate_alternate_mvs( x264_t *h, int16_t mv[SQUARE2_CANDIDATES][2], mb_t *block, int i_list, int i_mv );
- +void run_row_trellis( x264_t *h, int iter );
- +void scale_trellis_mv( x264_t *h );
- +void write_back_trellis_mv( x264_t *h, int i_mb ); // called from x264_slice_write with mb_xy on non-I slices once b_collected is set
- +
- +#endif
- diff --git a/common/visualize.c b/common/visualize.c
- old mode 100644
- new mode 100755
- diff --git a/common/visualize.h b/common/visualize.h
- old mode 100644
- new mode 100755
- diff --git a/common/vlc.c b/common/vlc.c
- old mode 100644
- new mode 100755
- diff --git a/common/win32thread.c b/common/win32thread.c
- old mode 100644
- new mode 100755
- diff --git a/common/win32thread.h b/common/win32thread.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/bitstream-a.asm b/common/x86/bitstream-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/dct.h b/common/x86/dct.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
- old mode 100644
- new mode 100755
- diff --git a/common/x86/mc.h b/common/x86/mc.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/pixel-32.asm b/common/x86/pixel-32.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/pixel.h b/common/x86/pixel.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
- old mode 100644
- new mode 100755
- diff --git a/common/x86/predict.h b/common/x86/predict.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/quant.h b/common/x86/quant.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/util.h b/common/x86/util.h
- old mode 100644
- new mode 100755
- diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
- old mode 100644
- new mode 100755
- diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
- old mode 100644
- new mode 100755
- diff --git a/configure b/configure
- index d164414..b00900f 100755
- --- a/configure
- +++ b/configure
- @@ -857,7 +857,7 @@ if [ "$strip" = "yes" ]; then
- fi
- if [ "$debug" = "yes" ]; then
- - CFLAGS="-O1 -g $CFLAGS"
- + CFLAGS="-O0 -g $CFLAGS"
- elif [ $ARCH = ARM ]; then
- # arm-gcc-4.2 produces incorrect output with -ffast-math
- # and it doesn't save any speed anyway on 4.4, so disable it
- diff --git a/doc/ratecontrol.txt b/doc/ratecontrol.txt
- old mode 100644
- new mode 100755
- diff --git a/doc/regression_test.txt b/doc/regression_test.txt
- old mode 100644
- new mode 100755
- diff --git a/doc/standards.txt b/doc/standards.txt
- old mode 100644
- new mode 100755
- diff --git a/doc/threads.txt b/doc/threads.txt
- old mode 100644
- new mode 100755
- diff --git a/doc/vui.txt b/doc/vui.txt
- old mode 100644
- new mode 100755
- diff --git a/encoder/analyse.c b/encoder/analyse.c
- old mode 100644
- new mode 100755
- index 5b5083d..96061b7
- --- a/encoder/analyse.c
- +++ b/encoder/analyse.c
- @@ -809,7 +809,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
- /* cavlc mb type prefix */
- a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
- - if( a->i_satd_i16x16 > i16x16_thresh )
- + if( !h->param.analyse.i_me_trellis && a->i_satd_i16x16 > i16x16_thresh )
- return;
- /* 8x8 prediction selection */
- @@ -911,7 +911,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
- }
- /* Not heavily tuned */
- static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
- - if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
- + if( !h->param.analyse.i_me_trellis && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
- return;
- }
- @@ -1258,6 +1258,12 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
- /* 16x16 Search on all ref frame */
- m.i_pixel = PIXEL_16x16;
- LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
- +
- + if ( h->param.analyse.i_me_trellis ) {
- + m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][0]);
- + m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][0]);
- + m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][0]);
- + }
- a->l0.me16x16.cost = INT_MAX;
- for( int i_ref = 0; i_ref < h->mb.pic.i_fref[0]; i_ref++ )
- @@ -1307,6 +1313,9 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
- if( m.cost < a->l0.me16x16.cost )
- h->mc.memcpy_aligned( &a->l0.me16x16, &m, sizeof(x264_me_t) );
- }
- +
- + if ( h->param.analyse.i_me_trellis )
- + h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][0]), &(a->l0.me16x16), sizeof(x264_me_t) );
- x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
- assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
- @@ -1440,6 +1449,12 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
- x264_me_t *m = &a->l0.me8x8[i];
- int x8 = i&1;
- int y8 = i>>1;
- +
- + if ( h->param.analyse.i_me_trellis ) {
- + m->cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][1+i]);
- + m->cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][1+i]);
- + m->cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][1+i]);
- + }
- m->i_pixel = PIXEL_8x8;
- m->i_ref_cost = i_ref_cost;
- @@ -1462,6 +1477,10 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
- m->cost += i_ref_cost;
- if( !h->param.b_cabac || (h->param.analyse.inter & X264_ANALYSE_PSUB8x8) )
- m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
- +
- + if ( h->param.analyse.i_me_trellis )
- + h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][1+i]), m, sizeof(x264_me_t) );
- +
- }
- a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
- @@ -1493,6 +1512,12 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
- m.i_pixel = PIXEL_16x8;
- + if ( h->param.analyse.i_me_trellis ) {
- + m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][5+i]);
- + m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][5+i]);
- + m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][5+i]);
- + }
- +
- LOAD_FENC( &m, p_fenc, 0, 8*i );
- l0m->cost = INT_MAX;
- for( int j = 0; j < i_ref8s; j++ )
- @@ -1524,10 +1549,13 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
- if( m.cost < l0m->cost )
- h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );
- }
- +
- + if ( h->param.analyse.i_me_trellis )
- + h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][5+i]), l0m, sizeof(x264_me_t) );
- /* Early termination based on the current SATD score of partition[0]
- plus the estimated SATD score of partition[1] */
- - if( !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
- + if( !(h->param.analyse.i_me_trellis) && !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
- {
- a->l0.i_cost16x8 = COST_MAX;
- return;
- @@ -1559,6 +1587,12 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
- m.i_pixel = PIXEL_8x16;
- + if ( h->param.analyse.i_me_trellis ) {
- + m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][7+i]);
- + m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][7+i]);
- + m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][7+i]);
- + }
- +
- LOAD_FENC( &m, p_fenc, 8*i, 0 );
- l0m->cost = INT_MAX;
- for( int j = 0; j < i_ref8s; j++ )
- @@ -1589,10 +1623,13 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
- if( m.cost < l0m->cost )
- h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );
- }
- +
- + if ( h->param.analyse.i_me_trellis )
- + h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][7+i]), l0m, sizeof(x264_me_t) );
- /* Early termination based on the current SATD score of partition[0]
- plus the estimated SATD score of partition[1] */
- - if( !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
- + if( !h->param.analyse.i_me_trellis && !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
- {
- a->l0.i_cost8x16 = COST_MAX;
- return;
- @@ -2920,8 +2957,8 @@ intra_analysis:
- i_partition = D_16x16;
- i_cost = analysis.l0.me16x16.cost;
- - if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
- - analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
- + if( ( ( h->param.analyse.i_me_trellis || ( flags & X264_ANALYSE_PSUB16x16 ) ) &&
- + analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost ) )
- {
- i_type = P_8x8;
- i_partition = D_8x8;
- @@ -2956,8 +2993,8 @@ intra_analysis:
- /* Now do 16x8/8x16 */
- i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
- - if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
- - analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
- + if( h->param.analyse.i_me_trellis || ( ( flags & X264_ANALYSE_PSUB16x16 ) &&
- + analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 ) )
- {
- int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost
- + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
- @@ -2975,7 +3012,11 @@ intra_analysis:
- }
- h->mb.i_partition = i_partition;
- -
- + if ( h->param.analyse.i_me_trellis )
- + h->mb.trellis.best_part[0][h->mb.i_mb_xy] = h->mb.i_partition;
- +
- + //printf("(%d,%d,%d,%d)\n",analysis.l0.me16x16.cost,analysis.l0.i_cost8x8,analysis.l0.i_cost16x8,analysis.l0.i_cost8x16);
- +
- /* refine qpel */
- //FIXME mb_type costs?
- if( analysis.i_mbrd || !h->mb.i_subpel_refine )
- @@ -3055,7 +3096,18 @@ intra_analysis:
- i_satd_intra = X264_MIN3( analysis.i_satd_i16x16,
- analysis.i_satd_i8x8,
- analysis.i_satd_i4x4 );
- -
- +
- +
- + if ( h->param.analyse.i_me_trellis ) {
- + h->mb.trellis.cache_sad_intra[h->mb.i_mb_xy] = i_satd_intra;
- + if ( i_satd_intra == analysis.i_satd_i16x16 )
- + h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_16x16;
- + else if ( i_satd_intra == analysis.i_satd_i8x8 )
- + h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_8x8;
- + else if ( i_satd_intra == analysis.i_satd_i4x4 )
- + h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_4x4;
- + }
- +
- if( analysis.i_mbrd )
- {
- x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
- @@ -3078,6 +3130,10 @@ intra_analysis:
- COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );
- h->mb.i_type = i_type;
- + if ( h->param.analyse.i_me_trellis )
- + h->mb.trellis.best_type[0][h->mb.i_mb_xy] = h->mb.i_type;
- +
- + //printf("%d,%d\n",i_type,i_partition);
- if( analysis.b_force_intra && !IS_INTRA(i_type) )
- {
- diff --git a/encoder/analyse.h b/encoder/analyse.h
- old mode 100644
- new mode 100755
- diff --git a/encoder/cabac.c b/encoder/cabac.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/cavlc.c b/encoder/cavlc.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/encoder.c b/encoder/encoder.c
- old mode 100644
- new mode 100755
- index 3571a91..ae8a39a
- --- a/encoder/encoder.c
- +++ b/encoder/encoder.c
- @@ -1762,7 +1762,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
- if( min_y < h->i_threadslice_start )
- return;
- - if( b_deblock )
- + if( b_deblock && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
- for( int y = min_y; y < mb_y; y += (1 << SLICE_MBAFF) )
- x264_frame_deblock_row( h, y );
- @@ -1776,7 +1776,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
- h->fdec->plane[p] + i*h->fdec->i_stride[p],
- h->mb.i_mb_width*16*sizeof(pixel) );
- - if( b_hpel )
- + if( b_hpel && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
- {
- int end = mb_y == h->mb.i_mb_height;
- x264_frame_expand_border( h, h->fdec, min_y, end );
- @@ -1797,7 +1797,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
- if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
- x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << SLICE_MBAFF)) );
- - if( b_measure_quality )
- + if( b_measure_quality && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
- {
- maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
- if( h->param.analyse.b_psnr )
- @@ -2023,9 +2023,14 @@ static int x264_slice_write( x264_t *h )
- i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
- i_skip = 0;
- int mb_size[2];
- -
- +
- + h->param.analyse.b_cache_sads = ( h->param.analyse.i_me_trellis && ! h->mb.trellis.b_collected );
- + if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected )
- + h->mb.trellis.gen++; // update cache generation
- +
- while( 1 )
- {
- +
- mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
- int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
- @@ -2079,7 +2084,10 @@ static int x264_slice_write( x264_t *h )
- x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
- x264_macroblock_analyse( h );
- -
- +
- + if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && h->mb.trellis.b_collected )
- + write_back_trellis_mv( h, mb_xy );
- +
- /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
- reencode:
- x264_macroblock_encode( h );
- @@ -2190,59 +2198,62 @@ reencode:
- /* save cache */
- x264_macroblock_cache_save( h );
- - /* accumulate mb stats */
- - h->stat.frame.i_mb_count[h->mb.i_type]++;
- + if( !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) ) {
- + /* accumulate mb stats */
- + h->stat.frame.i_mb_count[h->mb.i_type]++;
- - int b_intra = IS_INTRA( h->mb.i_type );
- - int b_skip = IS_SKIP( h->mb.i_type );
- - if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
- - {
- - if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
- + int b_intra = IS_INTRA( h->mb.i_type );
- + int b_skip = IS_SKIP( h->mb.i_type );
- + if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
- {
- - if( h->mb.i_partition != D_8x8 )
- - h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
- - else
- - for( int i = 0; i < 4; i++ )
- - h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
- - if( h->param.i_frame_reference > 1 )
- - for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
- - for( int i = 0; i < 4; i++ )
- - {
- - int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
- - if( i_ref >= 0 )
- - h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
- - }
- + if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
- + {
- + if( h->mb.i_partition != D_8x8 )
- + h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
- + else
- + for( int i = 0; i < 4; i++ )
- + h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
- + if( h->param.i_frame_reference > 1 )
- + for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
- + for( int i = 0; i < 4; i++ )
- + {
- + int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
- + if( i_ref >= 0 )
- + h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
- + }
- + }
- }
- - }
- - if( h->param.i_log_level >= X264_LOG_INFO )
- - {
- - if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
- + if( h->param.i_log_level >= X264_LOG_INFO )
- {
- - int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
- - + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
- - h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
- - h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
- - h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
- - }
- - if( h->mb.i_cbp_luma && !b_intra )
- - {
- - h->stat.frame.i_mb_count_8x8dct[0] ++;
- - h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
- - }
- - if( b_intra && h->mb.i_type != I_PCM )
- - {
- - if( h->mb.i_type == I_16x16 )
- - h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
- - else if( h->mb.i_type == I_8x8 )
- - for( int i = 0; i < 16; i += 4 )
- - h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- - else //if( h->mb.i_type == I_4x4 )
- - for( int i = 0; i < 16; i++ )
- - h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- - h->stat.frame.i_mb_pred_mode[3][x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode]]++;
- + if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
- + {
- + int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
- + + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
- + h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
- + h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
- + h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
- + }
- + if( h->mb.i_cbp_luma && !b_intra )
- + {
- + h->stat.frame.i_mb_count_8x8dct[0] ++;
- + h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
- + }
- + if( b_intra && h->mb.i_type != I_PCM )
- + {
- + if( h->mb.i_type == I_16x16 )
- + h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
- + else if( h->mb.i_type == I_8x8 )
- + for( int i = 0; i < 16; i += 4 )
- + h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- + else //if( h->mb.i_type == I_4x4 )
- + for( int i = 0; i < 16; i++ )
- + h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
- + h->stat.frame.i_mb_pred_mode[3][x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode]]++;
- + }
- + h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
- }
- - h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
- +
- }
- /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
- @@ -2274,6 +2285,28 @@ reencode:
- i_mb_x = 0;
- }
- }
- +
- + if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) {
- + collect_candidates( h );
- + h->mb.trellis.b_collected = 1;
- + if ( h->param.analyse.i_me_trellis == X264_ME_TRELLIS_COMBINED_DIA ) {
- + fill_me_trellis_params( h, MODE_COMBINED_DIAMOND, ROW_TRELLIS );
- + run_row_trellis( h, 3 );
- + }
- + else {
- + fill_me_trellis_params( h, MODE_DECIDE, ROW_TRELLIS );
- + run_row_trellis( h, 1 );
- + if ( h->param.analyse.i_me_trellis == X264_ME_TRELLIS_DIA )
- + fill_me_trellis_params( h, MODE_DIAMOND, ROW_TRELLIS );
- + else
- + fill_me_trellis_params( h, MODE_SQUARE2, ROW_TRELLIS );
- + run_row_trellis( h, 3 );
- + }
- + scale_trellis_mv( h );
- +
- + return 0;
- + }
- +
- h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
- if( h->param.b_cabac )
- @@ -2300,6 +2333,9 @@ reencode:
- - h->stat.frame.i_mv_bits;
- x264_fdec_filter_row( h, h->i_threadslice_end, 1 );
- }
- +
- + if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && h->mb.trellis.b_collected )
- + h->mb.trellis.b_collected = 0;
- return 0;
- }
- @@ -2373,6 +2409,10 @@ static void *x264_slices_write( x264_t *h )
- h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
- if( x264_stack_align( x264_slice_write, h ) )
- return (void *)-1;
- + if( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I ) {
- + if ( x264_stack_align( x264_slice_write, h ) )
- + return (void *)-1;
- + }
- h->sh.i_first_mb = h->sh.i_last_mb + 1;
- // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
- if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width )
- @@ -2617,6 +2657,8 @@ int x264_encoder_encode( x264_t *h,
- }
- h->i_frame++;
- + // trellis cache fix
- + h->param.analyse.b_cache_sads = 0;
- /* 3: The picture is analyzed in the lookahead */
- if( !h->frames.current[0] )
- x264_lookahead_get_frames( h );
- diff --git a/encoder/lookahead.c b/encoder/lookahead.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/macroblock.c b/encoder/macroblock.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/macroblock.h b/encoder/macroblock.h
- old mode 100644
- new mode 100755
- diff --git a/encoder/me.c b/encoder/me.c
- old mode 100644
- new mode 100755
- index 305e0c4..5fe97cb
- --- a/encoder/me.c
- +++ b/encoder/me.c
- @@ -61,9 +61,13 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
- #define COST_MV( mx, my )\
- {\
- - int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE,\
- - &p_fref_w[(my)*stride+(mx)], stride )\
- - + BITS_MVD(mx,my);\
- + int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[(my)*stride+(mx)], stride );\
- + if ( h->param.analyse.b_cache_sads ) {\
- + (*(m->cache_sad_gen))[(my)&7][(mx)&7] = h->mb.trellis.gen;\
- + (*(m->cache_sad_tag))[(my)&7][(mx)&7] = pack16to32( (mx), (my) );\
- + (*(m->cache_sad_score))[(my)&7][(mx)&7] = cost;\
- + }\
- + cost += BITS_MVD(mx,my);\
- COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my );\
- }
- @@ -123,6 +127,18 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
- COPY3_IF_LT( bcost, costs[3], bmx, omx+(m3x), bmy, omy+(m3y) );\
- }
- +#define CACHE_MV_X3( m0x, m0y, m1x, m1y, m2x, m2y ) {\
- + (*(m->cache_sad_gen))[(m0y)&7][(m0x)&7] = h->mb.trellis.gen;\
- + (*(m->cache_sad_gen))[(m1y)&7][(m1x)&7] = h->mb.trellis.gen;\
- + (*(m->cache_sad_gen))[(m2y)&7][(m2x)&7] = h->mb.trellis.gen;\
- + (*(m->cache_sad_tag))[(m0y)&7][(m0x)&7] = pack16to32( m0x, m0y );\
- + (*(m->cache_sad_tag))[(m1y)&7][(m1x)&7] = pack16to32( m1x, m1y );\
- + (*(m->cache_sad_tag))[(m2y)&7][(m2x)&7] = pack16to32( m2x, m2y );\
- + (*(m->cache_sad_score))[(m0y)&7][(m0x)&7] = costs[0];\
- + (*(m->cache_sad_score))[(m1y)&7][(m1x)&7] = costs[1];\
- + (*(m->cache_sad_score))[(m2y)&7][(m2x)&7] = costs[2];\
- +}
- +
- #define COST_MV_X3_ABS( m0x, m0y, m1x, m1y, m2x, m2y )\
- {\
- h->pixf.fpelcmp_x3[i_pixel]( p_fenc,\
- @@ -130,6 +146,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
- p_fref_w + (m1x) + (m1y)*stride,\
- p_fref_w + (m2x) + (m2y)*stride,\
- stride, costs );\
- + if ( h->param.analyse.b_cache_sads )\
- + CACHE_MV_X3( m0x, m0y, m1x, m1y, m2x, m2y )\
- costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
- costs[1] += p_cost_mvx[(m1x)<<2];\
- costs[2] += p_cost_mvx[(m2x)<<2];\
- @@ -201,6 +219,10 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
- #define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
- uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
- uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
- + m->mv_min[0] = mv_x_min;
- + m->mv_min[1] = mv_y_min;
- + m->mv_max[0] = mv_x_max;
- + m->mv_max[1] = mv_y_max;
- #define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))
- diff --git a/encoder/me.h b/encoder/me.h
- old mode 100644
- new mode 100755
- index 3281630..a843d67
- --- a/encoder/me.h
- +++ b/encoder/me.h
- @@ -53,6 +53,12 @@ typedef struct
- int cost_mv; /* lambda * nbits for the chosen mv */
- int cost; /* satd + lambda * nbits */
- ALIGNED_4( int16_t mv[2] );
- + ALIGNED_4( int16_t mv_min[2] );
- + ALIGNED_4( int16_t mv_max[2] );
- +
- + uint32_t (*cache_sad_gen)[8][8];
- + uint32_t (*cache_sad_tag)[8][8];
- + int (*cache_sad_score)[8][8];
- } ALIGNED_16( x264_me_t );
- typedef struct
- diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
- old mode 100644
- new mode 100755
- diff --git a/encoder/rdo.c b/encoder/rdo.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/set.c b/encoder/set.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/set.h b/encoder/set.h
- old mode 100644
- new mode 100755
- diff --git a/encoder/slicetype.c b/encoder/slicetype.c
- old mode 100644
- new mode 100755
- diff --git a/encoder/trellis.c b/encoder/trellis.c
- new file mode 100755
- index 0000000..4258bb5
- --- /dev/null
- +++ b/encoder/trellis.c
- @@ -0,0 +1,1042 @@
- +#include "common/common.h"
- +
- +// Shared read-only templates: a zero motion vector plus one mb_t initializer per intra type and per P partition shape; the functions below use them as CP_MV / CP_MB sources.
- +const int16_t mv_zero[2] = {0,0};
- +const mb_t mb_i_4x4 = {I_4x4, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_i_8x8 = {I_8x8, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_i_16x16 = {I_16x16, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_p_16x16 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_16x16, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_p_16x8 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_16x8, D_16x8, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_p_8x16 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_8x16, D_8x16, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +const mb_t mb_p_8x8 = {P_8x8, 0, 0, {0, 0}, {0, 0}, {D_8x8, D_8x8, D_8x8, D_8x8}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
- +// Full-pel SAD lookup for macroblock (row, col): intra classes return h->mb.trellis.cache_sad_intra[i_mb]; inter candidates go through a per-partition 8x8 cache indexed by the low 3 bits of (x, y), recomputing the SAD when the entry's generation or tag does not match.
- +int get_cached_fpel_sad( x264_t *h, int class, int partition, int i_list, int i_mv, int row, int col, int x, int y ) {
- + int result, valid, table, idx, idy;
- + uint32_t tag;
- + int i_mb = row * h->mb.i_mb_width + col;
- + uint32_t (*cache_gen)[8][8];
- + uint32_t (*cache_tag)[8][8];
- + int (*cache_score)[8][8];
- + x264_me_t *m;
- + // Intra path: one stored value per macroblock; partition, i_list, i_mv, x and y are not read.
- + if ( IS_INTRA( class ) ) {
- + result = h->mb.trellis.cache_sad_intra[i_mb];
- + }
- + else {
- + valid = 1;
- + table = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) :
- + ( ( class == P_8x8 ) ? ( 1 + i_mv ) :
- + ( ( class == P_L0 && partition == D_16x8 ) ? ( 5 + i_mv ) :
- + ( 7 + i_mv ) ) ); // table index: 0 for 16x16, 1 + i_mv for 8x8, 5 + i_mv for 16x8, 7 + i_mv for anything else
- + tag = pack16to32( x, y ); // full (x, y) pair, used to detect a different MV that maps to the same 8x8 slot
- + cache_gen = &(h->mb.trellis.cache_sad_gen[i_list][i_mb][table]);
- + cache_tag = &(h->mb.trellis.cache_sad_tag[i_list][i_mb][table]);
- + cache_score = &(h->mb.trellis.cache_sad_score[i_list][i_mb][table]);
- + m = &(h->mb.trellis.cache_me_t[i_list][i_mb][table]);
- + const int i_pixel = m->i_pixel;
- + const int i_fref_stride = m->i_stride[0];
- + const int i_fenc_stride = h->fdec->i_stride[0]; // NOTE(review): stride is read from h->fdec while p_fenc below points into h->fenc->plane[0] - confirm the two strides are equal
- + const int xoff = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) :
- + ( ( class == P_8x8 ) ? ( 8*(i_mv&1) ) :
- + ( ( class == P_L0 && partition == D_16x8 ) ? ( 0 ) :
- + ( 8*i_mv ) ) ); // horizontal pixel offset of partition i_mv inside the macroblock
- + const int yoff = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) :
- + ( ( class == P_8x8 ) ? ( 8*(i_mv>>1) ) :
- + ( ( class == P_L0 && partition == D_16x8 ) ? ( 8*i_mv ) :
- + ( 0 ) ) ); // vertical pixel offset of partition i_mv inside the macroblock
- + pixel *p_fenc = &(h->fenc->plane[0][16 * col + xoff + ( 16 * row + yoff) * i_fenc_stride]);
- + pixel *p_fref_w = m->p_fref_w;
- + idx = 7 & x;
- + idy = 7 & y;
- + // A slot is valid only if both its generation and its tag match; on a mismatch the slot is re-stamped here and its score is recomputed below.
- + if ( (*cache_gen)[idy][idx] != h->mb.trellis.gen ) {
- + valid = 0;
- + (*cache_gen)[idy][idx] = h->mb.trellis.gen;
- + }
- + if ( (*cache_tag)[idy][idx] != tag ){
- + valid = 0;
- + (*cache_tag)[idy][idx] = tag;
- + }
- +
- + if ( ! valid ) {
- + (*cache_score)[idy][idx] = h->pixf.fpelcmp[i_pixel]( p_fenc, i_fenc_stride, &p_fref_w[(y)*i_fref_stride+(x)], i_fref_stride );
- + }
- +
- + result = (*cache_score)[idy][idx];
- + }
- +
- + return result;
- +}
- +// For every macroblock, seed the candidate slots T_P_16x16, T_P_8x8, T_P_16x8, T_P_8x16 and T_P_INTRA from the templates and the list-0 cached search results, then copy the slot selected by best_type/best_part into T_P_BEST.
- +void collect_candidates( x264_t *h ) {
- + int row, col, boff;
- + int i_mb = 0;
- + int type, part;
- + int mb_stride = h->mb.trellis.mb_stride_col;
- + for ( row = 0; row < h->mb.i_mb_height; row++ ) {
- + for ( col = 0; col < h->mb.i_mb_width; col++ ) {
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16]), &mb_p_16x16 );
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8]), &mb_p_8x8 );
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8]), &mb_p_16x8 );
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16]), &mb_p_8x16 );
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_INTRA]), ( h->mb.trellis.cache_mode_intra[i_mb] == I_16x16 ) ? ( &mb_i_16x16 ) : ( ( h->mb.trellis.cache_mode_intra[i_mb] == I_8x8 ) ? ( &mb_i_8x8 ) : ( &mb_i_4x4 ) ) );
- + // Cached MVs (tables 0, 1-4, 5-6, 7-8) are stored shifted right by 2; presumably quarter-pel to full-pel - confirm against the units of x264_me_t.mv.
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][0].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][0].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][1].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][1].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][2].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][2].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][2][0] = h->mb.trellis.cache_me_t[0][i_mb][3].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][2][1] = h->mb.trellis.cache_me_t[0][i_mb][3].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][3][0] = h->mb.trellis.cache_me_t[0][i_mb][4].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][3][1] = h->mb.trellis.cache_me_t[0][i_mb][4].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][5].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][5].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][6].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][6].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][7].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][7].mv[1] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][8].mv[0] >> 2;
- + h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][8].mv[1] >> 2;
- + // Copy the lower MV limits. NOTE(review): each candidate has a single mv_min, so for 8x8/16x8/8x16 every later copy overwrites the previous one and only the last partition's limits remain - confirm this is intended.
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][0].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][1].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][2].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][3].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][4].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][5].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][6].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][7].mv_min[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][8].mv_min[0]) );
- + // Same pattern for the upper MV limits (mv_max).
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][0].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][1].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][2].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][3].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][4].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][5].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][6].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][7].mv_max[0]) );
- + CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][8].mv_max[0]) );
- + // Map the recorded best type/partition to a candidate slot; anything that is not one of the four P shapes selects T_P_INTRA.
- + type = h->mb.trellis.best_type[0][i_mb];
- + part = h->mb.trellis.best_part[0][i_mb];
- + boff = ( type == P_L0 && part == D_16x16 ) ? ( T_P_16x16 ) :
- + ( ( type == P_8x8 && part == D_8x8 ) ? ( T_P_8x8 ) :
- + ( ( type == P_L0 && part == D_16x8 ) ? ( T_P_16x8 ) :
- + ( ( type == P_L0 && part == D_8x16 ) ? ( T_P_8x16 ) :
- + ( T_P_INTRA ) ) ) );
- + CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_BEST]), &(h->mb.trellis.mb[i_mb*mb_stride+boff]) );
- +
- + i_mb++;
- + }
- + }
- +}
- +// Record the trellis mode and direction in h->mb.trellis and derive breadth (candidate count for that mode) and length (MB columns for ROW_TRELLIS, otherwise MB rows).
- +void fill_me_trellis_params( x264_t *h, int mode, int direction ) {
- + h->mb.trellis.mode = mode;
- + h->mb.trellis.direction = direction;
- + h->mb.trellis.breadth = ( mode == MODE_COMBINED_DIAMOND ) ? COMBINED_DIAMOND_CANDIDATES :
- + ( ( mode == MODE_DECIDE ) ? DECIDE_CANDIDATES :
- + ( ( mode == MODE_DIAMOND ) ? DIAMOND_CANDIDATES : // compare against mode: testing the bare MODE_DIAMOND constant made this choice independent of the argument
- + SQUARE2_CANDIDATES ) );
- + h->mb.trellis.length = ( direction == ROW_TRELLIS ) ? h->mb.i_mb_width : h->mb.i_mb_height;
- +}
- +// Left-neighbour fetch: copy one MV of block into pred, chosen by block->part[i_mv] and by whether i_mv is 0 (8x8: mv[1] or mv[3]; 16x8: mv[0] or mv[1]; 8x16: mv[1]; anything else: mv[0]).
- +void get_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
- + switch ( block->part[i_mv] ) {
- + case D_8x8 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][1] );
- + else
- + CP_MV( pred, block->mv[i_list][3] );
- + break;
- + case D_16x8 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][0] );
- + else
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + case D_8x16 :
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + default :
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + }
- +}
- +// Top-neighbour fetch: copy one MV of block into pred, chosen by block->part[i_mv] and by whether i_mv is 0 (8x8: mv[2] or mv[3]; 16x8: mv[1]; 8x16: mv[0] or mv[1]; anything else: mv[0]).
- +void get_top_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
- + switch ( block->part[i_mv] ) {
- + case D_8x8 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][2] );
- + else
- + CP_MV( pred, block->mv[i_list][3] );
- + break;
- + case D_16x8 :
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + case D_8x16 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][0] );
- + else
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + default :
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + }
- +}
- +// Top-right fetch: copy one MV of block into pred, chosen by block->part[i_mv] and by whether i_mv is 0 (8x8: mv[3] or mv[2]; 16x8: mv[1]; 8x16: mv[1] or mv[0]; anything else: mv[0]).
- +void get_top_right_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
- + switch ( block->part[i_mv] ) {
- + case D_8x8 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][3] );
- + else
- + CP_MV( pred, block->mv[i_list][2] );
- + break;
- + case D_16x8 :
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + case D_8x16 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][1] );
- + else
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + default :
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + }
- +}
- +// Top-left fetch: copy one MV of block into pred, chosen by block->part[i_mv] and by whether i_mv is 0 (8x8: mv[3] or mv[2]; 16x8: mv[1]; 8x16: mv[1] or mv[0]; anything else: mv[0]). The body is statement-for-statement the same as get_top_right_predictor.
- +void get_top_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
- + switch ( block->part[i_mv] ) {
- + case D_8x8 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][3] );
- + else
- + CP_MV( pred, block->mv[i_list][2] );
- + break;
- + case D_16x8 :
- + CP_MV( pred, block->mv[i_list][1] );
- + break;
- + case D_8x16 :
- + if ( i_mv == 0 )
- + CP_MV( pred, block->mv[i_list][1] );
- + else
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + default :
- + CP_MV( pred, block->mv[i_list][0] );
- + break;
- + }
- +}
- +// Fill *pred with the template matching current's class/partition and, for P blocks, with list-0 MV predictors taken from left/top/diag (zero MVs at picture edges). left/top/diag are only dereferenced when col/row say they exist; other classes leave *pred untouched.
- +void choose_predictors( x264_t *h, mb_t *pred, int row, int col, mb_t *current, mb_t *left, mb_t *top, mb_t *diag ) {
- + int16_t a[2], b[2], c[2], fetch[2];
- + int i_list;
- +
- + switch ( current->class ) {
- + // Intra classes: the predictor is just the all-zero template of the same class.
- + case I_4x4 :
- +
- + CP_MB( pred, &mb_i_4x4 );
- + break;
- +
- + case I_8x8 :
- +
- + CP_MB( pred, &mb_i_8x8 );
- + break;
- +
- + case I_16x16 :
- +
- + CP_MB( pred, &mb_i_16x16 );
- + break;
- +
- + case P_L0 :
- +
- + i_list = 0;
- +
- + switch( current->part[0] ) {
- +
- + case D_16x16 :
- + // 16x16: component-wise median of left (a), top (b) and diagonal (c).
- + CP_MB( pred, &mb_p_16x16 );
- +
- + if ( col == 0 ) {
- + CP_MV( a, mv_zero );
- + }
- + else {
- + get_left_predictor( fetch, left, i_list, 0 );
- + CP_MV( a, fetch );
- + }
- +
- + if ( row == 0 ) {
- + CP_MV( b, mv_zero );
- + CP_MV( c, mv_zero );
- + } else {
- + get_top_predictor( fetch, top, i_list, 0 );
- + CP_MV( b, fetch );
- + // In the last column the diagonal is fetched with the top-left helper, otherwise with the top-right helper.
- + if ( col == h->mb.i_mb_width - 1 )
- + get_top_left_predictor( fetch, diag, i_list, 0 );
- + else
- + get_top_right_predictor( fetch, diag, i_list, 1 );
- + CP_MV( c, fetch );
- + }
- +
- + pred->mv[i_list][0][0] = X264_MED( a[0], b[0], c[0] );
- + pred->mv[i_list][0][1] = X264_MED( a[1], b[1], c[1] );
- +
- + break;
- +
- + case D_16x8 :
- + // 16x8: partition 0 is predicted from the top neighbour, partition 1 from the left neighbour (no median).
- + CP_MB( pred, &mb_p_16x8 );
- +
- + if ( row == 0 )
- + CP_MV( pred->mv[i_list][0], mv_zero );
- + else {
- + get_top_predictor( fetch, top, i_list, 0 );
- + CP_MV( pred->mv[i_list][0], fetch );
- + }
- +
- + if ( col == 0 )
- + CP_MV( pred->mv[i_list][1], mv_zero );
- + else {
- + get_left_predictor( fetch, left, i_list, 1 );
- + CP_MV( pred->mv[i_list][1], fetch );
- + }
- +
- + break;
- +
- + case D_8x16 :
- + // 8x16: partition 0 is predicted from the left neighbour, partition 1 from the diagonal (no median).
- + CP_MB( pred, &mb_p_8x16 );
- +
- + if ( col == 0 )
- + CP_MV( pred->mv[i_list][0], mv_zero );
- + else {
- + get_left_predictor( fetch, left, i_list, 0 );
- + CP_MV( pred->mv[i_list][0], fetch );
- + }
- +
- + if ( row == 0 )
- + CP_MV( pred->mv[i_list][1], mv_zero );
- + else {
- + if ( col == h->mb.i_mb_width-1 )
- + get_top_left_predictor( fetch, top, i_list, 1 ); // last column: the fallback is read from top, not diag
- + else
- + get_top_right_predictor( fetch, diag, i_list, 1 );
- + CP_MV( pred->mv[i_list][1], fetch );
- + }
- +
- + break;
- +
- + }
- + break; // P_L0 is complete; without this break control fell through into P_8x8 and pred was overwritten with the 8x8 template and 8x8 predictors
- + case P_8x8 :
- + // 8x8: per sub-block median; sub-blocks 1..3 take some predictors from current's own earlier sub-blocks.
- + i_list = 0;
- + CP_MB( pred, &mb_p_8x8 );
- +
- + for ( int i = 0; i < 4; i++ ) {
- + // a: left predictor. Odd i uses current's own sub-block (0 or 2); even i uses the left macroblock.
- + if ( col == 0 && !( i & 1 ) )
- + CP_MV( a, mv_zero );
- + else {
- + if ( i & 1 ) {
- + if ( i == 1 )
- + CP_MV( fetch, current->mv[i_list][0] );
- + else
- + CP_MV( fetch, current->mv[i_list][2] );
- + }
- + else
- + get_left_predictor( fetch, left, i_list, i ); // NOTE(review): i is 0 or 2 here and the helper switches on left->part[i] - confirm part[2] is meaningful for 16x8/8x16 neighbours
- +
- + CP_MV( a, fetch );
- + }
- + // b: top predictor, c: diagonal predictor. Both are zero for sub-blocks 0 and 1 in the first row.
- + if ( row == 0 && i < 2) {
- + CP_MV( b, mv_zero );
- + CP_MV( c, mv_zero );
- + } else {
- + if ( i > 1 ) {
- + if ( i == 2 )
- + CP_MV( fetch, current->mv[i_list][0] );
- + else
- + CP_MV( fetch, current->mv[i_list][1] );
- + }
- + else
- + get_top_predictor( fetch, top, i_list, i );
- + CP_MV( b, fetch );
- +
- +
- + if ( i == 0 )
- + get_top_right_predictor( fetch, top, i_list, i );
- + else if ( i == 1 ) {
- + if ( col == h->mb.i_mb_width - 1 )
- + get_top_left_predictor( fetch, diag, i_list, i );
- + else
- + get_top_right_predictor( fetch, diag, i_list, i );
- + }
- + else if ( i == 2 )
- + CP_MV( fetch, current->mv[i_list][1] );
- + else
- + CP_MV( fetch, current->mv[i_list][0] );
- +
- + CP_MV( c, fetch );
- + }
- +
- + pred->mv[i_list][i][0] = X264_MED( a[0], b[0], c[0] );
- + pred->mv[i_list][i][1] = X264_MED( a[1], b[1], c[1] );
- +
- + }
- +
- + break;
- +
- + }
- +
- +}
- +// Cost one candidate block at (row, col): resets and fills block->costSAD (cached full-pel SADs) and block->costMV (T_BITS_MVD of each MV minus its predictor) and returns their sum. For partitioned blocks only the first min(i_mv_max+1, partition count) partitions are costed.
- +int cost_mb( x264_t *h, int row, int col, int i_mv_max, mb_t *block, mb_t *left, mb_t *top, mb_t *diag ) {
- + mb_t pred;
- + choose_predictors( h, &pred, row, col, block, left, top, diag );
- + block->costSAD = 0;
- + block->costMV = 0;
- + int i, x, y, px, py, i_list;
- +
- +
- + switch ( block->class ) {
- + // Intra classes carry no MV cost; pred is not read on this path.
- + case I_4x4 :
- + case I_8x8 :
- + case I_16x16 :
- + case I_PCM :
- +
- +
- + block->costSAD += get_cached_fpel_sad( h, block->class, 0, 0, 0, row, col, 0, 0 );
- + break;
- +
- + case P_L0 :
- +
- + i_list = 0;
- +
- + switch ( block->part[0] ) {
- +
- + case D_16x16 :
- +
- + x = block->mv[i_list][0][0];
- + y = block->mv[i_list][0][1];
- + px = pred.mv[i_list][0][0];
- + py = pred.mv[i_list][0][1];
- + block->costSAD += get_cached_fpel_sad( h, P_L0, D_16x16, 0, 0, row, col, x, y );
- + block->costMV += T_BITS_MVD( x-px, y-py );
- + break;
- +
- + case D_16x8 :
- +
- + for ( i = 0; i < X264_MIN(i_mv_max+1,2); i++ ) {
- + x = block->mv[i_list][i][0];
- + y = block->mv[i_list][i][1];
- + px = pred.mv[i_list][i][0];
- + py = pred.mv[i_list][i][1];
- + block->costSAD += get_cached_fpel_sad( h, P_L0, D_16x8, 0, i, row, col, x, y );
- + block->costMV += T_BITS_MVD( x-px, y-py );
- + }
- + break;
- +
- + case D_8x16 :
- +
- + for ( i = 0; i < X264_MIN(i_mv_max+1,2); i++ ) {
- + x = block->mv[i_list][i][0];
- + y = block->mv[i_list][i][1];
- + px = pred.mv[i_list][i][0];
- + py = pred.mv[i_list][i][1];
- + block->costSAD += get_cached_fpel_sad( h, P_L0, D_8x16, 0, i, row, col, x, y );
- + block->costMV += T_BITS_MVD( x-px, y-py );
- + }
- + break;
- +
- + }
- +
- + break;
- +
- + case P_8x8 :
- +
- + i_list = 0;
- +
- + for ( i = 0; i < X264_MIN(i_mv_max+1,4); i++ ) {
- + x = block->mv[i_list][i][0];
- + y = block->mv[i_list][i][1];
- + px = pred.mv[i_list][i][0];
- + py = pred.mv[i_list][i][1];
- + block->costSAD += get_cached_fpel_sad( h, P_8x8, D_8x8, 0, i, row, col, x, y );
- + block->costMV += T_BITS_MVD( x-px, y-py );
- + }
- + break;
- +
- + }
- + // The mode-bit term below is disabled, so costMV currently contains MV-difference bits only.
- + //block->costMV += T_BITS_MODE( block->type );
- +
- + return ( block->costSAD + block->costMV );
- +}
- +// Return the sum of block->costSAD over the T_P_BEST candidate of every macroblock, re-running cost_mb on each one with the T_P_BEST entries of its neighbours.
- +int sad_cost( x264_t *h ) {
- + int row, col;
- + int cost;
- + int i_list, offCC, offCL, offTL, offTR, offTC;
- + int mb_stride_col = h->mb.trellis.mb_stride_col;
- + int mb_stride_row = h->mb.trellis.mb_stride_row;
- + mb_t *block, *left, *top, *diag;
- + // Offsets into h->mb.trellis.mb for the current, left, top, top-left and top-right macroblocks; i_list is assigned but never read.
- + cost = 0;
- + i_list = 0;
- + offCC = 0;
- + offCL = offCC - mb_stride_col;
- + offTC = offCC - mb_stride_row;
- + offTL = offTC - mb_stride_col;
- + offTR = offTC + mb_stride_col;
- + for ( row = 0; row < h->mb.i_mb_height; row++ ) {
- + for ( col = 0; col < h->mb.i_mb_width; col++ ) {
- + block = &(h->mb.trellis.mb[offCC+T_P_BEST]);
- + left = ( col == 0 ) ? NULL : &(h->mb.trellis.mb[offCL+T_P_BEST]);
- + top = ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]);
- + diag = ( row == 0 ) ? NULL :
- + ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) :
- + &(h->mb.trellis.mb[offTR+T_P_BEST]);
- + // diag is the top-left neighbour in the last column and the top-right neighbour elsewhere; missing neighbours are passed as NULL.
- + cost_mb( h, row, col, 3, block, left, top, diag );
- + cost += block->costSAD;
- +
- + offCC += mb_stride_col;
- + offCL += mb_stride_col;
- + offTC += mb_stride_col;
- + offTL += mb_stride_col;
- + offTR += mb_stride_col;
- + }
- + }
- +
- + return cost;
- +}
- +// Return the sum of block->costMV over the T_P_BEST candidate of every macroblock, re-running cost_mb on each one with the T_P_BEST entries of its neighbours.
- +int mv_cost( x264_t *h ) {
- + int row, col;
- + int cost;
- + int i_list, offCC, offCL, offTL, offTR, offTC;
- + int mb_stride_col = h->mb.trellis.mb_stride_col;
- + int mb_stride_row = h->mb.trellis.mb_stride_row;
- + mb_t *block, *left, *top, *diag;
- + // Offsets into h->mb.trellis.mb for the current, left, top, top-left and top-right macroblocks; i_list is assigned but never read.
- + cost = 0;
- + i_list = 0;
- + offCC = 0;
- + offCL = offCC - mb_stride_col;
- + offTC = offCC - mb_stride_row;
- + offTL = offTC - mb_stride_col;
- + offTR = offTC + mb_stride_col;
- + for ( row = 0; row < h->mb.i_mb_height; row++ ) {
- + for ( col = 0; col < h->mb.i_mb_width; col++ ) {
- + block = &(h->mb.trellis.mb[offCC+T_P_BEST]);
- + left = ( col == 0 ) ? NULL : &(h->mb.trellis.mb[offCL+T_P_BEST]);
- + top = ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]);
- + diag = ( row == 0 ) ? NULL :
- + ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) :
- + &(h->mb.trellis.mb[offTR+T_P_BEST]);
- + // diag is the top-left neighbour in the last column and the top-right neighbour elsewhere; missing neighbours are passed as NULL.
- + cost_mb( h, row, col, 3, block, left, top, diag );
- + cost += block->costMV;
- +
- + offCC += mb_stride_col;
- + offCL += mb_stride_col;
- + offTC += mb_stride_col;
- + offTL += mb_stride_col;
- + offTR += mb_stride_col;
- + }
- + }
- +
- + return cost;
- +}
- +// Populate the trellis stage array h->mb.trellis.t for line i_frame (a macroblock row under ROW_TRELLIS, otherwise a macroblock column) by copying candidate blocks out of h->mb.trellis.mb, one stage per macroblock along the line.
- +void fill_trellis( x264_t *h, int i_frame ) {
- + int i_trellis, i, j;
- + int mb_stride_col = h->mb.trellis.mb_stride_col;
- + int mb_stride_row = h->mb.trellis.mb_stride_row;
- + int t_stride = h->mb.trellis.t_stride;
- + int offR = ( h->mb.trellis.direction == ROW_TRELLIS ) ? i_frame * mb_stride_row : 0 ;
- + int offC = ( h->mb.trellis.direction == ROW_TRELLIS ) ? 0 : i_frame * mb_stride_col ;
- + // Combined mode: each of the first T_P_INTRA candidate slots is replicated DIAMOND_CANDIDATES times, followed by a single copy of the T_P_INTRA slot.
- + if ( h->mb.trellis.mode == MODE_COMBINED_DIAMOND ) {
- + for ( i_trellis = 0; i_trellis < h->mb.trellis.length; i_trellis++ ) {
- + for ( i = 0; i < T_P_INTRA; i++ ) {
- + for ( j = 0; j < DIAMOND_CANDIDATES; j++ ) {
- + CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+i*DIAMOND_CANDIDATES+j].block), &(h->mb.trellis.mb[offR+offC+i]) );
- + }
- + }
- + CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+T_P_INTRA*DIAMOND_CANDIDATES].block), &(h->mb.trellis.mb[offR+offC+T_P_INTRA]) );
- + // Step to the next macroblock along the trellis direction.
- + if ( h->mb.trellis.direction == ROW_TRELLIS )
- + offC += mb_stride_col;
- + else
- + offR += mb_stride_row;
- + }
- + }
- + else {
- + for ( i_trellis = 0; i_trellis < h->mb.trellis.length; i_trellis++ ) {
- + for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
- + if ( ( h->mb.trellis.mode == MODE_DIAMOND ) || ( h->mb.trellis.mode == MODE_SQUARE2 ) )
- + CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+j].block), &(h->mb.trellis.mb[offR+offC+T_P_BEST]) ); // refinement modes: every entry starts as a copy of the current best candidate
- + else
- + CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+j].block), &(h->mb.trellis.mb[offR+offC+j]) ); // other modes: entry j is candidate slot j
- + }
- + // Step to the next macroblock along the trellis direction.
- + if ( h->mb.trellis.direction == ROW_TRELLIS )
- + offC += mb_stride_col;
- + else
- + offR += mb_stride_row;
- + }
- + }
- +}
- +
- +/* Shift one component (axis 0 or 1) of block->mv[i_list][i_mv] by delta and clamp it:
- + * positive steps are limited by block->mv_max[axis], negative steps by block->mv_min[axis]. */
- +static void trellis_nudge_mv( mb_t *block, int i_list, int i_mv, int axis, int delta ) {
- +    int moved = block->mv[i_list][i_mv][axis] + delta;
- +
- +    if ( delta > 0 )
- +        block->mv[i_list][i_mv][axis] = X264_MIN( moved, block->mv_max[axis] );
- +    else
- +        block->mv[i_list][i_mv][axis] = X264_MAX( moved, block->mv_min[axis] );
- +}
- +
- +/* Offsets the list-0 motion vectors of the non-centre trellis candidates so that each
- + * candidate sits at its own position around the centre, clamped to the block's mv_min/mv_max.
- + *
- + * h   - encoder context; reads h->mb.trellis.{mode,direction,t_stride,t} and the frame size in MBs.
- + * sub - selects which sub-partition vector is moved (combined with the trellis direction).
- + *
- + * MODE_COMBINED_DIAMOND moves the B/T/L/R candidates of every partition type
- + * (16x16, 8x8, 16x8, 8x16). The other modes look at the centre candidate: intra centres are
- + * skipped, otherwise the vector index is chosen from the centre's partition and either the
- + * MODE_SQUARE2 step list or the plain four-step diamond is applied. */
- +void adjust_trellis( x264_t *h, int sub ) {
- +    typedef struct { int cand; int axis; int delta; } mv_step_t;
- +
- +    /* axis 1 is the component the original code labels bottom(+)/top(-); axis 0 is left(-)/right(+) */
- +    static const mv_step_t diamond_steps[] = {
- +        { B___, 1, +1 }, { T___, 1, -1 }, { L___, 0, -1 }, { R___, 0, +1 },
- +    };
- +    static const mv_step_t square2_steps[] = {
- +        { B___, 1, +1 }, { BL__, 1, +1 }, { BLL_, 1, +1 }, { BR__, 1, +1 }, { BRR_, 1, +1 },
- +        { T___, 1, -1 }, { TL__, 1, -1 }, { TLL_, 1, -1 }, { TR__, 1, -1 }, { TRR_, 1, -1 },
- +        { BB__, 1, +2 }, { BBL_, 1, +2 }, { BBLL, 1, +2 }, { BBR_, 1, +2 }, { BBRR, 1, +2 },
- +        { TT__, 1, -2 }, { TTL_, 1, -2 }, { TTLL, 1, -2 }, { TTR_, 1, -2 }, { TTRR, 1, -2 },
- +        { L___, 0, -1 }, { TL__, 0, -1 }, { TTL_, 0, -1 }, { BL__, 0, -1 }, { BBL_, 0, -1 },
- +        { R___, 0, +1 }, { TR__, 0, +1 }, { TTR_, 0, +1 }, { BR__, 0, +1 }, { BBR_, 0, +1 },
- +        { LL__, 0, -2 }, { TLL_, 0, -2 }, { TTLL, 0, -2 }, { BLL_, 0, -2 }, { BBLL, 0, -2 },
- +        { RR__, 0, +2 }, { TRR_, 0, +2 }, { TTRR, 0, +2 }, { BRR_, 0, +2 }, { BBRR, 0, +2 },
- +    };
- +
- +    const int row_wise = ( h->mb.trellis.direction == ROW_TRELLIS );
- +    const int stride   = h->mb.trellis.t_stride;
- +    const int n_nodes  = row_wise ? h->mb.i_mb_width : h->mb.i_mb_height;
- +    const int idx8x8   = row_wise ? sub*2 : sub;
- +    const int idx16x8  = row_wise ? sub : 0;
- +    const int idx8x16  = row_wise ? 0 : sub;
- +    const int list     = 0;
- +
- +    if ( h->mb.trellis.mode == MODE_COMBINED_DIAMOND ) {
- +        /* one group of DIAMOND_CANDIDATES entries per partition type, each with its own vector index */
- +        const int type_of[4] = { T_P_16x16, T_P_8x8, T_P_16x8, T_P_8x16 };
- +        const int mv_of[4]   = { 0, idx8x8, idx16x8, idx8x16 };
- +
- +        for ( int node = 0; node < n_nodes; node++ ) {
- +            for ( int d = 0; d < 4; d++ ) {
- +                for ( int p = 0; p < 4; p++ ) {
- +                    mb_t *cand = &(h->mb.trellis.t[node*stride + type_of[p]*DIAMOND_CANDIDATES + diamond_steps[d].cand].block);
- +                    trellis_nudge_mv( cand, list, mv_of[p], diamond_steps[d].axis, diamond_steps[d].delta );
- +                }
- +            }
- +        }
- +        return;
- +    }
- +
- +    {
- +        const mv_step_t *steps;
- +        int n_steps;
- +
- +        if ( h->mb.trellis.mode == MODE_SQUARE2 ) {
- +            steps   = square2_steps;
- +            n_steps = (int)( sizeof(square2_steps) / sizeof(square2_steps[0]) );
- +        }
- +        else {
- +            steps   = diamond_steps;
- +            n_steps = (int)( sizeof(diamond_steps) / sizeof(diamond_steps[0]) );
- +        }
- +
- +        for ( int node = 0; node < n_nodes; node++ ) {
- +            mb_t *centre = &(h->mb.trellis.t[node*stride + C___].block);
- +            int shape, which_mv;
- +
- +            if ( IS_INTRA( centre->class ) )
- +                continue;
- +
- +            /* pick the vector to move from the centre candidate's partition */
- +            shape = centre->part[0];
- +            if ( shape == D_16x16 )
- +                which_mv = 0;
- +            else if ( shape == D_8x16 )
- +                which_mv = idx8x16;
- +            else if ( shape == D_8x8 )
- +                which_mv = idx8x8;
- +            else
- +                which_mv = idx16x8;
- +
- +            for ( int s = 0; s < n_steps; s++ ) {
- +                mb_t *cand = &(h->mb.trellis.t[node*stride + steps[s].cand].block);
- +                trellis_nudge_mv( cand, list, which_mv, steps[s].axis, steps[s].delta );
- +            }
- +        }
- +    }
- +}
- +
- +/* Builds the table of alternate motion vectors around block->mv[i_list][i_mv].
- + *
- + * Every candidate slot first receives a copy of the source vector; the non-centre slots then
- + * get their component(s) offset by 1 (and, in MODE_SQUARE2, by 2) and clamped to the block's
- + * mv_min/mv_max. In MODE_SQUARE2 all SQUARE2_CANDIDATES slots are written, otherwise only the
- + * first DIAMOND_CANDIDATES slots are. */
- +void generate_alternate_mvs( x264_t *h, int16_t mv[SQUARE2_CANDIDATES][2], mb_t *block, int i_list, int i_mv ) {
- +    const int square2 = ( h->mb.trellis.mode == MODE_SQUARE2 );
- +    const int n_cand  = square2 ? SQUARE2_CANDIDATES : DIAMOND_CANDIDATES;
- +    int16_t cx, cy;
- +
- +    for ( int c = 0; c < n_cand; c++ )
- +        CP_MV( mv[c], block->mv[i_list][i_mv] );
- +
- +    /* the centre slot is never modified below, so its components can be read once */
- +    cx = mv[C___][0];
- +    cy = mv[C___][1];
- +
- +    if ( !square2 ) {
- +        mv[B___][1] = X264_MIN( cy + 1, block->mv_max[1] );
- +        mv[T___][1] = X264_MAX( cy - 1, block->mv_min[1] );
- +        mv[L___][0] = X264_MAX( cx - 1, block->mv_min[0] );
- +        mv[R___][0] = X264_MIN( cx + 1, block->mv_max[0] );
- +        return;
- +    }
- +
- +    {
- +        /* candidates grouped by the offset applied to one of their components */
- +        const int y_plus1[5]  = { B___, BLL_, BL__, BR__, BRR_ };
- +        const int y_minus1[5] = { T___, TL__, TLL_, TR__, TRR_ };
- +        const int y_plus2[5]  = { BB__, BBL_, BBLL, BBR_, BBRR };
- +        const int y_minus2[5] = { TT__, TTL_, TTLL, TTR_, TTRR };
- +        const int x_minus1[5] = { L___, TL__, TTL_, BL__, BBL_ };
- +        const int x_plus1[5]  = { R___, TR__, TTR_, BR__, BBR_ };
- +        const int x_minus2[5] = { LL__, TLL_, TTLL, BLL_, BBLL };
- +        const int x_plus2[5]  = { RR__, TRR_, TTRR, BRR_, BBRR };
- +
- +        const int16_t yp1 = X264_MIN( cy + 1, block->mv_max[1] );
- +        const int16_t ym1 = X264_MAX( cy - 1, block->mv_min[1] );
- +        const int16_t yp2 = X264_MIN( cy + 2, block->mv_max[1] );
- +        const int16_t ym2 = X264_MAX( cy - 2, block->mv_min[1] );
- +        const int16_t xm1 = X264_MAX( cx - 1, block->mv_min[0] );
- +        const int16_t xp1 = X264_MIN( cx + 1, block->mv_max[0] );
- +        const int16_t xm2 = X264_MAX( cx - 2, block->mv_min[0] );
- +        const int16_t xp2 = X264_MIN( cx + 2, block->mv_max[0] );
- +
- +        for ( int c = 0; c < 5; c++ ) {
- +            mv[y_plus1[c]][1]  = yp1;
- +            mv[y_minus1[c]][1] = ym1;
- +            mv[y_plus2[c]][1]  = yp2;
- +            mv[y_minus2[c]][1] = ym2;
- +            mv[x_minus1[c]][0] = xm1;
- +            mv[x_plus1[c]][0]  = xp1;
- +            mv[x_minus2[c]][0] = xm2;
- +            mv[x_plus2[c]][0]  = xp2;
- +        }
- +    }
- +}
- +
- +void run_row_trellis( x264_t *h, int iter ) { /* Row-direction trellis pass over the per-macroblock candidates.
- + * For each of `iter` passes, each macroblock row and each subrow (0 and 1):
- + *   1. copy the row's entries 0..T_P_BEST from trellis.mb into trellis.backup and record mv_cost(h) and sad_cost(h);
- + *   2. call fill_trellis(h, row), then adjust_trellis(h, subrow) unless the mode is MODE_DECIDE;
- + *   3. forward pass: for every column and candidate j, keep the cheapest predecessor k in .parent and the cost in .accCost;
- + *   4. backward pass: start at the cheapest candidate of the last column, follow .parent and copy the chosen blocks into trellis.mb;
- + *   5. recompute both frame costs and restore the backup when their sum became larger. */
- + int i, j, k, m, best, row, col, subrow, i_adj, class, part, alt_states;
- + int cost, min_cost;
- + int i_list, offCC, offCL, offTL, offTR, offTC, offDC, offDL, offDR, offCR; // NOTE(review): offCL is updated below but never read in this function
- + int mb_stride_col = h->mb.trellis.mb_stride_col;
- + int mb_stride_row = h->mb.trellis.mb_stride_row;
- + int t_stride = h->mb.trellis.t_stride;
- + int frameCostMVOld, frameCostMVNew, frameCostSADOld, frameCostSADNew;
- + int16_t alt[SQUARE2_CANDIDATES][2];
- + int t_type;
- +
- + i_list = 0;
- +
- + for ( i = 0; i < iter; i++ ) {
- + for ( row = 0; row < h->mb.i_mb_height; row ++ ) {
- + for ( subrow = 0; subrow <= 1; subrow++ ) {
- +
- + offCC = row * mb_stride_row; // step 1: save the current row so it can be restored if the result is worse
- + for ( col = 0; col < h->mb.i_mb_width; col++ ) {
- + for ( j = 0; j <= T_P_BEST; j++ )
- + CP_MB( &(h->mb.trellis.backup[col*mb_stride_col+j]), &(h->mb.trellis.mb[offCC+j]) );
- + offCC += mb_stride_col;
- + }
- +
- + frameCostMVOld = mv_cost( h );
- + frameCostSADOld = sad_cost( h );
- +
- + fill_trellis( h, row );
- + if ( h->mb.trellis.mode != MODE_DECIDE )
- + adjust_trellis( h, subrow );
- +
- + offCC = row * mb_stride_row ; // offsets into trellis.mb: C = this row, T = row - 1, D = row + 1; L/C/R = column - 1 / same / + 1
- + offCL = offCC - mb_stride_col;
- + offCR = offCC + mb_stride_col;
- + offTC = offCC - mb_stride_row;
- + offDC = offCC + mb_stride_row;
- + offTL = offTC - mb_stride_col;
- + offDL = offDC - mb_stride_col;
- + offTR = offTC + mb_stride_col;
- + offDR = offDC + mb_stride_col;
- + for ( j = 0; j < h->mb.trellis.breadth; j++ ) { // column 0: no predecessor, so the accumulated cost is just costCC + costDC
- + h->mb.trellis.t[j].costCC = cost_mb( h, row, 0, 3, &(h->mb.trellis.t[j].block), NULL, ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]), ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTR+T_P_BEST]) );
- + h->mb.trellis.t[j].costDC = ( row == h->mb.i_mb_height-1 ) ? 0 : cost_mb( h, row+1, 0, 3, &(h->mb.trellis.mb[offDC+T_P_BEST]), NULL, &(h->mb.trellis.t[j].block), &(h->mb.trellis.mb[offCR+T_P_BEST]) );
- + h->mb.trellis.t[j].costDL = 0;
- + h->mb.trellis.t[j].costDR = 0;
- + h->mb.trellis.t[j].accCost = h->mb.trellis.t[j].costCC + h->mb.trellis.t[j].costDC;
- + }
- +
- + offCC += mb_stride_col;
- + offCL += mb_stride_col;
- + offCR += mb_stride_col;
- + offTC += mb_stride_col;
- + offDC += mb_stride_col;
- + offTL += mb_stride_col;
- + offDL += mb_stride_col;
- + offTR += mb_stride_col;
- + offDR += mb_stride_col;
- +
- + for ( col = 1; col < h->mb.trellis.length; col++ ) { // step 3: forward pass over the remaining columns
- + for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
- + min_cost = INT_MAX;
- + class = h->mb.trellis.t[col*t_stride+j].block.class;
- + part = h->mb.trellis.t[col*t_stride+j].block.part[0];
- + i_adj = ( subrow && h->mb.trellis.t[col*t_stride+j].block.class == P_8x8 ) ? 3 : 1; // vector index that receives the alternates: 3 for P_8x8 on the second subrow, otherwise 1
- + alt_states = ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) ) ? ( ( h->mb.trellis.mode == MODE_SQUARE2 ) ? SQUARE2_CANDIDATES : DIAMOND_CANDIDATES ) : 1; // alternates are only tried for P_8x8 and P_L0/D_8x16 candidates
- + if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
- + generate_alternate_mvs( h, alt, &(h->mb.trellis.t[col*t_stride+j].block), i_list, i_adj );
- + best = 0;
- + for ( k = 0; k < h->mb.trellis.breadth; k++ ) { // k = candidate in the previous column, m = alternate vector for this candidate
- + for ( m = 0; m < alt_states; m++ ) {
- + if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
- + CP_MV( &(h->mb.trellis.t[col*t_stride+j].block.mv[i_adj]), &(alt[m]) ); // NOTE(review): indexes mv[] with i_adj alone, while generate_alternate_mvs reads mv[i_list][i_mv] - looks like [i_list] is missing; confirm
- + h->mb.trellis.t[col*t_stride+j].costCC = cost_mb( h, row, col, 3, &(h->mb.trellis.t[col*t_stride+j].block), &(h->mb.trellis.t[(col-1)*t_stride+k].block), ( ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]) ), ( ( row == 0 ) ? NULL : ( ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) : &(h->mb.trellis.mb[offTR+T_P_BEST]) ) ) );
- + h->mb.trellis.t[col*t_stride+j].costDC = ( row == h->mb.i_mb_height - 1 ) ? 0 : cost_mb( h, row+1, col, 3, &(h->mb.trellis.mb[offDC+T_P_BEST]), &(h->mb.trellis.mb[offDL+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+j].block), ( ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.t[(col-1)*t_stride+k].block) : &(h->mb.trellis.mb[offCR+T_P_BEST]) ) );
- + h->mb.trellis.t[col*t_stride+j].costDL = ( row == h->mb.i_mb_height - 1 ) ? 0 : cost_mb( h, row+1, col-1, 3, &(h->mb.trellis.mb[offDL+T_P_BEST]), ( ( col == 1 ) ? NULL : &(h->mb.trellis.mb[offDL-mb_stride_col+T_P_BEST]) ), &(h->mb.trellis.t[(col-1)*t_stride+k].block), &(h->mb.trellis.t[col*t_stride+j].block) );
- + h->mb.trellis.t[col*t_stride+j].costDR = ( row == h->mb.i_mb_height - 1 ) ? 0 : ( col == h->mb.i_mb_width - 2 ) ? cost_mb( h, row+1, col+1, 3, &(h->mb.trellis.mb[offDR+T_P_BEST]), &(h->mb.trellis.mb[offDC+T_P_BEST]), &(h->mb.trellis.mb[offCR+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+k].block) ) : 0; // NOTE(review): last argument pairs the current column with k (a previous-column index) - possibly j was intended; confirm
- +
- + cost = h->mb.trellis.t[(col-1)*t_stride+k].accCost - h->mb.trellis.t[(col-1)*t_stride+k].costDC - h->mb.trellis.t[(col-1)*t_stride+k].costDR + h->mb.trellis.t[col*t_stride+j].costCC + h->mb.trellis.t[col*t_stride+j].costDC + h->mb.trellis.t[col*t_stride+j].costDL + h->mb.trellis.t[col*t_stride+j].costDR; // NOTE(review): the predecessor's costDC/costDR fields hold the values of its last evaluated (k, m) pair, not necessarily of the pair that produced its accCost; confirm this is intended
- + if ( cost < min_cost ) {
- + min_cost = cost;
- + best = m;
- + h->mb.trellis.t[col*t_stride+j].parent = k;
- + }
- + }
- + }
- + h->mb.trellis.t[col*t_stride+j].accCost = min_cost;
- + if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
- + CP_MV( &(h->mb.trellis.t[col*t_stride+j].block.mv[i_adj]), &(alt[T_P_BEST]) ); // NOTE(review): `best` is recorded above but never read; alt[T_P_BEST] is used here - presumably alt[best] was intended; confirm
- + }
- +
- + offCC += mb_stride_col;
- + offCL += mb_stride_col;
- + offCR += mb_stride_col;
- + offTC += mb_stride_col;
- + offDC += mb_stride_col;
- + offTL += mb_stride_col;
- + offDL += mb_stride_col;
- + offTR += mb_stride_col;
- + offDR += mb_stride_col;
- + }
- +
- + min_cost = INT_MAX; // step 4: pick the cheapest candidate in the last column ...
- + k = 0;
- + for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
- + if ( h->mb.trellis.t[(h->mb.trellis.length-1)*t_stride+j].accCost < min_cost ) {
- + min_cost = h->mb.trellis.t[(h->mb.trellis.length-1)*t_stride+j].accCost;
- + k = j;
- + }
- + }
- +
- + offCC = (row+1) * mb_stride_row - mb_stride_col; // ... and walk the parent links back from the last macroblock of this row
- + for ( col = h->mb.trellis.length-1; col >= 0; col-- ) {
- + t_type = TRELLIS_TYPE( h->mb.trellis.t[col*t_stride+k].block );
- + CP_MB( &(h->mb.trellis.mb[offCC+t_type]), &(h->mb.trellis.t[col*t_stride+k].block) ); // store the winner both in its own type slot and in the T_P_BEST slot
- + CP_MB( &(h->mb.trellis.mb[offCC+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+k].block) );
- + k = h->mb.trellis.t[col*t_stride+k].parent;
- + offCC -= mb_stride_col;
- + }
- +
- + frameCostMVNew = mv_cost( h );
- + frameCostSADNew = sad_cost( h );
- +
- + if ( ( frameCostMVNew + frameCostSADNew ) > ( frameCostMVOld + frameCostSADOld ) ) { // step 5: the new row is more expensive overall, so put the saved row back
- + offCC = row * mb_stride_row;
- + for ( col = 0; col < h->mb.i_mb_width; col++ ) {
- + for ( j = 0; j <= T_P_BEST; j++ )
- + CP_MB( &(h->mb.trellis.mb[offCC+j]), &(h->mb.trellis.backup[col*mb_stride_col+j]) );
- + offCC += mb_stride_col;
- + }
- + }
- +
- + }
- + }
- + }
- +
- +}
- +
- +/* Multiplies the list-0 motion vectors of the T_P_BEST entry of every non-intra macroblock
- + * by 4 (the scaling the former `<<= 2` performed).
- + *
- + * A D_16x16 partition carries one vector, D_8x8 carries four, any other partition two.
- + *
- + * The scaling is written as a multiplication rather than a left shift: the components are
- + * presumably signed (they are clamped against mv_min elsewhere in this file), and left-shifting
- + * a negative value is undefined behaviour in C, whereas `* 4` is well defined for the same range. */
- +void scale_trellis_mv( x264_t *h ) {
- +    for ( int row = 0; row < h->mb.i_mb_height; row++ ) {
- +        for ( int col = 0; col < h->mb.i_mb_width; col++ ) {
- +            mb_t *block = &(h->mb.trellis.mb[row*h->mb.trellis.mb_stride_row+col*h->mb.trellis.mb_stride_col+T_P_BEST]);
- +            int n_mv;
- +
- +            if ( IS_INTRA( block->class ) )
- +                continue;
- +
- +            /* number of vectors in use depends on the partition shape */
- +            if ( block->part[0] == D_16x16 )
- +                n_mv = 1;
- +            else if ( block->part[0] == D_8x8 )
- +                n_mv = 4;
- +            else
- +                n_mv = 2;
- +
- +            for ( int i_mv = 0; i_mv < n_mv; i_mv++ ) {
- +                block->mv[0][i_mv][0] *= 4;
- +                block->mv[0][i_mv][1] *= 4;
- +            }
- +        }
- +    }
- +}
- +
- +/* Copies the trellis decision for macroblock i_mb (its T_P_BEST entry) into the encoder's
- + * current-macroblock state: type, partition, sub-partitions, and the list-0 reference and
- + * motion-vector caches. Intra blocks only get their type written. b_skip_mc is always cleared.
- + *
- + * h    - encoder context
- + * i_mb - macroblock index, multiplied by trellis.mb_stride_col to locate the entry */
- +void write_back_trellis_mv( x264_t *h, int i_mb ) {
- +    mb_t *chosen = &(h->mb.trellis.mb[i_mb*h->mb.trellis.mb_stride_col+T_P_BEST]);
- +
- +    h->mb.b_skip_mc = 0;
- +    h->mb.i_type = chosen->class;
- +
- +    if ( IS_INTRA( chosen->class ) )
- +        return;
- +
- +    h->mb.i_partition = chosen->part[0];
- +
- +    if ( chosen->class == P_8x8 ) {
- +        /* four 8x8 quadrants in raster order: x = 0/2 from bit 0, y = 0/2 from bit 1 */
- +        for ( int q = 0; q < 4; q++ )
- +            h->mb.i_sub_partition[q] = D_L0_8x8;
- +        for ( int q = 0; q < 4; q++ )
- +            x264_macroblock_cache_ref( h, 2*(q&1), 2*(q>>1), 2, 2, 0, chosen->ref[q][0] );
- +        for ( int q = 0; q < 4; q++ )
- +            x264_macroblock_cache_mv_ptr( h, 2*(q&1), 2*(q>>1), 2, 2, 0, chosen->mv[0][q] );
- +        return;
- +    }
- +
- +    switch ( chosen->part[0] ) {
- +
- +        case D_16x16 :
- +            x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, chosen->ref[0][0] );
- +            x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, chosen->mv[0][0] );
- +            break;
- +
- +        case D_16x8 :
- +            /* upper half, then lower half */
- +            x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, chosen->ref[0][0] );
- +            x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, chosen->ref[1][0] );
- +            x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 2, 0, chosen->mv[0][0] );
- +            x264_macroblock_cache_mv_ptr( h, 0, 2, 4, 2, 0, chosen->mv[0][1] );
- +            break;
- +
- +        case D_8x16 :
- +            /* left half, then right half */
- +            x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, chosen->ref[0][0] );
- +            x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, chosen->ref[1][0] );
- +            x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 4, 0, chosen->mv[0][0] );
- +            x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 4, 0, chosen->mv[0][1] );
- +            break;
- +
- +        default :
- +            /* other partitions: nothing is cached, as before */
- +            break;
- +    }
- +}
- diff --git a/extras/avisynth_c.h b/extras/avisynth_c.h
- old mode 100644
- new mode 100755
- diff --git a/extras/getopt.c b/extras/getopt.c
- old mode 100644
- new mode 100755
- diff --git a/extras/getopt.h b/extras/getopt.h
- old mode 100644
- new mode 100755
- diff --git a/extras/inttypes.h b/extras/inttypes.h
- old mode 100644
- new mode 100755
- diff --git a/extras/stdint.h b/extras/stdint.h
- old mode 100644
- new mode 100755
- diff --git a/filters/filters.c b/filters/filters.c
- old mode 100644
- new mode 100755
- diff --git a/filters/filters.h b/filters/filters.h
- old mode 100644
- new mode 100755
- diff --git a/filters/video/cache.c b/filters/video/cache.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/crop.c b/filters/video/crop.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/depth.c b/filters/video/depth.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/fix_vfr_pts.c b/filters/video/fix_vfr_pts.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/internal.c b/filters/video/internal.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/internal.h b/filters/video/internal.h
- old mode 100644
- new mode 100755
- diff --git a/filters/video/resize.c b/filters/video/resize.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/select_every.c b/filters/video/select_every.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/source.c b/filters/video/source.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/video.c b/filters/video/video.c
- old mode 100644
- new mode 100755
- diff --git a/filters/video/video.h b/filters/video/video.h
- old mode 100644
- new mode 100755
- diff --git a/input/avs.c b/input/avs.c
- old mode 100644
- new mode 100755
- diff --git a/input/ffms.c b/input/ffms.c
- old mode 100644
- new mode 100755
- diff --git a/input/input.c b/input/input.c
- old mode 100644
- new mode 100755
- diff --git a/input/input.h b/input/input.h
- old mode 100644
- new mode 100755
- diff --git a/input/lavf.c b/input/lavf.c
- old mode 100644
- new mode 100755
- diff --git a/input/raw.c b/input/raw.c
- old mode 100644
- new mode 100755
- diff --git a/input/thread.c b/input/thread.c
- old mode 100644
- new mode 100755
- diff --git a/input/timecode.c b/input/timecode.c
- old mode 100644
- new mode 100755
- diff --git a/input/y4m.c b/input/y4m.c
- old mode 100644
- new mode 100755
- diff --git a/output/flv.c b/output/flv.c
- old mode 100644
- new mode 100755
- diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
- old mode 100644
- new mode 100755
- diff --git a/output/flv_bytestream.h b/output/flv_bytestream.h
- old mode 100644
- new mode 100755
- diff --git a/output/matroska.c b/output/matroska.c
- old mode 100644
- new mode 100755
- diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
- old mode 100644
- new mode 100755
- diff --git a/output/matroska_ebml.h b/output/matroska_ebml.h
- old mode 100644
- new mode 100755
- diff --git a/output/mp4.c b/output/mp4.c
- old mode 100644
- new mode 100755
- diff --git a/output/output.h b/output/output.h
- old mode 100644
- new mode 100755
- diff --git a/output/raw.c b/output/raw.c
- old mode 100644
- new mode 100755
- diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm
- old mode 100644
- new mode 100755
- diff --git a/tools/checkasm.c b/tools/checkasm.c
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/__init__.py b/tools/digress/__init__.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/cli.py b/tools/digress/cli.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/comparers.py b/tools/digress/comparers.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/constants.py b/tools/digress/constants.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/errors.py b/tools/digress/errors.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/scm/__init__.py b/tools/digress/scm/__init__.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/scm/dummy.py b/tools/digress/scm/dummy.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/scm/git.py b/tools/digress/scm/git.py
- old mode 100644
- new mode 100755
- diff --git a/tools/digress/testing.py b/tools/digress/testing.py
- old mode 100644
- new mode 100755
- diff --git a/tools/q_matrix_jvt.cfg b/tools/q_matrix_jvt.cfg
- old mode 100644
- new mode 100755
- diff --git a/tools/xyuv.c b/tools/xyuv.c
- old mode 100644
- new mode 100755
- diff --git a/x264.c b/x264.c
- old mode 100644
- new mode 100755
- index 3864bf7..b6a0def
- --- a/x264.c
- +++ b/x264.c
- @@ -966,6 +966,7 @@ static struct option long_options[] =
- { "input-csp", required_argument, NULL, OPT_INPUT_CSP },
- { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
- { "dts-compress", no_argument, NULL, OPT_DTS_COMPRESSION },
- + { "me-trellis", required_argument, NULL, 0 },
- {0, 0, 0, 0}
- };
- @@ -1675,7 +1676,7 @@ static int encode( x264_param_t *param, cli_opt_t *opt )
- FAIL_IF_ERROR2( !h, "x264_encoder_open failed\n" );
- x264_encoder_parameters( h, param );
- -
- +
- FAIL_IF_ERROR2( output.set_param( opt->hout, param ), "can't set outfile param\n" );
- i_start = x264_mdate();
- diff --git a/x264.h b/x264.h
- old mode 100644
- new mode 100755
- index 9d663f1..37fcd8f
- --- a/x264.h
- +++ b/x264.h
- @@ -162,6 +162,10 @@ typedef struct
- #define X264_B_PYRAMID_NORMAL 2
- #define X264_KEYINT_MIN_AUTO 0
- #define X264_KEYINT_MAX_INFINITE (1<<30)
- +#define X264_ME_TRELLIS_NONE 0
- +#define X264_ME_TRELLIS_DIA 1
- +#define X264_ME_TRELLIS_ESA 2
- +#define X264_ME_TRELLIS_COMBINED_DIA 3
- static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
- static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
- @@ -332,6 +336,8 @@ typedef struct x264_param_t
- float f_psy_rd; /* Psy RD strength */
- float f_psy_trellis; /* Psy trellis strength */
- int b_psy; /* Toggle all psy optimizations */
- + int i_me_trellis; /* Type of trellis to use for me */
- + int b_cache_sads; /* Cache SAD scores during motion estimation */
- /* the deadzone size that will be used in luma quantization */
- int i_luma_deadzone[2]; /* {inter, intra} */
- diff --git a/x264cli.h b/x264cli.h
- old mode 100644
- new mode 100755
- diff --git a/x264dll.c b/x264dll.c
- old mode 100644
- new mode 100755
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement