Advertisement
Guest User

Untitled

a guest
Sep 22nd, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 105.58 KB | None | 0 0
  1. diff --git a/.gitignore b/.gitignore
  2. old mode 100644
  3. new mode 100755
  4. diff --git a/AUTHORS b/AUTHORS
  5. old mode 100644
  6. new mode 100755
  7. diff --git a/COPYING b/COPYING
  8. old mode 100644
  9. new mode 100755
  10. diff --git a/Makefile b/Makefile
  11. old mode 100644
  12. new mode 100755
  13. index 5831091..18f2d66
  14. --- a/Makefile
  15. +++ b/Makefile
  16. @@ -11,7 +11,8 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
  17.         common/mvpred.c common/bitstream.c \
  18.         encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
  19.         encoder/set.c encoder/macroblock.c encoder/cabac.c \
  20. -       encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
  21. +       encoder/cavlc.c encoder/encoder.c encoder/lookahead.c \
  22. +       encoder/trellis.c
  23.  
  24.  SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
  25.           output/raw.c output/matroska.c output/matroska_ebml.c \
  26. diff --git a/common/arm/asm.S b/common/arm/asm.S
  27. old mode 100644
  28. new mode 100755
  29. diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
  30. old mode 100644
  31. new mode 100755
  32. diff --git a/common/arm/dct-a.S b/common/arm/dct-a.S
  33. old mode 100644
  34. new mode 100755
  35. diff --git a/common/arm/dct.h b/common/arm/dct.h
  36. old mode 100644
  37. new mode 100755
  38. diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
  39. old mode 100644
  40. new mode 100755
  41. diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
  42. old mode 100644
  43. new mode 100755
  44. diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
  45. old mode 100644
  46. new mode 100755
  47. diff --git a/common/arm/mc.h b/common/arm/mc.h
  48. old mode 100644
  49. new mode 100755
  50. diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
  51. old mode 100644
  52. new mode 100755
  53. diff --git a/common/arm/pixel.h b/common/arm/pixel.h
  54. old mode 100644
  55. new mode 100755
  56. diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
  57. old mode 100644
  58. new mode 100755
  59. diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
  60. old mode 100644
  61. new mode 100755
  62. diff --git a/common/arm/predict.h b/common/arm/predict.h
  63. old mode 100644
  64. new mode 100755
  65. diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
  66. old mode 100644
  67. new mode 100755
  68. diff --git a/common/arm/quant.h b/common/arm/quant.h
  69. old mode 100644
  70. new mode 100755
  71. diff --git a/common/bitstream.c b/common/bitstream.c
  72. old mode 100644
  73. new mode 100755
  74. diff --git a/common/bitstream.h b/common/bitstream.h
  75. old mode 100644
  76. new mode 100755
  77. diff --git a/common/cabac.c b/common/cabac.c
  78. old mode 100644
  79. new mode 100755
  80. diff --git a/common/cabac.h b/common/cabac.h
  81. old mode 100644
  82. new mode 100755
  83. diff --git a/common/common.c b/common/common.c
  84. old mode 100644
  85. new mode 100755
  86. index a30d0d8..6818a01
  87. --- a/common/common.c
  88. +++ b/common/common.c
  89. @@ -976,6 +976,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
  90.          p->b_fake_interlaced = atobool(value);
  91.      OPT("frame-packing")
  92.          p->i_frame_packing = atoi(value);
  93. +    OPT("me-trellis")
  94. +        p->analyse.i_me_trellis = atoi(value);
  95.      else
  96.          return X264_PARAM_BAD_NAME;
  97.  #undef OPT
  98. diff --git a/common/common.h b/common/common.h
  99. old mode 100644
  100. new mode 100755
  101. index 7768571..fee8853
  102. --- a/common/common.h
  103. +++ b/common/common.h
  104. @@ -191,6 +191,7 @@ static const unsigned x264_scan8[16+2*4+3] =
  105.  #include "quant.h"
  106.  #include "cpu.h"
  107.  #include "threadpool.h"
  108. +#include "trellis.h"
  109.  
  110.  /****************************************************************************
  111.   * General functions
  112. @@ -771,6 +772,8 @@ struct x264_t
  113.              int8_t  topright_ref[2][3];
  114.          } cache;
  115.  
  116. +        me_trellis_t trellis;
  117. +
  118.          /* */
  119.          int     i_qp;       /* current qp */
  120.          int     i_chroma_qp;
  121. diff --git a/common/cpu.c b/common/cpu.c
  122. old mode 100644
  123. new mode 100755
  124. diff --git a/common/cpu.h b/common/cpu.h
  125. old mode 100644
  126. new mode 100755
  127. diff --git a/common/dct.c b/common/dct.c
  128. old mode 100644
  129. new mode 100755
  130. diff --git a/common/dct.h b/common/dct.h
  131. old mode 100644
  132. new mode 100755
  133. diff --git a/common/deblock.c b/common/deblock.c
  134. old mode 100644
  135. new mode 100755
  136. diff --git a/common/display-x11.c b/common/display-x11.c
  137. old mode 100644
  138. new mode 100755
  139. diff --git a/common/display.h b/common/display.h
  140. old mode 100644
  141. new mode 100755
  142. diff --git a/common/frame.c b/common/frame.c
  143. old mode 100644
  144. new mode 100755
  145. diff --git a/common/frame.h b/common/frame.h
  146. old mode 100644
  147. new mode 100755
  148. diff --git a/common/macroblock.c b/common/macroblock.c
  149. old mode 100644
  150. new mode 100755
  151. index 9eefc0a..61cbbb2
  152. --- a/common/macroblock.c
  153. +++ b/common/macroblock.c
  154. @@ -34,7 +34,7 @@ static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int h
  155.      int i_ref = h->mb.cache.ref[0][i8];
  156.      int mvx   = x264_clip3( h->mb.cache.mv[0][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] ) + 4*4*x;
  157.      int mvy   = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
  158. -
  159. +    
  160.      h->mc.mc_luma( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
  161.                     h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
  162.                     mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
  163. @@ -283,6 +283,34 @@ int x264_macroblock_cache_allocate( x264_t *h )
  164.              CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
  165.      }
  166.  
  167. +    if ( h->param.analyse.i_me_trellis ) {
  168. +        h->mb.trellis.max_length = X264_MAX( h->mb.i_mb_width, h->mb.i_mb_height );
  169. +        h->mb.trellis.max_breadth = SQUARE2_CANDIDATES;
  170. +        h->mb.trellis.mb_stride_col = T_P_TOTAL+1;
  171. +        h->mb.trellis.mb_stride_row = h->mb.trellis.mb_stride_col * h->mb.i_mb_width;
  172. +        h->mb.trellis.t_stride = h->mb.trellis.max_breadth;
  173. +        h->mb.trellis.gen = 0;
  174. +                
  175. +        CHECKED_MALLOC( h->mb.trellis.t, h->mb.trellis.max_length * h->mb.trellis.max_breadth * sizeof( me_trellis_node_t ) );
  176. +        CHECKED_MALLOC( h->mb.trellis.mb, h->mb.i_mb_count * h->mb.trellis.mb_stride_col * sizeof( mb_t ) );
  177. +        CHECKED_MALLOC( h->mb.trellis.backup, h->mb.trellis.max_length * h->mb.trellis.mb_stride_col * sizeof( mb_t ) );
  178. +        
  179. +        CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_gen[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
  180. +        CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_gen[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
  181. +        CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_tag[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
  182. +        CHECKED_MALLOCZERO( h->mb.trellis.cache_sad_tag[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(uint32_t) );
  183. +        CHECKED_MALLOC( h->mb.trellis.cache_sad_score[0], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(int) );
  184. +        CHECKED_MALLOC( h->mb.trellis.cache_sad_score[1], h->mb.i_mb_count * 9 * 8 * 8 * sizeof(int) );
  185. +        CHECKED_MALLOC( h->mb.trellis.cache_me_t[0], h->mb.i_mb_count * 9 * sizeof(x264_me_t) );
  186. +        CHECKED_MALLOC( h->mb.trellis.cache_me_t[1], h->mb.i_mb_count * 9 * sizeof(x264_me_t) );
  187. +        CHECKED_MALLOC( h->mb.trellis.cache_mode_intra, h->mb.i_mb_count * sizeof(int) );
  188. +        CHECKED_MALLOC( h->mb.trellis.cache_sad_intra, h->mb.i_mb_count * sizeof(int) );
  189. +        CHECKED_MALLOC( h->mb.trellis.best_type[0], h->mb.i_mb_count * sizeof(int) );
  190. +        CHECKED_MALLOC( h->mb.trellis.best_type[1], h->mb.i_mb_count * sizeof(int) );
  191. +        CHECKED_MALLOC( h->mb.trellis.best_part[0], h->mb.i_mb_count * sizeof(int) );
  192. +        CHECKED_MALLOC( h->mb.trellis.best_part[1], h->mb.i_mb_count * sizeof(int) );
  193. +    }
  194. +
  195.      return 0;
  196.  fail:
  197.      return -1;
  198. @@ -309,6 +337,27 @@ void x264_macroblock_cache_free( x264_t *h )
  199.      x264_free( h->mb.skipbp );
  200.      x264_free( h->mb.cbp );
  201.      x264_free( h->mb.qp );
  202. +
  203. +    if ( h->param.analyse.i_me_trellis ) {
  204. +        x264_free( h->mb.trellis.t );
  205. +        x264_free( h->mb.trellis.mb );
  206. +        x264_free( h->mb.trellis.backup );
  207. +        
  208. +        x264_free( h->mb.trellis.cache_sad_gen[0] );
  209. +        x264_free( h->mb.trellis.cache_sad_gen[1] );
  210. +        x264_free( h->mb.trellis.cache_sad_tag[0] );
  211. +        x264_free( h->mb.trellis.cache_sad_tag[1] );
  212. +        x264_free( h->mb.trellis.cache_sad_score[0] );
  213. +        x264_free( h->mb.trellis.cache_sad_score[1] );
  214. +        x264_free( h->mb.trellis.cache_me_t[0] );
  215. +        x264_free( h->mb.trellis.cache_me_t[1] );
  216. +        x264_free( h->mb.trellis.cache_mode_intra );
  217. +        x264_free( h->mb.trellis.cache_sad_intra );
  218. +        x264_free( h->mb.trellis.best_type[0] );
  219. +        x264_free( h->mb.trellis.best_type[1] );
  220. +        x264_free( h->mb.trellis.best_part[0] );
  221. +        x264_free( h->mb.trellis.best_part[1] );
  222. +    }
  223.  }
  224.  
  225.  int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
  226. diff --git a/common/macroblock.h b/common/macroblock.h
  227. old mode 100644
  228. new mode 100755
  229. diff --git a/common/mc.c b/common/mc.c
  230. old mode 100644
  231. new mode 100755
  232. diff --git a/common/mc.h b/common/mc.h
  233. old mode 100644
  234. new mode 100755
  235. diff --git a/common/mvpred.c b/common/mvpred.c
  236. old mode 100644
  237. new mode 100755
  238. diff --git a/common/osdep.c b/common/osdep.c
  239. old mode 100644
  240. new mode 100755
  241. diff --git a/common/osdep.h b/common/osdep.h
  242. old mode 100644
  243. new mode 100755
  244. diff --git a/common/pixel.c b/common/pixel.c
  245. old mode 100644
  246. new mode 100755
  247. diff --git a/common/pixel.h b/common/pixel.h
  248. old mode 100644
  249. new mode 100755
  250. diff --git a/common/ppc/dct.c b/common/ppc/dct.c
  251. old mode 100644
  252. new mode 100755
  253. diff --git a/common/ppc/dct.h b/common/ppc/dct.h
  254. old mode 100644
  255. new mode 100755
  256. diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
  257. old mode 100644
  258. new mode 100755
  259. diff --git a/common/ppc/mc.c b/common/ppc/mc.c
  260. old mode 100644
  261. new mode 100755
  262. diff --git a/common/ppc/mc.h b/common/ppc/mc.h
  263. old mode 100644
  264. new mode 100755
  265. diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
  266. old mode 100644
  267. new mode 100755
  268. diff --git a/common/ppc/pixel.h b/common/ppc/pixel.h
  269. old mode 100644
  270. new mode 100755
  271. diff --git a/common/ppc/ppccommon.h b/common/ppc/ppccommon.h
  272. old mode 100644
  273. new mode 100755
  274. diff --git a/common/ppc/predict.c b/common/ppc/predict.c
  275. old mode 100644
  276. new mode 100755
  277. diff --git a/common/ppc/predict.h b/common/ppc/predict.h
  278. old mode 100644
  279. new mode 100755
  280. diff --git a/common/ppc/quant.c b/common/ppc/quant.c
  281. old mode 100644
  282. new mode 100755
  283. diff --git a/common/ppc/quant.h b/common/ppc/quant.h
  284. old mode 100644
  285. new mode 100755
  286. diff --git a/common/predict.c b/common/predict.c
  287. old mode 100644
  288. new mode 100755
  289. diff --git a/common/predict.h b/common/predict.h
  290. old mode 100644
  291. new mode 100755
  292. diff --git a/common/quant.c b/common/quant.c
  293. old mode 100644
  294. new mode 100755
  295. diff --git a/common/quant.h b/common/quant.h
  296. old mode 100644
  297. new mode 100755
  298. diff --git a/common/rectangle.c b/common/rectangle.c
  299. old mode 100644
  300. new mode 100755
  301. diff --git a/common/rectangle.h b/common/rectangle.h
  302. old mode 100644
  303. new mode 100755
  304. diff --git a/common/set.c b/common/set.c
  305. old mode 100644
  306. new mode 100755
  307. diff --git a/common/set.h b/common/set.h
  308. old mode 100644
  309. new mode 100755
  310. diff --git a/common/sparc/pixel.asm b/common/sparc/pixel.asm
  311. old mode 100644
  312. new mode 100755
  313. diff --git a/common/sparc/pixel.h b/common/sparc/pixel.h
  314. old mode 100644
  315. new mode 100755
  316. diff --git a/common/threadpool.c b/common/threadpool.c
  317. old mode 100644
  318. new mode 100755
  319. diff --git a/common/threadpool.h b/common/threadpool.h
  320. old mode 100644
  321. new mode 100755
  322. diff --git a/common/trellis.h b/common/trellis.h
  323. new file mode 100755
  324. index 0000000..e807e35
  325. --- /dev/null
  326. +++ b/common/trellis.h
  327. @@ -0,0 +1,179 @@
  328. +#ifndef TRELLIS_H
  329. +#define TRELLIS_H
  330. +
  331. +#define QP_FAKE     24
  332. +#define ME_RANGE    h->param.analyse.i_me_range
  333. +
  334. +#include "common.h"
  335. +#include "encoder/me.h"
  336. +
  337. +// trellis mode identifiers
  338. +enum trellis_mode_t {
  339. +    MODE_NONE               = 0,
  340. +    MODE_COMBINED_DIAMOND   = 1,
  341. +    MODE_DECIDE             = 2,
  342. +    MODE_DIAMOND            = 3,
  343. +    MODE_SQUARE2            = 4
  344. +};
  345. +
  346. +// trellis direction
  347. +enum trellis_direction_t {
  348. +    ROW_TRELLIS = 0,
  349. +    COL_TRELLIS = 1
  350. +};
  351. +
  352. +// trellis mb type identifiers; TRELLIS_TYPE() maps an mb_t onto T_P_16x16..T_P_INTRA
  353. +enum trellis_p_mb_type_t {
  354. +    T_P_16x16  = 0,
  355. +    T_P_8x8    = 1,
  356. +    T_P_16x8   = 2,
  357. +    T_P_8x16   = 3,
  358. +    T_P_INTRA  = 4,
  359. +    
  360. +    T_P_BEST   = 5,   // NOTE(review): same value as T_P_TOTAL - presumably the extra slot after the five types above; confirm
  361. +    T_P_TOTAL  = 5    // count of the types above; mb_stride_col is set to T_P_TOTAL+1 in x264_macroblock_cache_allocate
  362. +};
  363. +
  364. +// mv direction total
  365. +enum mv_candidates_t {
  366. +    COMBINED_DIAMOND_CANDIDATES = 21,
  367. +    DECIDE_CANDIDATES           = 5,
  368. +    DIAMOND_CANDIDATES          = 5,
  369. +    SQUARE2_CANDIDATES          = 25
  370. +};
  371. +
  372. +// mv direction indices
  373. +enum mv_candidate_index_t {
  374. +    T___    = 0,
  375. +    B___    = 1,
  376. +    L___    = 2,
  377. +    R___    = 3,
  378. +    C___    = 4,
  379. +    TL__    = 5,
  380. +    TR__    = 6,
  381. +    BL__    = 7,
  382. +    BR__    = 8,
  383. +    LL__    = 9,
  384. +    RR__    = 10,
  385. +    TT__    = 11,
  386. +    BB__    = 12,
  387. +    TTL_    = 13,
  388. +    TTLL    = 14,
  389. +    TTR_    = 15,
  390. +    TTRR    = 16,
  391. +    TLL_    = 17,
  392. +    TRR_    = 18,
  393. +    BBL_    = 19,
  394. +    BBLL    = 20,
  395. +    BBR_    = 21,
  396. +    BBRR    = 22,
  397. +    BLL_    = 23,
  398. +    BRR_    = 24
  399. +};
  400. +
  401. +// median of three values; every argument is fully parenthesized but is evaluated more than once, so pass side-effect-free expressions
  402. +#define X264_MED(a,b,c) ( ((a)<=(b)) ? ( ((c)<=(a)) ? (a) : ( ((c)<=(b)) ? (c) : (b) ) ) : ( ((c)>=(a)) ? (a) : ( ((c)>=(b)) ? (c) : (b) ) ) )
  403. +
  404. +// copy memory between two records
  405. +#define CP_MV(a,b)    memcpy( a, b, 2*sizeof( int16_t ) )
  406. +#define CP_MB(a,b)    memcpy( a, b, sizeof( mb_t ) )
  407. +
  408. +// cost macros. T_SAD_SCORE needs `h` in scope. NOTE(review): it reads h->mb.trellis.sad_p8x8/sad_p16x16/sad_p16x8/sad_p8x16/sad_intra, which me_trellis_t in this header does not declare - confirm before use
  409. +
  410. +#define T_SAD_SCORE( class, partition, i_mv, row, col, x, y )\
  411. +    ((class==P_8x8) ? (h->mb.trellis.sad_p8x8[i_mv][row*h->mb.i_mb_stride+col][(2*ME_RANGE*(y+ME_RANGE))+(x+ME_RANGE)]) : \
  412. +    (((class==P_L0)&&(partition==D_16x16)) ? (h->mb.trellis.sad_p16x16[row*h->mb.i_mb_stride+col][(2*ME_RANGE*(y+ME_RANGE))+(x+ME_RANGE)]) : \
  413. +    (((class==P_L0)&&(partition==D_16x8)) ? (h->mb.trellis.sad_p16x8[i_mv][row*h->mb.i_mb_stride+col][((2*ME_RANGE)*(y+ME_RANGE))+(x+ME_RANGE)]) : \
  414. +    (((class==P_L0)&&(partition==D_8x16)) ? (h->mb.trellis.sad_p8x16[i_mv][row*h->mb.i_mb_stride+col][((2*ME_RANGE)*(y+ME_RANGE))+(x+ME_RANGE)]) : \
  415. +    (h->mb.trellis.sad_intra[row*h->mb.i_mb_stride+col])))))
  416. +
  417. +#define T_BITS_MVD( mx, my )\
  418. +    (h->cost_mv[QP_FAKE][(mx)<<2] + h->cost_mv[QP_FAKE][(my)<<2])
  419. +
  420. +#define T_BITS_MODE( class, partition )\
  421. +    (x264_lambda_tab[QP_FAKE] * ( ( IS_INTRA( class ) ) ? ( 0 ) : ( ( partition == D_16x16 ) ? ( 4 ) : ( ( partition == D_8x8 ) ? ( 12 ) : ( 6 ) ) ) ))
  422. +
  423. +#define T_COST_SAD( mx, my )\
  424. +    h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[(my)*stride+(mx)], stride )
  425. +    
  426. +// map an mb_t (its class and part[0]) to a trellis_p_mb_type_t; read-only, but `block` is evaluated several times so pass a plain lvalue
  427. +
  428. +#define TRELLIS_TYPE( block )\
  429. +    ( IS_INTRA( (block).class ) ? ( T_P_INTRA ) : \
  430. +    ( ( (block).class == P_8x8 ) ? ( T_P_8x8 ) : \
  431. +    ( ( (block).part[0] == D_16x16 ) ? ( T_P_16x16 ) : \
  432. +    ( ( (block).part[0] == D_16x8 ) ? ( T_P_16x8 ) : ( T_P_8x16 ) \
  433. +    ) ) ) )
  434. +
  435. +// holds mvs for partitioned block.  unused mvs are zero by default.
  436. +typedef struct mb_t {
  437. +    int class;
  438. +    int costSAD;
  439. +    int costMV;
  440. +    int16_t mv_min[2];
  441. +    int16_t mv_max[2];
  442. +    int8_t part[4];
  443. +    int16_t ((mv[2])[4])[2];
  444. +    uint8_t (ref[4])[2];
  445. +} mb_t;
  446. +
  447. +typedef struct me_trellis_node_t {
  448. +    mb_t block;
  449. +    int8_t parent;
  450. +    int accCost;
  451. +    int costCC;
  452. +    int costDC;
  453. +    int costDL;
  454. +    int costDR;
  455. +    int costCR;
  456. +} me_trellis_node_t;
  457. +
  458. +
  459. +typedef struct me_trellis_t {
  460. +    me_trellis_node_t *t;
  461. +    mb_t *mb;
  462. +    mb_t *backup;
  463. +
  464. +    int mode;
  465. +    int direction;
  466. +    int breadth;
  467. +    int max_breadth;
  468. +    int length;
  469. +    int max_length;
  470. +    uint32_t gen;
  471. +    
  472. +    int b_collected;
  473. +    
  474. +    int mb_stride_col;
  475. +    int mb_stride_row;
  476. +    int t_stride;
  477. +    
  478. +    uint32_t (*cache_sad_gen[2])[9][8][8];
  479. +    uint32_t (*cache_sad_tag[2])[9][8][8];
  480. +    int (*cache_sad_score[2])[9][8][8];
  481. +    x264_me_t (*cache_me_t[2])[9];
  482. +    int *cache_mode_intra;
  483. +    int *cache_sad_intra;
  484. +    int (*best_type[2]);
  485. +    int (*best_part[2]);
  486. +} me_trellis_t;
  487. +
  488. +int get_cached_fpel_sad( x264_t *h, int class, int partition, int i_list, int i_mv, int row, int col, int x, int y );
  489. +void fill_me_trellis_params( x264_t *h, int mode, int direction );
  490. +void get_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
  491. +void get_top_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
  492. +void get_top_right_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
  493. +void get_top_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv );
  494. +void choose_predictors( x264_t *h, mb_t *pred, int row, int col, mb_t *current, mb_t *left, mb_t *top, mb_t *diag );
  495. +int cost_mb( x264_t *h, int row, int col, int i_mv_max, mb_t *block, mb_t *left, mb_t *top, mb_t *diag );
  496. +void collect_candidates( x264_t *h );
  497. +int sad_cost( x264_t *h );
  498. +int mv_cost( x264_t *h );
  499. +void fill_trellis( x264_t *h, int i_frame );
  500. +void adjust_trellis( x264_t *h, int sub );
  501. +void generate_alternate_mvs( x264_t *h, int16_t mv[SQUARE2_CANDIDATES][2], mb_t *block, int i_list, int i_mv );
  502. +void run_row_trellis( x264_t *h, int iter );
  503. +void scale_trellis_mv( x264_t *h );
  504. +void write_back_trellis_mv( x264_t *h, int i_mb );
  505. +
  506. +#endif
  507. diff --git a/common/visualize.c b/common/visualize.c
  508. old mode 100644
  509. new mode 100755
  510. diff --git a/common/visualize.h b/common/visualize.h
  511. old mode 100644
  512. new mode 100755
  513. diff --git a/common/vlc.c b/common/vlc.c
  514. old mode 100644
  515. new mode 100755
  516. diff --git a/common/win32thread.c b/common/win32thread.c
  517. old mode 100644
  518. new mode 100755
  519. diff --git a/common/win32thread.h b/common/win32thread.h
  520. old mode 100644
  521. new mode 100755
  522. diff --git a/common/x86/bitstream-a.asm b/common/x86/bitstream-a.asm
  523. old mode 100644
  524. new mode 100755
  525. diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
  526. old mode 100644
  527. new mode 100755
  528. diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
  529. old mode 100644
  530. new mode 100755
  531. diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
  532. old mode 100644
  533. new mode 100755
  534. diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
  535. old mode 100644
  536. new mode 100755
  537. diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
  538. old mode 100644
  539. new mode 100755
  540. diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
  541. old mode 100644
  542. new mode 100755
  543. diff --git a/common/x86/dct.h b/common/x86/dct.h
  544. old mode 100644
  545. new mode 100755
  546. diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
  547. old mode 100644
  548. new mode 100755
  549. diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
  550. old mode 100644
  551. new mode 100755
  552. diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
  553. old mode 100644
  554. new mode 100755
  555. diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
  556. old mode 100644
  557. new mode 100755
  558. diff --git a/common/x86/mc.h b/common/x86/mc.h
  559. old mode 100644
  560. new mode 100755
  561. diff --git a/common/x86/pixel-32.asm b/common/x86/pixel-32.asm
  562. old mode 100644
  563. new mode 100755
  564. diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
  565. old mode 100644
  566. new mode 100755
  567. diff --git a/common/x86/pixel.h b/common/x86/pixel.h
  568. old mode 100644
  569. new mode 100755
  570. diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
  571. old mode 100644
  572. new mode 100755
  573. diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
  574. old mode 100644
  575. new mode 100755
  576. diff --git a/common/x86/predict.h b/common/x86/predict.h
  577. old mode 100644
  578. new mode 100755
  579. diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
  580. old mode 100644
  581. new mode 100755
  582. diff --git a/common/x86/quant.h b/common/x86/quant.h
  583. old mode 100644
  584. new mode 100755
  585. diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
  586. old mode 100644
  587. new mode 100755
  588. diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm
  589. old mode 100644
  590. new mode 100755
  591. diff --git a/common/x86/util.h b/common/x86/util.h
  592. old mode 100644
  593. new mode 100755
  594. diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
  595. old mode 100644
  596. new mode 100755
  597. diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
  598. old mode 100644
  599. new mode 100755
  600. diff --git a/configure b/configure
  601. index d164414..b00900f 100755
  602. --- a/configure
  603. +++ b/configure
  604. @@ -857,7 +857,7 @@ if [ "$strip" = "yes" ]; then
  605.  fi
  606.  
  607.  if [ "$debug" = "yes" ]; then
  608. -    CFLAGS="-O1 -g $CFLAGS"
  609. +    CFLAGS="-O0 -g $CFLAGS"
  610.  elif [ $ARCH = ARM ]; then
  611.      # arm-gcc-4.2 produces incorrect output with -ffast-math
  612.      # and it doesn't save any speed anyway on 4.4, so disable it
  613. diff --git a/doc/ratecontrol.txt b/doc/ratecontrol.txt
  614. old mode 100644
  615. new mode 100755
  616. diff --git a/doc/regression_test.txt b/doc/regression_test.txt
  617. old mode 100644
  618. new mode 100755
  619. diff --git a/doc/standards.txt b/doc/standards.txt
  620. old mode 100644
  621. new mode 100755
  622. diff --git a/doc/threads.txt b/doc/threads.txt
  623. old mode 100644
  624. new mode 100755
  625. diff --git a/doc/vui.txt b/doc/vui.txt
  626. old mode 100644
  627. new mode 100755
  628. diff --git a/encoder/analyse.c b/encoder/analyse.c
  629. old mode 100644
  630. new mode 100755
  631. index 5b5083d..96061b7
  632. --- a/encoder/analyse.c
  633. +++ b/encoder/analyse.c
  634. @@ -809,7 +809,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  635.          /* cavlc mb type prefix */
  636.          a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
  637.  
  638. -    if( a->i_satd_i16x16 > i16x16_thresh )
  639. +    if( !h->param.analyse.i_me_trellis && a->i_satd_i16x16 > i16x16_thresh )
  640.          return;
  641.  
  642.      /* 8x8 prediction selection */
  643. @@ -911,7 +911,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
  644.          }
  645.          /* Not heavily tuned */
  646.          static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
  647. -        if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
  648. +        if( !h->param.analyse.i_me_trellis && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
  649.              return;
  650.      }
  651.  
  652. @@ -1258,6 +1258,12 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  653.      /* 16x16 Search on all ref frame */
  654.      m.i_pixel = PIXEL_16x16;
  655.      LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
  656. +    
  657. +    if ( h->param.analyse.i_me_trellis ) {
  658. +        m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][0]);
  659. +        m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][0]);
  660. +        m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][0]);
  661. +    }
  662.  
  663.      a->l0.me16x16.cost = INT_MAX;
  664.      for( int i_ref = 0; i_ref < h->mb.pic.i_fref[0]; i_ref++ )
  665. @@ -1307,6 +1313,9 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
  666.          if( m.cost < a->l0.me16x16.cost )
  667.              h->mc.memcpy_aligned( &a->l0.me16x16, &m, sizeof(x264_me_t) );
  668.      }
  669. +    
  670. +    if ( h->param.analyse.i_me_trellis )
  671. +        h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][0]), &(a->l0.me16x16), sizeof(x264_me_t) );
  672.  
  673.      x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
  674.      assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
  675. @@ -1440,6 +1449,12 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
  676.          x264_me_t *m = &a->l0.me8x8[i];
  677.          int x8 = i&1;
  678.          int y8 = i>>1;
  679. +        
  680. +        if ( h->param.analyse.i_me_trellis ) {
  681. +            m->cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][1+i]);
  682. +            m->cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][1+i]);
  683. +            m->cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][1+i]);
  684. +        }
  685.  
  686.          m->i_pixel = PIXEL_8x8;
  687.          m->i_ref_cost = i_ref_cost;
  688. @@ -1462,6 +1477,10 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
  689.          m->cost += i_ref_cost;
  690.          if( !h->param.b_cabac || (h->param.analyse.inter & X264_ANALYSE_PSUB8x8) )
  691.              m->cost += a->i_lambda * i_sub_mb_p_cost_table[D_L0_8x8];
  692. +            
  693. +        if ( h->param.analyse.i_me_trellis )
  694. +            h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][1+i]), m, sizeof(x264_me_t) );
  695. +
  696.      }
  697.  
  698.      a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
  699. @@ -1493,6 +1512,12 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
  700.  
  701.          m.i_pixel = PIXEL_16x8;
  702.  
  703. +        if ( h->param.analyse.i_me_trellis ) {
  704. +            m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][5+i]);
  705. +            m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][5+i]);
  706. +            m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][5+i]);
  707. +        }
  708. +
  709.          LOAD_FENC( &m, p_fenc, 0, 8*i );
  710.          l0m->cost = INT_MAX;
  711.          for( int j = 0; j < i_ref8s; j++ )
  712. @@ -1524,10 +1549,13 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i
  713.              if( m.cost < l0m->cost )
  714.                  h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );
  715.          }
  716. +        
  717. +        if ( h->param.analyse.i_me_trellis )
  718. +            h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][5+i]), l0m, sizeof(x264_me_t) );
  719.  
  720.          /* Early termination based on the current SATD score of partition[0]
  721.             plus the estimated SATD score of partition[1] */
  722. -        if( !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
  723. +        if( !(h->param.analyse.i_me_trellis) && !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
  724.          {
  725.              a->l0.i_cost16x8 = COST_MAX;
  726.              return;
  727. @@ -1559,6 +1587,12 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
  728.  
  729.          m.i_pixel = PIXEL_8x16;
  730.  
  731. +        if ( h->param.analyse.i_me_trellis ) {
  732. +            m.cache_sad_gen = &(h->mb.trellis.cache_sad_gen[0][h->mb.i_mb_xy][7+i]);
  733. +            m.cache_sad_tag = &(h->mb.trellis.cache_sad_tag[0][h->mb.i_mb_xy][7+i]);
  734. +            m.cache_sad_score = &(h->mb.trellis.cache_sad_score[0][h->mb.i_mb_xy][7+i]);
  735. +        }
  736. +
  737.          LOAD_FENC( &m, p_fenc, 8*i, 0 );
  738.          l0m->cost = INT_MAX;
  739.          for( int j = 0; j < i_ref8s; j++ )
  740. @@ -1589,10 +1623,13 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i
  741.              if( m.cost < l0m->cost )
  742.                  h->mc.memcpy_aligned( l0m, &m, sizeof(x264_me_t) );
  743.          }
  744. +        
  745. +        if ( h->param.analyse.i_me_trellis )
  746. +            h->mc.memcpy_aligned( &(h->mb.trellis.cache_me_t[0][h->mb.i_mb_xy][7+i]), l0m, sizeof(x264_me_t) );
  747.  
  748.          /* Early termination based on the current SATD score of partition[0]
  749.             plus the estimated SATD score of partition[1] */
  750. -        if( !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
  751. +        if( !h->param.analyse.i_me_trellis && !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 )
  752.          {
  753.              a->l0.i_cost8x16 = COST_MAX;
  754.              return;
  755. @@ -2920,8 +2957,8 @@ intra_analysis:
  756.              i_partition = D_16x16;
  757.              i_cost = analysis.l0.me16x16.cost;
  758.  
  759. -            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
  760. -                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
  761. +            if( ( ( h->param.analyse.i_me_trellis || ( flags & X264_ANALYSE_PSUB16x16 ) ) &&
  762. +                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost ) )
  763.              {
  764.                  i_type = P_8x8;
  765.                  i_partition = D_8x8;
  766. @@ -2956,8 +2993,8 @@ intra_analysis:
  767.  
  768.              /* Now do 16x8/8x16 */
  769.              i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
  770. -            if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
  771. -                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 )
  772. +            if( h->param.analyse.i_me_trellis || ( ( flags & X264_ANALYSE_PSUB16x16 ) &&
  773. +                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 ) )
  774.              {
  775.                  int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost
  776.                                        + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
  777. @@ -2975,7 +3012,11 @@ intra_analysis:
  778.              }
  779.  
  780.              h->mb.i_partition = i_partition;
  781. -
  782. +            if ( h->param.analyse.i_me_trellis )
  783. +                h->mb.trellis.best_part[0][h->mb.i_mb_xy] = h->mb.i_partition;
  784. +                
  785. +            //printf("(%d,%d,%d,%d)\n",analysis.l0.me16x16.cost,analysis.l0.i_cost8x8,analysis.l0.i_cost16x8,analysis.l0.i_cost8x16);
  786. +                
  787.              /* refine qpel */
  788.              //FIXME mb_type costs?
  789.              if( analysis.i_mbrd || !h->mb.i_subpel_refine )
  790. @@ -3055,7 +3096,18 @@ intra_analysis:
  791.              i_satd_intra = X264_MIN3( analysis.i_satd_i16x16,
  792.                                        analysis.i_satd_i8x8,
  793.                                        analysis.i_satd_i4x4 );
  794. -
  795. +            
  796. +            
  797. +            if ( h->param.analyse.i_me_trellis ) {
  798. +                h->mb.trellis.cache_sad_intra[h->mb.i_mb_xy] = i_satd_intra;
  799. +                if ( i_satd_intra == analysis.i_satd_i16x16 )
  800. +                    h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_16x16;
  801. +                else if ( i_satd_intra == analysis.i_satd_i8x8 )
  802. +                    h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_8x8;
  803. +                else if ( i_satd_intra == analysis.i_satd_i4x4 )
  804. +                    h->mb.trellis.cache_mode_intra[h->mb.i_mb_xy] = I_4x4;
  805. +            }
  806. +                                      
  807.              if( analysis.i_mbrd )
  808.              {
  809.                  x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
  810. @@ -3078,6 +3130,10 @@ intra_analysis:
  811.              COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );
  812.  
  813.              h->mb.i_type = i_type;
  814. +            if ( h->param.analyse.i_me_trellis )
  815. +                h->mb.trellis.best_type[0][h->mb.i_mb_xy] = h->mb.i_type;
  816. +
  817. +            //printf("%d,%d\n",i_type,i_partition);
  818.  
  819.              if( analysis.b_force_intra && !IS_INTRA(i_type) )
  820.              {
  821. diff --git a/encoder/analyse.h b/encoder/analyse.h
  822. old mode 100644
  823. new mode 100755
  824. diff --git a/encoder/cabac.c b/encoder/cabac.c
  825. old mode 100644
  826. new mode 100755
  827. diff --git a/encoder/cavlc.c b/encoder/cavlc.c
  828. old mode 100644
  829. new mode 100755
  830. diff --git a/encoder/encoder.c b/encoder/encoder.c
  831. old mode 100644
  832. new mode 100755
  833. index 3571a91..ae8a39a
  834. --- a/encoder/encoder.c
  835. +++ b/encoder/encoder.c
  836. @@ -1762,7 +1762,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
  837.      if( min_y < h->i_threadslice_start )
  838.          return;
  839.  
  840. -    if( b_deblock )
  841. +    if( b_deblock && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
  842.          for( int y = min_y; y < mb_y; y += (1 << SLICE_MBAFF) )
  843.              x264_frame_deblock_row( h, y );
  844.  
  845. @@ -1776,7 +1776,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
  846.                          h->fdec->plane[p]     + i*h->fdec->i_stride[p],
  847.                          h->mb.i_mb_width*16*sizeof(pixel) );
  848.  
  849. -    if( b_hpel )
  850. +    if( b_hpel && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
  851.      {
  852.          int end = mb_y == h->mb.i_mb_height;
  853.          x264_frame_expand_border( h, h->fdec, min_y, end );
  854. @@ -1797,7 +1797,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
  855.      if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
  856.          x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << SLICE_MBAFF)) );
  857.  
  858. -    if( b_measure_quality )
  859. +    if( b_measure_quality && !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) )
  860.      {
  861.          maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
  862.          if( h->param.analyse.b_psnr )
  863. @@ -2023,9 +2023,14 @@ static int x264_slice_write( x264_t *h )
  864.      i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
  865.      i_skip = 0;
  866.      int mb_size[2];
  867. -
  868. +    
  869. +    h->param.analyse.b_cache_sads = ( h->param.analyse.i_me_trellis && ! h->mb.trellis.b_collected );
  870. +    if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected )
  871. +        h->mb.trellis.gen++; // update cache generation
  872. +    
  873.      while( 1 )
  874.      {
  875. +
  876.          mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
  877.          int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
  878.  
  879. @@ -2079,7 +2084,10 @@ static int x264_slice_write( x264_t *h )
  880.              x264_macroblock_cache_load_progressive( h, i_mb_x, i_mb_y );
  881.  
  882.          x264_macroblock_analyse( h );
  883. -
  884. +        
  885. +        if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && h->mb.trellis.b_collected )
  886. +            write_back_trellis_mv( h, mb_xy );
  887. +            
  888.          /* encode this macroblock -> be careful it can change the mb type to P_SKIP if needed */
  889.  reencode:
  890.          x264_macroblock_encode( h );
  891. @@ -2190,59 +2198,62 @@ reencode:
  892.          /* save cache */
  893.          x264_macroblock_cache_save( h );
  894.  
  895. -        /* accumulate mb stats */
  896. -        h->stat.frame.i_mb_count[h->mb.i_type]++;
  897. +        if( !( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) ) {
  898. +            /* accumulate mb stats */
  899. +            h->stat.frame.i_mb_count[h->mb.i_type]++;
  900.  
  901. -        int b_intra = IS_INTRA( h->mb.i_type );
  902. -        int b_skip = IS_SKIP( h->mb.i_type );
  903. -        if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
  904. -        {
  905. -            if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
  906. +            int b_intra = IS_INTRA( h->mb.i_type );
  907. +            int b_skip = IS_SKIP( h->mb.i_type );
  908. +            if( h->param.i_log_level >= X264_LOG_INFO || h->param.rc.b_stat_write )
  909.              {
  910. -                if( h->mb.i_partition != D_8x8 )
  911. -                        h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
  912. -                    else
  913. -                        for( int i = 0; i < 4; i++ )
  914. -                            h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
  915. -                if( h->param.i_frame_reference > 1 )
  916. -                    for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
  917. -                        for( int i = 0; i < 4; i++ )
  918. -                        {
  919. -                            int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
  920. -                            if( i_ref >= 0 )
  921. -                                h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
  922. -                        }
  923. +                if( !b_intra && !b_skip && !IS_DIRECT( h->mb.i_type ) )
  924. +                {
  925. +                    if( h->mb.i_partition != D_8x8 )
  926. +                            h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
  927. +                        else
  928. +                            for( int i = 0; i < 4; i++ )
  929. +                                h->stat.frame.i_mb_partition[h->mb.i_sub_partition[i]] ++;
  930. +                    if( h->param.i_frame_reference > 1 )
  931. +                        for( int i_list = 0; i_list <= (h->sh.i_type == SLICE_TYPE_B); i_list++ )
  932. +                            for( int i = 0; i < 4; i++ )
  933. +                            {
  934. +                                int i_ref = h->mb.cache.ref[i_list][ x264_scan8[4*i] ];
  935. +                                if( i_ref >= 0 )
  936. +                                    h->stat.frame.i_mb_count_ref[i_list][i_ref] ++;
  937. +                            }
  938. +                }
  939.              }
  940. -        }
  941.  
  942. -        if( h->param.i_log_level >= X264_LOG_INFO )
  943. -        {
  944. -            if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
  945. +            if( h->param.i_log_level >= X264_LOG_INFO )
  946.              {
  947. -                int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
  948. -                           + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
  949. -                h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
  950. -                h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
  951. -                h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
  952. -            }
  953. -            if( h->mb.i_cbp_luma && !b_intra )
  954. -            {
  955. -                h->stat.frame.i_mb_count_8x8dct[0] ++;
  956. -                h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
  957. -            }
  958. -            if( b_intra && h->mb.i_type != I_PCM )
  959. -            {
  960. -                if( h->mb.i_type == I_16x16 )
  961. -                    h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
  962. -                else if( h->mb.i_type == I_8x8 )
  963. -                    for( int i = 0; i < 16; i += 4 )
  964. -                        h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
  965. -                else //if( h->mb.i_type == I_4x4 )
  966. -                    for( int i = 0; i < 16; i++ )
  967. -                        h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
  968. -                h->stat.frame.i_mb_pred_mode[3][x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode]]++;
  969. +                if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
  970. +                {
  971. +                    int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
  972. +                               + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
  973. +                    h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
  974. +                    h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
  975. +                    h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
  976. +                }
  977. +                if( h->mb.i_cbp_luma && !b_intra )
  978. +                {
  979. +                    h->stat.frame.i_mb_count_8x8dct[0] ++;
  980. +                    h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
  981. +                }
  982. +                if( b_intra && h->mb.i_type != I_PCM )
  983. +                {
  984. +                    if( h->mb.i_type == I_16x16 )
  985. +                        h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
  986. +                    else if( h->mb.i_type == I_8x8 )
  987. +                        for( int i = 0; i < 16; i += 4 )
  988. +                            h->stat.frame.i_mb_pred_mode[1][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
  989. +                    else //if( h->mb.i_type == I_4x4 )
  990. +                        for( int i = 0; i < 16; i++ )
  991. +                            h->stat.frame.i_mb_pred_mode[2][h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]]++;
  992. +                    h->stat.frame.i_mb_pred_mode[3][x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode]]++;
  993. +                }
  994. +                h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
  995.              }
  996. -            h->stat.frame.i_mb_field[b_intra?0:b_skip?2:1] += MB_INTERLACED;
  997. +            
  998.          }
  999.  
  1000.          /* calculate deblock strength values (actual deblocking is done per-row along with hpel) */
  1001. @@ -2274,6 +2285,28 @@ reencode:
  1002.              i_mb_x = 0;
  1003.          }
  1004.      }
  1005. +
  1006. +    if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && !h->mb.trellis.b_collected ) {
  1007. +        collect_candidates( h );
  1008. +        h->mb.trellis.b_collected = 1;
  1009. +        if ( h->param.analyse.i_me_trellis == X264_ME_TRELLIS_COMBINED_DIA ) {
  1010. +            fill_me_trellis_params( h, MODE_COMBINED_DIAMOND, ROW_TRELLIS );
  1011. +            run_row_trellis( h, 3 );
  1012. +        }
  1013. +        else {
  1014. +            fill_me_trellis_params( h, MODE_DECIDE, ROW_TRELLIS );
  1015. +            run_row_trellis( h, 1 );
  1016. +            if ( h->param.analyse.i_me_trellis == X264_ME_TRELLIS_DIA )
  1017. +                fill_me_trellis_params( h, MODE_DIAMOND, ROW_TRELLIS );
  1018. +            else
  1019. +                fill_me_trellis_params( h, MODE_SQUARE2, ROW_TRELLIS );                
  1020. +            run_row_trellis( h, 3 );
  1021. +        }
  1022. +        scale_trellis_mv( h );
  1023. +        
  1024. +        return 0;
  1025. +    }
  1026. +    
  1027.      h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
  1028.  
  1029.      if( h->param.b_cabac )
  1030. @@ -2300,6 +2333,9 @@ reencode:
  1031.                                    - h->stat.frame.i_mv_bits;
  1032.          x264_fdec_filter_row( h, h->i_threadslice_end, 1 );
  1033.      }
  1034. +    
  1035. +    if ( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I && h->mb.trellis.b_collected )
  1036. +        h->mb.trellis.b_collected = 0;
  1037.  
  1038.      return 0;
  1039.  }
  1040. @@ -2373,6 +2409,10 @@ static void *x264_slices_write( x264_t *h )
  1041.          h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
  1042.          if( x264_stack_align( x264_slice_write, h ) )
  1043.              return (void *)-1;
  1044. +        if( h->param.analyse.i_me_trellis && h->sh.i_type != SLICE_TYPE_I ) {
  1045. +            if ( x264_stack_align( x264_slice_write, h ) )
  1046. +                return (void *)-1;
  1047. +        }
  1048.          h->sh.i_first_mb = h->sh.i_last_mb + 1;
  1049.          // if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
  1050.          if( SLICE_MBAFF && h->sh.i_first_mb % h->mb.i_mb_width )
  1051. @@ -2617,6 +2657,8 @@ int     x264_encoder_encode( x264_t *h,
  1052.      }
  1053.  
  1054.      h->i_frame++;
  1055. +    // trellis cache fix
  1056. +    h->param.analyse.b_cache_sads = 0;
  1057.      /* 3: The picture is analyzed in the lookahead */
  1058.      if( !h->frames.current[0] )
  1059.          x264_lookahead_get_frames( h );
  1060. diff --git a/encoder/lookahead.c b/encoder/lookahead.c
  1061. old mode 100644
  1062. new mode 100755
  1063. diff --git a/encoder/macroblock.c b/encoder/macroblock.c
  1064. old mode 100644
  1065. new mode 100755
  1066. diff --git a/encoder/macroblock.h b/encoder/macroblock.h
  1067. old mode 100644
  1068. new mode 100755
  1069. diff --git a/encoder/me.c b/encoder/me.c
  1070. old mode 100644
  1071. new mode 100755
  1072. index 305e0c4..5fe97cb
  1073. --- a/encoder/me.c
  1074. +++ b/encoder/me.c
  1075. @@ -61,9 +61,13 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  1076.  
  1077.  #define COST_MV( mx, my )\
  1078.  {\
  1079. -    int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE,\
  1080. -                   &p_fref_w[(my)*stride+(mx)], stride )\
  1081. -             + BITS_MVD(mx,my);\
  1082. +    int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[(my)*stride+(mx)], stride );\
  1083. +    if ( h->param.analyse.b_cache_sads ) {\
  1084. +        (*(m->cache_sad_gen))[(my)&7][(mx)&7] = h->mb.trellis.gen;\
  1085. +        (*(m->cache_sad_tag))[(my)&7][(mx)&7] = pack16to32( (mx), (my) );\
  1086. +        (*(m->cache_sad_score))[(my)&7][(mx)&7] = cost;\
  1087. +    }\
  1088. +    cost += BITS_MVD(mx,my);\
  1089.      COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my );\
  1090.  }
  1091.  
  1092. @@ -123,6 +127,18 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  1093.      COPY3_IF_LT( bcost, costs[3], bmx, omx+(m3x), bmy, omy+(m3y) );\
  1094.  }
  1095.  
  1096. +#define CACHE_MV_X3( m0x, m0y, m1x, m1y, m2x, m2y ) { /* for three mvs: store h->mb.trellis.gen, the packed mv tag and costs[0..2] in the 8x8 cache slots addressed by the low three bits of each mv; expects m, h and costs in scope at the expansion site */\
  1097. +    (*(m->cache_sad_gen))[(m0y)&7][(m0x)&7] = h->mb.trellis.gen;\
  1098. +    (*(m->cache_sad_gen))[(m1y)&7][(m1x)&7] = h->mb.trellis.gen;\
  1099. +    (*(m->cache_sad_gen))[(m2y)&7][(m2x)&7] = h->mb.trellis.gen;\
  1100. +    (*(m->cache_sad_tag))[(m0y)&7][(m0x)&7] = pack16to32( m0x, m0y );\
  1101. +    (*(m->cache_sad_tag))[(m1y)&7][(m1x)&7] = pack16to32( m1x, m1y );\
  1102. +    (*(m->cache_sad_tag))[(m2y)&7][(m2x)&7] = pack16to32( m2x, m2y );\
  1103. +    (*(m->cache_sad_score))[(m0y)&7][(m0x)&7] = costs[0];\
  1104. +    (*(m->cache_sad_score))[(m1y)&7][(m1x)&7] = costs[1];\
  1105. +    (*(m->cache_sad_score))[(m2y)&7][(m2x)&7] = costs[2];\
  1106. +}
  1107. +
  1108.  #define COST_MV_X3_ABS( m0x, m0y, m1x, m1y, m2x, m2y )\
  1109.  {\
  1110.      h->pixf.fpelcmp_x3[i_pixel]( p_fenc,\
  1111. @@ -130,6 +146,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  1112.          p_fref_w + (m1x) + (m1y)*stride,\
  1113.          p_fref_w + (m2x) + (m2y)*stride,\
  1114.          stride, costs );\
  1115. +    if ( h->param.analyse.b_cache_sads )\
  1116. +        CACHE_MV_X3( m0x, m0y, m1x, m1y, m2x, m2y )\
  1117.      costs[0] += p_cost_mvx[(m0x)<<2]; /* no cost_mvy */\
  1118.      costs[1] += p_cost_mvx[(m1x)<<2];\
  1119.      costs[2] += p_cost_mvx[(m2x)<<2];\
  1120. @@ -201,6 +219,10 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
  1121.  #define pack16to32_mask2(mx,my) ((mx<<16)|(my&0x7FFF))
  1122.      uint32_t mv_min = pack16to32_mask2( -mv_x_min, -mv_y_min );
  1123.      uint32_t mv_max = pack16to32_mask2( mv_x_max, mv_y_max )|0x8000;
  1124. +    m->mv_min[0] = mv_x_min;
  1125. +    m->mv_min[1] = mv_y_min;
  1126. +    m->mv_max[0] = mv_x_max;
  1127. +    m->mv_max[1] = mv_y_max;
  1128.  
  1129.  #define CHECK_MVRANGE(mx,my) (!(((pack16to32_mask2(mx,my) + mv_min) | (mv_max - pack16to32_mask2(mx,my))) & 0x80004000))
  1130.  
  1131. diff --git a/encoder/me.h b/encoder/me.h
  1132. old mode 100644
  1133. new mode 100755
  1134. index 3281630..a843d67
  1135. --- a/encoder/me.h
  1136. +++ b/encoder/me.h
  1137. @@ -53,6 +53,12 @@ typedef struct
  1138.      int cost_mv;        /* lambda * nbits for the chosen mv */
  1139.      int cost;           /* satd + lambda * nbits */
  1140.      ALIGNED_4( int16_t mv[2] );
  1141. +    ALIGNED_4( int16_t mv_min[2] );
  1142. +    ALIGNED_4( int16_t mv_max[2] );
  1143. +    
  1144. +    uint32_t (*cache_sad_gen)[8][8];
  1145. +    uint32_t (*cache_sad_tag)[8][8];
  1146. +    int (*cache_sad_score)[8][8];
  1147.  } ALIGNED_16( x264_me_t );
  1148.  
  1149.  typedef struct
  1150. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1151. old mode 100644
  1152. new mode 100755
  1153. diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
  1154. old mode 100644
  1155. new mode 100755
  1156. diff --git a/encoder/rdo.c b/encoder/rdo.c
  1157. old mode 100644
  1158. new mode 100755
  1159. diff --git a/encoder/set.c b/encoder/set.c
  1160. old mode 100644
  1161. new mode 100755
  1162. diff --git a/encoder/set.h b/encoder/set.h
  1163. old mode 100644
  1164. new mode 100755
  1165. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  1166. old mode 100644
  1167. new mode 100755
  1168. diff --git a/encoder/trellis.c b/encoder/trellis.c
  1169. new file mode 100755
  1170. index 0000000..4258bb5
  1171. --- /dev/null
  1172. +++ b/encoder/trellis.c
  1173. @@ -0,0 +1,1042 @@
  1174. +#include "common/common.h"
  1175. +
  1176. +// Constants used to initialize candidate structs: a zero motion vector, and one mb_t template per candidate mode whose only non-zero initializers are the type and the D_* partition entries
  1177. +const int16_t mv_zero[2] = {0,0};
  1178. +const mb_t mb_i_4x4 = {I_4x4, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1179. +const mb_t mb_i_8x8 = {I_8x8, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1180. +const mb_t mb_i_16x16 = {I_16x16, 0, 0, {0, 0}, {0, 0}, {0, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1181. +const mb_t mb_p_16x16 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_16x16, 0, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1182. +const mb_t mb_p_16x8 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_16x8, D_16x8, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1183. +const mb_t mb_p_8x16 = {P_L0, 0, 0, {0, 0}, {0, 0}, {D_8x16, D_8x16, 0, 0}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1184. +const mb_t mb_p_8x8 = {P_8x8, 0, 0, {0, 0}, {0, 0}, {D_8x8, D_8x8, D_8x8, D_8x8}, {{{0,0}, {0,0}, {0,0}, {0,0}}, {{0,0}, {0,0}, {0,0}, {0,0}}}, {{0,0}, {0,0}, {0,0}, {0,0}}};
  1185. +
  1186. +int get_cached_fpel_sad( x264_t *h, int class, int partition, int i_list, int i_mv, int row, int col, int x, int y ) { // return the cached fpelcmp score for mv (x,y) of partition i_mv in macroblock (row,col); on a cache miss the score is computed and stored first
  1187. +    int result, valid, table, idx, idy;
  1188. +    uint32_t tag;
  1189. +    int i_mb = row * h->mb.i_mb_width + col;
  1190. +    uint32_t (*cache_gen)[8][8];
  1191. +    uint32_t (*cache_tag)[8][8];
  1192. +    int (*cache_score)[8][8];
  1193. +    x264_me_t *m;
  1194. +        
  1195. +    if ( IS_INTRA( class ) ) { // intra classes: one cached value per macroblock, x/y are not consulted
  1196. +        result = h->mb.trellis.cache_sad_intra[i_mb];
  1197. +    }
  1198. +    else {
  1199. +        valid = 1;
  1200. +        table = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) : // table slot: 0 = 16x16, 1..4 = 8x8, 5..6 = 16x8, 7..8 = every remaining case
  1201. +                ( ( class == P_8x8 ) ? ( 1 + i_mv ) :
  1202. +                ( ( class == P_L0 && partition == D_16x8 ) ? ( 5 + i_mv ) :
  1203. +                ( 7 + i_mv ) ) );
  1204. +        tag = pack16to32( x, y );
  1205. +        cache_gen = &(h->mb.trellis.cache_sad_gen[i_list][i_mb][table]);
  1206. +        cache_tag = &(h->mb.trellis.cache_sad_tag[i_list][i_mb][table]);
  1207. +        cache_score = &(h->mb.trellis.cache_sad_score[i_list][i_mb][table]);
  1208. +        m = &(h->mb.trellis.cache_me_t[i_list][i_mb][table]);
  1209. +        const int i_pixel = m->i_pixel;
  1210. +        const int i_fref_stride = m->i_stride[0];
  1211. +        const int i_fenc_stride = h->fdec->i_stride[0]; // NOTE(review): stride is read from fdec but indexes the fenc plane below - confirm the two always match
  1212. +        const int xoff = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) : // horizontal pixel offset of partition i_mv inside the macroblock
  1213. +                ( ( class == P_8x8 ) ? ( 8*(i_mv&1) ) :
  1214. +                ( ( class == P_L0 && partition == D_16x8 ) ? ( 0 ) :
  1215. +                ( 8*i_mv ) ) );
  1216. +        const int yoff = ( class == P_L0 && partition == D_16x16 ) ? ( 0 ) : // vertical pixel offset of partition i_mv inside the macroblock
  1217. +                ( ( class == P_8x8 ) ? ( 8*(i_mv>>1) ) :
  1218. +                ( ( class == P_L0 && partition == D_16x8 ) ? ( 8*i_mv ) :
  1219. +                ( 0 ) ) );
  1220. +        pixel *p_fenc = &(h->fenc->plane[0][16 * col + xoff + ( 16 * row + yoff) * i_fenc_stride]);
  1221. +        pixel *p_fref_w = m->p_fref_w;
  1222. +        idx = 7 & x; // each table is 8x8 and is addressed by the low three bits of the mv components
  1223. +        idy = 7 & y;
  1224. +        
  1225. +        if ( (*cache_gen)[idy][idx] != h->mb.trellis.gen ) { // entry was written under a different generation: treat as a miss and claim the slot
  1226. +            valid = 0;
  1227. +            (*cache_gen)[idy][idx] = h->mb.trellis.gen;
  1228. +        }
  1229. +        if ( (*cache_tag)[idy][idx] != tag ){ // slot holds a different mv with the same low bits: miss, retag
  1230. +            valid = 0;
  1231. +            (*cache_tag)[idy][idx] = tag;
  1232. +        }
  1233. +        
  1234. +        if ( ! valid ) { // miss: recompute the score and overwrite the slot
  1235. +            (*cache_score)[idy][idx] = h->pixf.fpelcmp[i_pixel]( p_fenc, i_fenc_stride, &p_fref_w[(y)*i_fref_stride+(x)], i_fref_stride );
  1236. +        }
  1237. +            
  1238. +        result = (*cache_score)[idy][idx];
  1239. +    }
  1240. +
  1241. +    return result;
  1242. +}
  1243. +
  1244. +void collect_candidates( x264_t *h ) { // for every macroblock, fill its trellis candidate slots from the mode templates and the cached x264_me_t results, then copy the analysed best mode into T_P_BEST
  1245. +    int row, col, boff;
  1246. +    int i_mb = 0;
  1247. +    int type, part;
  1248. +    int mb_stride = h->mb.trellis.mb_stride_col; // number of candidate slots reserved per macroblock in trellis.mb
  1249. +    for ( row = 0; row < h->mb.i_mb_height; row++ ) {
  1250. +        for ( col = 0; col < h->mb.i_mb_width; col++ ) {
  1251. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16]), &mb_p_16x16 ); // start each slot from its constant template
  1252. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8]), &mb_p_8x8 );
  1253. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8]), &mb_p_16x8 );
  1254. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16]), &mb_p_8x16 );
  1255. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_INTRA]), ( h->mb.trellis.cache_mode_intra[i_mb] == I_16x16 ) ? ( &mb_i_16x16 ) : ( ( h->mb.trellis.cache_mode_intra[i_mb] == I_8x8 ) ? ( &mb_i_8x8 ) : ( &mb_i_4x4 ) ) );
  1256. +            
  1257. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][0].mv[0] >> 2; // cached mv is stored shifted right by 2 - presumably quarter-pel to full-pel, confirm
  1258. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][0].mv[1] >> 2;
  1259. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][1].mv[0] >> 2; // cache_me_t entries 1..4 feed the four 8x8 mvs
  1260. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][1].mv[1] >> 2;
  1261. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][2].mv[0] >> 2;
  1262. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][2].mv[1] >> 2;
  1263. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][2][0] = h->mb.trellis.cache_me_t[0][i_mb][3].mv[0] >> 2;
  1264. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][2][1] = h->mb.trellis.cache_me_t[0][i_mb][3].mv[1] >> 2;
  1265. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][3][0] = h->mb.trellis.cache_me_t[0][i_mb][4].mv[0] >> 2;
  1266. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv[0][3][1] = h->mb.trellis.cache_me_t[0][i_mb][4].mv[1] >> 2;
  1267. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][5].mv[0] >> 2; // entries 5..6 feed the two 16x8 mvs
  1268. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][5].mv[1] >> 2;
  1269. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][6].mv[0] >> 2;
  1270. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][6].mv[1] >> 2;
  1271. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][0][0] = h->mb.trellis.cache_me_t[0][i_mb][7].mv[0] >> 2; // entries 7..8 feed the two 8x16 mvs
  1272. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][0][1] = h->mb.trellis.cache_me_t[0][i_mb][7].mv[1] >> 2;
  1273. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][1][0] = h->mb.trellis.cache_me_t[0][i_mb][8].mv[0] >> 2;
  1274. +            h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv[0][1][1] = h->mb.trellis.cache_me_t[0][i_mb][8].mv[1] >> 2;
  1275. +
  1276. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][0].mv_min[0]) ); // NOTE(review): the repeated copies below share one destination per slot, so only the last source appears to survive - confirm this is intended
  1277. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][1].mv_min[0]) );
  1278. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][2].mv_min[0]) );
  1279. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][3].mv_min[0]) );
  1280. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][4].mv_min[0]) );
  1281. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][5].mv_min[0]) );
  1282. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][6].mv_min[0]) );
  1283. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][7].mv_min[0]) );
  1284. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_min[0]), &(h->mb.trellis.cache_me_t[0][i_mb][8].mv_min[0]) );
  1285. +
  1286. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][0].mv_max[0]) ); // same pattern for the upper mv bound
  1287. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][1].mv_max[0]) );
  1288. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][2].mv_max[0]) );
  1289. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][3].mv_max[0]) );
  1290. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][4].mv_max[0]) );
  1291. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][5].mv_max[0]) );
  1292. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_16x8].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][6].mv_max[0]) );
  1293. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][7].mv_max[0]) );
  1294. +            CP_MV( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_8x16].mv_max[0]), &(h->mb.trellis.cache_me_t[0][i_mb][8].mv_max[0]) );
  1295. +            
  1296. +            type = h->mb.trellis.best_type[0][i_mb];
  1297. +            part = h->mb.trellis.best_part[0][i_mb];
  1298. +            boff = ( type == P_L0 && part == D_16x16 ) ? ( T_P_16x16 ) : // slot matching the recorded best type/partition; anything unmatched falls back to the intra slot
  1299. +                    ( ( type == P_8x8 && part == D_8x8 ) ? ( T_P_8x8 ) :
  1300. +                    ( ( type == P_L0 && part == D_16x8 ) ? ( T_P_16x8 ) :
  1301. +                    ( ( type == P_L0 && part == D_8x16 ) ? ( T_P_8x16 ) :
  1302. +                    ( T_P_INTRA ) ) ) );
  1303. +            CP_MB( &(h->mb.trellis.mb[i_mb*mb_stride+T_P_BEST]), &(h->mb.trellis.mb[i_mb*mb_stride+boff]) );
  1304. +
  1305. +            i_mb++;
  1306. +        }
  1307. +    }
  1308. +}
  1309. +
  1310. +void fill_me_trellis_params( x264_t *h, int mode, int direction ) {
  1311. +    h->mb.trellis.mode = mode;
  1312. +    h->mb.trellis.direction = direction;
  1313. +    h->mb.trellis.breadth = ( mode == MODE_COMBINED_DIAMOND ) ? COMBINED_DIAMOND_CANDIDATES :
  1314. +            ( ( mode == MODE_DECIDE ) ? DECIDE_CANDIDATES :
  1315. +            ( ( MODE_DIAMOND ) ? DIAMOND_CANDIDATES :
  1316. +            SQUARE2_CANDIDATES ) );
  1317. +    h->mb.trellis.length = ( direction == ROW_TRELLIS ) ? h->mb.i_mb_width : h->mb.i_mb_height;
  1318. +}
  1319. +
  1320. +void get_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
  1321. +    switch ( block->part[i_mv] ) {
  1322. +        case D_8x8 :
  1323. +            if ( i_mv == 0 )
  1324. +                CP_MV( pred, block->mv[i_list][1] );
  1325. +            else
  1326. +                CP_MV( pred, block->mv[i_list][3] );
  1327. +            break;
  1328. +        case D_16x8 :
  1329. +            if ( i_mv == 0 )
  1330. +                CP_MV( pred, block->mv[i_list][0] );
  1331. +            else
  1332. +                CP_MV( pred, block->mv[i_list][1] );
  1333. +            break;
  1334. +        case D_8x16 :
  1335. +            CP_MV( pred, block->mv[i_list][1] );
  1336. +            break;
  1337. +        default :
  1338. +            CP_MV( pred, block->mv[i_list][0] );
  1339. +            break;
  1340. +    }
  1341. +}
  1342. +
  1343. +void get_top_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
  1344. +    switch ( block->part[i_mv] ) {
  1345. +        case D_8x8 :
  1346. +            if ( i_mv == 0 )
  1347. +                CP_MV( pred, block->mv[i_list][2] );
  1348. +            else
  1349. +                CP_MV( pred, block->mv[i_list][3] );
  1350. +            break;
  1351. +        case D_16x8 :
  1352. +            CP_MV( pred, block->mv[i_list][1] );
  1353. +            break;
  1354. +        case D_8x16 :
  1355. +            if ( i_mv == 0 )
  1356. +                CP_MV( pred, block->mv[i_list][0] );
  1357. +            else
  1358. +                CP_MV( pred, block->mv[i_list][1] );
  1359. +            break;
  1360. +        default :
  1361. +            CP_MV( pred, block->mv[i_list][0] );
  1362. +            break;
  1363. +    }
  1364. +}
  1365. +
  1366. +void get_top_right_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
  1367. +    switch ( block->part[i_mv] ) {
  1368. +        case D_8x8 :
  1369. +            if ( i_mv == 0 )
  1370. +                CP_MV( pred, block->mv[i_list][3] );
  1371. +            else
  1372. +                CP_MV( pred, block->mv[i_list][2] );
  1373. +            break;
  1374. +        case D_16x8 :
  1375. +            CP_MV( pred, block->mv[i_list][1] );
  1376. +            break;
  1377. +        case D_8x16 :
  1378. +            if ( i_mv == 0 )
  1379. +                CP_MV( pred, block->mv[i_list][1] );
  1380. +            else
  1381. +                CP_MV( pred, block->mv[i_list][0] );
  1382. +            break;
  1383. +        default :
  1384. +            CP_MV( pred, block->mv[i_list][0] );
  1385. +            break;
  1386. +    }
  1387. +}
  1388. +
  1389. +void get_top_left_predictor( int16_t pred[2], mb_t *block, int i_list, int i_mv ) {
  1390. +    switch ( block->part[i_mv] ) {
  1391. +        case D_8x8 :
  1392. +            if ( i_mv == 0 )
  1393. +                CP_MV( pred, block->mv[i_list][3] );
  1394. +            else
  1395. +                CP_MV( pred, block->mv[i_list][2] );
  1396. +            break;
  1397. +        case D_16x8 :
  1398. +            CP_MV( pred, block->mv[i_list][1] );
  1399. +            break;
  1400. +        case D_8x16 :
  1401. +            if ( i_mv == 0 )
  1402. +                CP_MV( pred, block->mv[i_list][1] );
  1403. +            else
  1404. +                CP_MV( pred, block->mv[i_list][0] );
  1405. +            break;
  1406. +        default :
  1407. +            CP_MV( pred, block->mv[i_list][0] );
  1408. +            break;
  1409. +    }
  1410. +}
  1411. +
  1412. +void choose_predictors( x264_t *h, mb_t *pred, int row, int col, mb_t *current, mb_t *left, mb_t *top, mb_t *diag ) {
  1413. +    int16_t a[2], b[2], c[2], fetch[2];
  1414. +    int i_list;
  1415. +
  1416. +    switch ( current->class ) {
  1417. +        
  1418. +        case I_4x4 :
  1419. +        
  1420. +            CP_MB( pred, &mb_i_4x4 );
  1421. +            break;
  1422. +
  1423. +        case I_8x8 :
  1424. +        
  1425. +            CP_MB( pred, &mb_i_8x8 );
  1426. +            break;
  1427. +
  1428. +        case I_16x16 :
  1429. +        
  1430. +            CP_MB( pred, &mb_i_16x16 );
  1431. +            break;
  1432. +
  1433. +        case P_L0 :
  1434. +        
  1435. +            i_list = 0;
  1436. +            
  1437. +            switch( current->part[0] ) {
  1438. +
  1439. +                case D_16x16 :
  1440. +
  1441. +                    CP_MB( pred, &mb_p_16x16 );
  1442. +
  1443. +                    if ( col == 0 ) {
  1444. +                        CP_MV( a, mv_zero );
  1445. +                    }
  1446. +                    else {
  1447. +                        get_left_predictor( fetch, left, i_list, 0 );
  1448. +                        CP_MV( a, fetch );
  1449. +                    }
  1450. +
  1451. +                    if ( row == 0 ) {
  1452. +                        CP_MV( b, mv_zero );
  1453. +                        CP_MV( c, mv_zero );
  1454. +                    } else {
  1455. +                        get_top_predictor( fetch, top, i_list, 0 );
  1456. +                        CP_MV( b, fetch );
  1457. +
  1458. +                        if ( col == h->mb.i_mb_width - 1 )
  1459. +                            get_top_left_predictor( fetch, diag, i_list, 0 );
  1460. +                        else
  1461. +                            get_top_right_predictor( fetch, diag, i_list, 1 );
  1462. +                        CP_MV( c, fetch );
  1463. +                    }
  1464. +
  1465. +                    pred->mv[i_list][0][0] = X264_MED( a[0], b[0], c[0] );
  1466. +                    pred->mv[i_list][0][1] = X264_MED( a[1], b[1], c[1] );
  1467. +
  1468. +                    break;
  1469. +
  1470. +                case D_16x8 :
  1471. +
  1472. +                    CP_MB( pred, &mb_p_16x8 );
  1473. +
  1474. +                    if ( row == 0 )
  1475. +                        CP_MV( pred->mv[i_list][0], mv_zero );
  1476. +                    else {
  1477. +                        get_top_predictor( fetch, top, i_list, 0 );
  1478. +                        CP_MV( pred->mv[i_list][0], fetch );
  1479. +                    }
  1480. +
  1481. +                    if ( col == 0 )
  1482. +                        CP_MV( pred->mv[i_list][1], mv_zero );
  1483. +                    else {
  1484. +                        get_left_predictor( fetch, left, i_list, 1 );
  1485. +                        CP_MV( pred->mv[i_list][1], fetch );
  1486. +                    }
  1487. +
  1488. +                    break;
  1489. +                    
  1490. +                case D_8x16 :
  1491. +
  1492. +                    CP_MB( pred, &mb_p_8x16 );
  1493. +
  1494. +                    if ( col == 0 )
  1495. +                        CP_MV( pred->mv[i_list][0], mv_zero );
  1496. +                    else {
  1497. +                        get_left_predictor( fetch, left, i_list, 0 );
  1498. +                        CP_MV( pred->mv[i_list][0], fetch );
  1499. +                    }
  1500. +
  1501. +                    if ( row == 0 )
  1502. +                        CP_MV( pred->mv[i_list][1], mv_zero );
  1503. +                    else {
  1504. +                        if ( col == h->mb.i_mb_width-1 )
  1505. +                            get_top_left_predictor( fetch, top, i_list, 1 );
  1506. +                        else
  1507. +                            get_top_right_predictor( fetch, diag, i_list, 1 );
  1508. +                        CP_MV( pred->mv[i_list][1], fetch );
  1509. +                    }
  1510. +
  1511. +                    break;
  1512. +                    
  1513. +            }
  1514. +
  1515. +        case P_8x8 :
  1516. +        
  1517. +            i_list = 0;
  1518. +            CP_MB( pred, &mb_p_8x8 );
  1519. +
  1520. +            for ( int i = 0; i < 4; i++ ) {
  1521. +
  1522. +                if ( col == 0 && !( i & 1 ) )
  1523. +                    CP_MV( a, mv_zero );
  1524. +                else {
  1525. +                    if ( i & 1 ) {
  1526. +                        if ( i == 1 )
  1527. +                            CP_MV( fetch, current->mv[i_list][0] );
  1528. +                        else
  1529. +                            CP_MV( fetch, current->mv[i_list][2] );
  1530. +                    }
  1531. +                    else
  1532. +                        get_left_predictor( fetch, left, i_list, i );
  1533. +                        
  1534. +                    CP_MV( a, fetch );
  1535. +                }
  1536. +
  1537. +                if ( row == 0 && i < 2) {
  1538. +                    CP_MV( b, mv_zero );
  1539. +                    CP_MV( c, mv_zero );
  1540. +                } else {
  1541. +                    if ( i > 1 ) {
  1542. +                        if ( i == 2 )
  1543. +                            CP_MV( fetch, current->mv[i_list][0] );
  1544. +                        else
  1545. +                            CP_MV( fetch, current->mv[i_list][1] );
  1546. +                    }
  1547. +                    else
  1548. +                        get_top_predictor( fetch, top, i_list, i );
  1549. +                    CP_MV( b, fetch );
  1550. +
  1551. +                    
  1552. +                    if ( i == 0 )
  1553. +                        get_top_right_predictor( fetch, top, i_list, i );
  1554. +                    else if ( i == 1 ) {
  1555. +                        if ( col == h->mb.i_mb_width - 1 )
  1556. +                            get_top_left_predictor( fetch, diag, i_list, i );
  1557. +                        else
  1558. +                            get_top_right_predictor( fetch, diag, i_list, i );
  1559. +                    }
  1560. +                    else if ( i == 2 )
  1561. +                        CP_MV( fetch, current->mv[i_list][1] );
  1562. +                    else
  1563. +                        CP_MV( fetch, current->mv[i_list][0] );
  1564. +
  1565. +                    CP_MV( c, fetch );
  1566. +                }
  1567. +
  1568. +                pred->mv[i_list][i][0] = X264_MED( a[0], b[0], c[0] );
  1569. +                pred->mv[i_list][i][1] = X264_MED( a[1], b[1], c[1] );
  1570. +
  1571. +            }
  1572. +
  1573. +            break;
  1574. +
  1575. +    }
  1576. +
  1577. +}
  1578. +
  1579. +int cost_mb( x264_t *h, int row, int col, int i_mv_max, mb_t *block, mb_t *left, mb_t *top, mb_t *diag ) {
  1580. +    mb_t pred;
  1581. +    choose_predictors( h, &pred, row, col, block, left, top, diag );
  1582. +    block->costSAD = 0;
  1583. +    block->costMV = 0;
  1584. +    int i, x, y, px, py, i_list;
  1585. +    
  1586. +
  1587. +    switch ( block->class ) {
  1588. +
  1589. +        case I_4x4 :
  1590. +        case I_8x8 :
  1591. +        case I_16x16 :
  1592. +        case I_PCM :
  1593. +        
  1594. +        
  1595. +            block->costSAD += get_cached_fpel_sad( h, block->class, 0, 0, 0, row, col, 0, 0 );
  1596. +            break;
  1597. +            
  1598. +        case P_L0 :
  1599. +        
  1600. +            i_list = 0;
  1601. +        
  1602. +            switch ( block->part[0] ) {
  1603. +
  1604. +                case D_16x16 :
  1605. +
  1606. +                    x = block->mv[i_list][0][0];
  1607. +                    y = block->mv[i_list][0][1];
  1608. +                    px = pred.mv[i_list][0][0];
  1609. +                    py = pred.mv[i_list][0][1];
  1610. +                    block->costSAD += get_cached_fpel_sad( h, P_L0, D_16x16, 0, 0, row, col, x, y );
  1611. +                    block->costMV += T_BITS_MVD( x-px, y-py );
  1612. +                    break;
  1613. +
  1614. +                case D_16x8 :
  1615. +
  1616. +                    for ( i = 0; i < X264_MIN(i_mv_max+1,2); i++ ) {
  1617. +                        x = block->mv[i_list][i][0];
  1618. +                        y = block->mv[i_list][i][1];
  1619. +                        px = pred.mv[i_list][i][0];
  1620. +                        py = pred.mv[i_list][i][1];
  1621. +                        block->costSAD += get_cached_fpel_sad( h, P_L0, D_16x8, 0, i, row, col, x, y );
  1622. +                        block->costMV += T_BITS_MVD( x-px, y-py );
  1623. +                    }
  1624. +                    break;
  1625. +
  1626. +                case D_8x16 :
  1627. +
  1628. +                    for ( i = 0; i < X264_MIN(i_mv_max+1,2); i++ ) {
  1629. +                        x = block->mv[i_list][i][0];
  1630. +                        y = block->mv[i_list][i][1];
  1631. +                        px = pred.mv[i_list][i][0];
  1632. +                        py = pred.mv[i_list][i][1];
  1633. +                        block->costSAD += get_cached_fpel_sad( h, P_L0, D_8x16, 0, i, row, col, x, y );
  1634. +                        block->costMV += T_BITS_MVD( x-px, y-py );
  1635. +                    }
  1636. +                    break;
  1637. +                    
  1638. +            }
  1639. +            
  1640. +            break;
  1641. +
  1642. +        case P_8x8 :
  1643. +        
  1644. +            i_list = 0;
  1645. +
  1646. +            for ( i = 0; i < X264_MIN(i_mv_max+1,4); i++ ) {
  1647. +                x = block->mv[i_list][i][0];
  1648. +                y = block->mv[i_list][i][1];
  1649. +                px = pred.mv[i_list][i][0];
  1650. +                py = pred.mv[i_list][i][1];
  1651. +                block->costSAD += get_cached_fpel_sad( h, P_8x8, D_8x8, 0, i, row, col, x, y );
  1652. +                block->costMV += T_BITS_MVD( x-px, y-py );
  1653. +            }
  1654. +            break;
  1655. +
  1656. +    }
  1657. +
  1658. +    //block->costMV += T_BITS_MODE( block->type );
  1659. +    
  1660. +    return ( block->costSAD + block->costMV );
  1661. +}
  1662. +
  1663. +int sad_cost( x264_t *h ) {
  1664. +    int row, col;
  1665. +    int cost;
  1666. +    int i_list, offCC, offCL, offTL, offTR, offTC;
  1667. +    int mb_stride_col = h->mb.trellis.mb_stride_col;
  1668. +    int mb_stride_row = h->mb.trellis.mb_stride_row;
  1669. +    mb_t *block, *left, *top, *diag;
  1670. +
  1671. +    cost = 0;
  1672. +    i_list = 0;
  1673. +    offCC = 0;
  1674. +    offCL = offCC - mb_stride_col;
  1675. +    offTC = offCC - mb_stride_row;
  1676. +    offTL = offTC - mb_stride_col;
  1677. +    offTR = offTC + mb_stride_col;
  1678. +    for ( row = 0; row < h->mb.i_mb_height; row++ ) {
  1679. +        for ( col = 0; col < h->mb.i_mb_width; col++ ) {
  1680. +            block = &(h->mb.trellis.mb[offCC+T_P_BEST]);
  1681. +            left = ( col == 0 ) ? NULL : &(h->mb.trellis.mb[offCL+T_P_BEST]);
  1682. +            top = ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]);
  1683. +            diag = ( row == 0 ) ? NULL :
  1684. +                    ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) :
  1685. +                    &(h->mb.trellis.mb[offTR+T_P_BEST]);
  1686. +
  1687. +            cost_mb( h, row, col, 3, block, left, top, diag );
  1688. +            cost += block->costSAD;
  1689. +
  1690. +            offCC += mb_stride_col;
  1691. +            offCL += mb_stride_col;
  1692. +            offTC += mb_stride_col;
  1693. +            offTL += mb_stride_col;
  1694. +            offTR += mb_stride_col;
  1695. +        }
  1696. +    }
  1697. +
  1698. +    return cost;
  1699. +}
  1700. +
  1701. +int mv_cost( x264_t *h ) {
  1702. +    int row, col;
  1703. +    int cost;
  1704. +    int i_list, offCC, offCL, offTL, offTR, offTC;
  1705. +    int mb_stride_col = h->mb.trellis.mb_stride_col;
  1706. +    int mb_stride_row = h->mb.trellis.mb_stride_row;
  1707. +    mb_t *block, *left, *top, *diag;
  1708. +
  1709. +    cost = 0;
  1710. +    i_list = 0;
  1711. +    offCC = 0;
  1712. +    offCL = offCC - mb_stride_col;
  1713. +    offTC = offCC - mb_stride_row;
  1714. +    offTL = offTC - mb_stride_col;
  1715. +    offTR = offTC + mb_stride_col;
  1716. +    for ( row = 0; row < h->mb.i_mb_height; row++ ) {
  1717. +        for ( col = 0; col < h->mb.i_mb_width; col++ ) {
  1718. +            block = &(h->mb.trellis.mb[offCC+T_P_BEST]);
  1719. +            left = ( col == 0 ) ? NULL : &(h->mb.trellis.mb[offCL+T_P_BEST]);
  1720. +            top = ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]);
  1721. +            diag = ( row == 0 ) ? NULL :
  1722. +                    ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) :
  1723. +                    &(h->mb.trellis.mb[offTR+T_P_BEST]);
  1724. +
  1725. +            cost_mb( h, row, col, 3, block, left, top, diag );
  1726. +            cost += block->costMV;
  1727. +
  1728. +            offCC += mb_stride_col;
  1729. +            offCL += mb_stride_col;
  1730. +            offTC += mb_stride_col;
  1731. +            offTL += mb_stride_col;
  1732. +            offTR += mb_stride_col;
  1733. +        }
  1734. +    }
  1735. +
  1736. +    return cost;
  1737. +}
  1738. +
  1739. +void fill_trellis( x264_t *h, int i_frame ) {
  1740. +    int i_trellis, i, j;
  1741. +    int mb_stride_col = h->mb.trellis.mb_stride_col;
  1742. +    int mb_stride_row = h->mb.trellis.mb_stride_row;
  1743. +    int t_stride = h->mb.trellis.t_stride;
  1744. +    int offR = ( h->mb.trellis.direction == ROW_TRELLIS ) ? i_frame * mb_stride_row : 0 ;
  1745. +    int offC = ( h->mb.trellis.direction == ROW_TRELLIS ) ? 0 : i_frame * mb_stride_col ;
  1746. +
  1747. +    if ( h->mb.trellis.mode == MODE_COMBINED_DIAMOND ) {
  1748. +        for ( i_trellis = 0; i_trellis < h->mb.trellis.length; i_trellis++ ) {
  1749. +            for ( i = 0; i < T_P_INTRA; i++ ) {
  1750. +                for ( j = 0; j < DIAMOND_CANDIDATES; j++ ) {
  1751. +                    CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+i*DIAMOND_CANDIDATES+j].block), &(h->mb.trellis.mb[offR+offC+i]) );
  1752. +                }
  1753. +            }
  1754. +            CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+T_P_INTRA*DIAMOND_CANDIDATES].block), &(h->mb.trellis.mb[offR+offC+T_P_INTRA]) );
  1755. +            
  1756. +            if ( h->mb.trellis.direction == ROW_TRELLIS )
  1757. +                offC += mb_stride_col;
  1758. +            else
  1759. +                offR += mb_stride_row;
  1760. +        }
  1761. +    }
  1762. +    else {
  1763. +        for ( i_trellis = 0; i_trellis < h->mb.trellis.length; i_trellis++ ) {
  1764. +            for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
  1765. +                if ( ( h->mb.trellis.mode == MODE_DIAMOND ) || ( h->mb.trellis.mode == MODE_SQUARE2 ) )
  1766. +                    CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+j].block), &(h->mb.trellis.mb[offR+offC+T_P_BEST]) );
  1767. +                else
  1768. +                    CP_MB( &(h->mb.trellis.t[i_trellis*t_stride+j].block), &(h->mb.trellis.mb[offR+offC+j]) );
  1769. +            }
  1770. +
  1771. +            if ( h->mb.trellis.direction == ROW_TRELLIS )
  1772. +                offC += mb_stride_col;
  1773. +            else
  1774. +                offR += mb_stride_row;
  1775. +        }
  1776. +    }
  1777. +}
  1778. +
  1779. +void adjust_trellis( x264_t *h, int sub ) {
  1780. +    mb_t *block;
  1781. +    int i_trellis, i_mv, partition, i_list;
  1782. +    int t_stride = h->mb.trellis.t_stride;
  1783. +    int length = (h->mb.trellis.direction == ROW_TRELLIS) ? h->mb.i_mb_width : h->mb.i_mb_height;
  1784. +    int i8x8 = (h->mb.trellis.direction == ROW_TRELLIS) ? sub*2 : sub;
  1785. +    int i16x8 = (h->mb.trellis.direction == ROW_TRELLIS) ? sub : 0;
  1786. +    int i8x16 = (h->mb.trellis.direction == ROW_TRELLIS) ? 0 : sub;
  1787. +
  1788. +    i_list = 0;
  1789. +    if ( h->mb.trellis.mode == MODE_COMBINED_DIAMOND ) {
  1790. +        for ( i_trellis = 0; i_trellis < length; i_trellis++ ) {
  1791. +            // bottom
  1792. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x16*DIAMOND_CANDIDATES+B___].block);
  1793. +            block->mv[i_list][0][1] = X264_MIN( block->mv[i_list][0][1] + 1, block->mv_max[1] );
  1794. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x8*DIAMOND_CANDIDATES+B___].block);
  1795. +            block->mv[i_list][i8x8][1] = X264_MIN( block->mv[i_list][i8x8][1] + 1, block->mv_max[1] );
  1796. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x8*DIAMOND_CANDIDATES+B___].block);
  1797. +            block->mv[i_list][i16x8][1] = X264_MIN( block->mv[i_list][i16x8][1] + 1, block->mv_max[1] );
  1798. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x16*DIAMOND_CANDIDATES+B___].block);
  1799. +            block->mv[i_list][i8x16][1] = X264_MIN( block->mv[i_list][i8x16][1] + 1, block->mv_max[1] );
  1800. +
  1801. +            // top
  1802. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x16*DIAMOND_CANDIDATES+T___].block);
  1803. +            block->mv[i_list][0][1] = X264_MAX( block->mv[i_list][0][1] - 1, block->mv_min[1] );
  1804. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x8*DIAMOND_CANDIDATES+T___].block);
  1805. +            block->mv[i_list][i8x8][1] = X264_MAX( block->mv[i_list][i8x8][1] - 1, block->mv_min[1] );
  1806. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x8*DIAMOND_CANDIDATES+T___].block);
  1807. +            block->mv[i_list][i16x8][1] = X264_MAX( block->mv[i_list][i16x8][1] - 1, block->mv_min[1] );
  1808. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x16*DIAMOND_CANDIDATES+T___].block);
  1809. +            block->mv[i_list][i8x16][1] = X264_MAX( block->mv[i_list][i8x16][1] - 1, block->mv_min[1] );
  1810. +
  1811. +            // left
  1812. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x16*DIAMOND_CANDIDATES+L___].block);
  1813. +            block->mv[i_list][0][0] = X264_MAX( block->mv[i_list][0][0] - 1, block->mv_min[0] );
  1814. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x8*DIAMOND_CANDIDATES+L___].block);
  1815. +            block->mv[i_list][i8x8][0] = X264_MAX( block->mv[i_list][i8x8][0] - 1, block->mv_min[0] );
  1816. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x8*DIAMOND_CANDIDATES+L___].block);
  1817. +            block->mv[i_list][i16x8][0] = X264_MAX( block->mv[i_list][i16x8][0] - 1, block->mv_min[0] );
  1818. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x16*DIAMOND_CANDIDATES+L___].block);
  1819. +            block->mv[i_list][i8x16][0] = X264_MAX( block->mv[i_list][i8x16][0] - 1, block->mv_min[0] );
  1820. +
  1821. +            // right
  1822. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x16*DIAMOND_CANDIDATES+R___].block);
  1823. +            block->mv[i_list][0][0] = X264_MIN( block->mv[i_list][0][0] + 1, block->mv_max[0] );
  1824. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x8*DIAMOND_CANDIDATES+R___].block);
  1825. +            block->mv[i_list][i8x8][0] = X264_MIN( block->mv[i_list][i8x8][0] + 1, block->mv_max[0] );
  1826. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_16x8*DIAMOND_CANDIDATES+R___].block);
  1827. +            block->mv[i_list][i16x8][0] = X264_MIN( block->mv[i_list][i16x8][0] + 1, block->mv_max[0] );
  1828. +            block = &(h->mb.trellis.t[i_trellis*t_stride+T_P_8x16*DIAMOND_CANDIDATES+R___].block);
  1829. +            block->mv[i_list][i8x16][0] = X264_MIN( block->mv[i_list][i8x16][0] + 1, block->mv_max[0] );
  1830. +        }
  1831. +    }
  1832. +    else if ( h->mb.trellis.mode == MODE_SQUARE2 ) {
  1833. +        for ( i_trellis = 0; i_trellis < length; i_trellis++ ) {
  1834. +            if ( ! IS_INTRA( h->mb.trellis.t[i_trellis*t_stride+C___].block.class ) ) {
  1835. +                partition = h->mb.trellis.t[i_trellis*t_stride+C___].block.part[0];
  1836. +                i_mv = ( ( partition == D_16x16 ) ? 0 : ( ( partition == D_8x16 ) ? i8x16 : ( ( partition == D_8x8 ) ? i8x8 : i16x8 ) ) );
  1837. +                block = &(h->mb.trellis.t[i_trellis*t_stride+B___].block);
  1838. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1839. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BL__].block);
  1840. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1841. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BLL_].block);
  1842. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1843. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BR__].block);
  1844. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1845. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BRR_].block);
  1846. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1847. +                block = &(h->mb.trellis.t[i_trellis*t_stride+T___].block);
  1848. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1849. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TL__].block);
  1850. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1851. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TLL_].block);
  1852. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1853. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TR__].block);
  1854. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1855. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TRR_].block);
  1856. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1857. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BB__].block);
  1858. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 2, block->mv_max[1] );
  1859. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBL_].block);
  1860. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 2, block->mv_max[1] );
  1861. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBLL].block);
  1862. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 2, block->mv_max[1] );
  1863. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBR_].block);
  1864. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 2, block->mv_max[1] );
  1865. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBRR].block);
  1866. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 2, block->mv_max[1] );
  1867. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TT__].block);
  1868. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 2, block->mv_min[1] );
  1869. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTL_].block);
  1870. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 2, block->mv_min[1] );
  1871. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTLL].block);
  1872. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 2, block->mv_min[1] );
  1873. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTR_].block);
  1874. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 2, block->mv_min[1] );
  1875. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTRR].block);
  1876. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 2, block->mv_min[1] );
  1877. +                block = &(h->mb.trellis.t[i_trellis*t_stride+L___].block);
  1878. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1879. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TL__].block);
  1880. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1881. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTL_].block);
  1882. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1883. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BL__].block);
  1884. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1885. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBL_].block);
  1886. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1887. +                block = &(h->mb.trellis.t[i_trellis*t_stride+R___].block);
  1888. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1889. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TR__].block);
  1890. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1891. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTR_].block);
  1892. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1893. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BR__].block);
  1894. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1895. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBR_].block);
  1896. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1897. +                block = &(h->mb.trellis.t[i_trellis*t_stride+LL__].block);
  1898. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 2, block->mv_min[0] );
  1899. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TLL_].block);
  1900. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 2, block->mv_min[0] );
  1901. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTLL].block);
  1902. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 2, block->mv_min[0] );
  1903. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BLL_].block);
  1904. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 2, block->mv_min[0] );
  1905. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBLL].block);
  1906. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 2, block->mv_min[0] );
  1907. +                block = &(h->mb.trellis.t[i_trellis*t_stride+RR__].block);
  1908. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 2, block->mv_max[0] );
  1909. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TRR_].block);
  1910. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 2, block->mv_max[0] );
  1911. +                block = &(h->mb.trellis.t[i_trellis*t_stride+TTRR].block);
  1912. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 2, block->mv_max[0] );
  1913. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BRR_].block);
  1914. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 2, block->mv_max[0] );
  1915. +                block = &(h->mb.trellis.t[i_trellis*t_stride+BBRR].block);
  1916. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 2, block->mv_max[0] );
  1917. +            }
  1918. +        }
  1919. +    }
  1920. +    else {
  1921. +        for ( i_trellis = 0; i_trellis < length; i_trellis++ ) {
  1922. +            if ( ! IS_INTRA( h->mb.trellis.t[i_trellis*t_stride+C___].block.class ) ) {
  1923. +                partition = h->mb.trellis.t[i_trellis*t_stride+C___].block.part[0];
  1924. +                i_mv = ( ( partition == D_16x16 ) ? 0 : ( ( partition == D_8x16 ) ? i8x16 : ( ( partition == D_8x8 ) ? i8x8 : i16x8 ) ) );
  1925. +                block = &(h->mb.trellis.t[i_trellis*t_stride+B___].block);
  1926. +                block->mv[i_list][i_mv][1] = X264_MIN( block->mv[i_list][i_mv][1] + 1, block->mv_max[1] );
  1927. +                block = &(h->mb.trellis.t[i_trellis*t_stride+T___].block);
  1928. +                block->mv[i_list][i_mv][1] = X264_MAX( block->mv[i_list][i_mv][1] - 1, block->mv_min[1] );
  1929. +                block = &(h->mb.trellis.t[i_trellis*t_stride+L___].block);
  1930. +                block->mv[i_list][i_mv][0] = X264_MAX( block->mv[i_list][i_mv][0] - 1, block->mv_min[0] );
  1931. +                block = &(h->mb.trellis.t[i_trellis*t_stride+R___].block);
  1932. +                block->mv[i_list][i_mv][0] = X264_MIN( block->mv[i_list][i_mv][0] + 1, block->mv_max[0] );
  1933. +            }
  1934. +        }
  1935. +    }
  1936. +}
  1937. +
  1938. +void generate_alternate_mvs( x264_t *h, int16_t mv[SQUARE2_CANDIDATES][2], mb_t *block, int i_list, int i_mv ) {
  1939. +    int i;
  1940. +
  1941. +    if ( h->mb.trellis.mode == MODE_SQUARE2 ) {
  1942. +        for ( i = 0; i < SQUARE2_CANDIDATES; i++ )
  1943. +            CP_MV( mv[i], block->mv[i_list][i_mv] );
  1944. +
  1945. +        mv[B___][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1946. +        mv[BLL_][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1947. +        mv[BL__][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1948. +        mv[BR__][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1949. +        mv[BRR_][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1950. +        mv[T___][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1951. +        mv[TL__][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1952. +        mv[TLL_][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1953. +        mv[TR__][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1954. +        mv[TRR_][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1955. +        mv[BB__][1] = X264_MIN( mv[C___][1] + 2, block->mv_max[1] );
  1956. +        mv[BBL_][1] = X264_MIN( mv[C___][1] + 2, block->mv_max[1] );
  1957. +        mv[BBLL][1] = X264_MIN( mv[C___][1] + 2, block->mv_max[1] );
  1958. +        mv[BBR_][1] = X264_MIN( mv[C___][1] + 2, block->mv_max[1] );
  1959. +        mv[BBRR][1] = X264_MIN( mv[C___][1] + 2, block->mv_max[1] );
  1960. +        mv[TT__][1] = X264_MAX( mv[C___][1] - 2, block->mv_min[1] );
  1961. +        mv[TTL_][1] = X264_MAX( mv[C___][1] - 2, block->mv_min[1] );
  1962. +        mv[TTLL][1] = X264_MAX( mv[C___][1] - 2, block->mv_min[1] );
  1963. +        mv[TTR_][1] = X264_MAX( mv[C___][1] - 2, block->mv_min[1] );
  1964. +        mv[TTRR][1] = X264_MAX( mv[C___][1] - 2, block->mv_min[1] );
  1965. +        mv[L___][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1966. +        mv[TL__][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1967. +        mv[TTL_][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1968. +        mv[BL__][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1969. +        mv[BBL_][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1970. +        mv[R___][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1971. +        mv[TR__][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1972. +        mv[TTR_][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1973. +        mv[BR__][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1974. +        mv[BBR_][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1975. +        mv[LL__][0] = X264_MAX( mv[C___][0] - 2, block->mv_min[0] );
  1976. +        mv[TLL_][0] = X264_MAX( mv[C___][0] - 2, block->mv_min[0] );
  1977. +        mv[TTLL][0] = X264_MAX( mv[C___][0] - 2, block->mv_min[0] );
  1978. +        mv[BLL_][0] = X264_MAX( mv[C___][0] - 2, block->mv_min[0] );
  1979. +        mv[BBLL][0] = X264_MAX( mv[C___][0] - 2, block->mv_min[0] );
  1980. +        mv[RR__][0] = X264_MIN( mv[C___][0] + 2, block->mv_max[0] );
  1981. +        mv[TRR_][0] = X264_MIN( mv[C___][0] + 2, block->mv_max[0] );
  1982. +        mv[TTRR][0] = X264_MIN( mv[C___][0] + 2, block->mv_max[0] );
  1983. +        mv[BRR_][0] = X264_MIN( mv[C___][0] + 2, block->mv_max[0] );
  1984. +        mv[BBRR][0] = X264_MIN( mv[C___][0] + 2, block->mv_max[0] );
  1985. +    }
  1986. +    else {
  1987. +        for ( i = 0; i < DIAMOND_CANDIDATES; i++ )
  1988. +            CP_MV( mv[i], block->mv[i_list][i_mv] );
  1989. +
  1990. +        mv[B___][1] = X264_MIN( mv[C___][1] + 1, block->mv_max[1] );
  1991. +        mv[T___][1] = X264_MAX( mv[C___][1] - 1, block->mv_min[1] );
  1992. +        mv[L___][0] = X264_MAX( mv[C___][0] - 1, block->mv_min[0] );
  1993. +        mv[R___][0] = X264_MIN( mv[C___][0] + 1, block->mv_max[0] );
  1994. +    }
  1995. +}
  1996. +
  1997. +void run_row_trellis( x264_t *h, int iter ) {
  1998. +    int i, j, k, m, best, row, col, subrow, i_adj, class, part, alt_states;
  1999. +    int cost, min_cost;
  2000. +    int i_list, offCC, offCL, offTL, offTR, offTC, offDC, offDL, offDR, offCR;
  2001. +    int mb_stride_col = h->mb.trellis.mb_stride_col;
  2002. +    int mb_stride_row = h->mb.trellis.mb_stride_row;
  2003. +    int t_stride = h->mb.trellis.t_stride;
  2004. +    int frameCostMVOld, frameCostMVNew, frameCostSADOld, frameCostSADNew;
  2005. +    int16_t alt[SQUARE2_CANDIDATES][2];
  2006. +    int t_type;
  2007. +    
  2008. +    i_list = 0;
  2009. +
  2010. +    for ( i = 0; i < iter; i++ ) {
  2011. +        for ( row = 0; row < h->mb.i_mb_height; row ++ ) {
  2012. +            for ( subrow = 0; subrow <= 1; subrow++ ) {
  2013. +
  2014. +                offCC = row * mb_stride_row;
  2015. +                for ( col = 0; col < h->mb.i_mb_width; col++ ) {
  2016. +                    for ( j = 0; j <= T_P_BEST; j++ )
  2017. +                        CP_MB( &(h->mb.trellis.backup[col*mb_stride_col+j]), &(h->mb.trellis.mb[offCC+j]) );
  2018. +                    offCC += mb_stride_col;
  2019. +                }
  2020. +
  2021. +                frameCostMVOld = mv_cost( h );
  2022. +                frameCostSADOld = sad_cost( h );
  2023. +
  2024. +                fill_trellis( h, row );
  2025. +                if ( h->mb.trellis.mode != MODE_DECIDE )
  2026. +                    adjust_trellis( h, subrow );
  2027. +                                    
  2028. +                offCC = row * mb_stride_row ;
  2029. +                offCL = offCC - mb_stride_col;
  2030. +                offCR = offCC + mb_stride_col;
  2031. +                offTC = offCC - mb_stride_row;
  2032. +                offDC = offCC + mb_stride_row;
  2033. +                offTL = offTC - mb_stride_col;
  2034. +                offDL = offDC - mb_stride_col;
  2035. +                offTR = offTC + mb_stride_col;
  2036. +                offDR = offDC + mb_stride_col;
  2037. +                for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
  2038. +                    h->mb.trellis.t[j].costCC = cost_mb( h, row, 0, 3, &(h->mb.trellis.t[j].block), NULL, ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]), ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTR+T_P_BEST]) );
  2039. +                    h->mb.trellis.t[j].costDC = ( row == h->mb.i_mb_height-1 ) ? 0 : cost_mb( h, row+1, 0, 3, &(h->mb.trellis.mb[offDC+T_P_BEST]), NULL, &(h->mb.trellis.t[j].block), &(h->mb.trellis.mb[offCR+T_P_BEST]) );
  2040. +                    h->mb.trellis.t[j].costDL = 0;
  2041. +                    h->mb.trellis.t[j].costDR = 0;
  2042. +                    h->mb.trellis.t[j].accCost = h->mb.trellis.t[j].costCC + h->mb.trellis.t[j].costDC;
  2043. +                }
  2044. +
  2045. +                offCC += mb_stride_col;
  2046. +                offCL += mb_stride_col;
  2047. +                offCR += mb_stride_col;
  2048. +                offTC += mb_stride_col;
  2049. +                offDC += mb_stride_col;
  2050. +                offTL += mb_stride_col;
  2051. +                offDL += mb_stride_col;
  2052. +                offTR += mb_stride_col;
  2053. +                offDR += mb_stride_col;
  2054. +                
  2055. +                for ( col = 1; col < h->mb.trellis.length; col++ ) {
  2056. +                    for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
  2057. +                        min_cost = INT_MAX;
  2058. +                        class = h->mb.trellis.t[col*t_stride+j].block.class;
  2059. +                        part = h->mb.trellis.t[col*t_stride+j].block.part[0];
  2060. +                        i_adj = ( subrow && h->mb.trellis.t[col*t_stride+j].block.class == P_8x8 ) ? 3 : 1;
  2061. +                        alt_states = ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) ) ? ( ( h->mb.trellis.mode == MODE_SQUARE2 ) ? SQUARE2_CANDIDATES : DIAMOND_CANDIDATES ) : 1;
  2062. +                        if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
  2063. +                            generate_alternate_mvs( h, alt, &(h->mb.trellis.t[col*t_stride+j].block), i_list, i_adj );
  2064. +                        best = 0;
  2065. +                        for ( k = 0; k < h->mb.trellis.breadth; k++ )  {
  2066. +                            for ( m = 0; m < alt_states; m++ ) {
  2067. +                                if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
  2068. +                                    CP_MV( &(h->mb.trellis.t[col*t_stride+j].block.mv[i_adj]), &(alt[m]) );
  2069. +                                h->mb.trellis.t[col*t_stride+j].costCC = cost_mb( h, row, col, 3, &(h->mb.trellis.t[col*t_stride+j].block), &(h->mb.trellis.t[(col-1)*t_stride+k].block), ( ( row == 0 ) ? NULL : &(h->mb.trellis.mb[offTC+T_P_BEST]) ), ( ( row == 0 ) ? NULL : ( ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.mb[offTL+T_P_BEST]) : &(h->mb.trellis.mb[offTR+T_P_BEST]) ) ) );
  2070. +                                h->mb.trellis.t[col*t_stride+j].costDC = ( row == h->mb.i_mb_height - 1 ) ? 0 : cost_mb( h, row+1, col, 3, &(h->mb.trellis.mb[offDC+T_P_BEST]), &(h->mb.trellis.mb[offDL+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+j].block), ( ( col == h->mb.i_mb_width - 1 ) ? &(h->mb.trellis.t[(col-1)*t_stride+k].block) : &(h->mb.trellis.mb[offCR+T_P_BEST]) ) );
  2071. +                                h->mb.trellis.t[col*t_stride+j].costDL = ( row == h->mb.i_mb_height - 1 ) ? 0 : cost_mb( h, row+1, col-1, 3, &(h->mb.trellis.mb[offDL+T_P_BEST]), ( ( col == 1 ) ? NULL : &(h->mb.trellis.mb[offDL-mb_stride_col+T_P_BEST]) ), &(h->mb.trellis.t[(col-1)*t_stride+k].block), &(h->mb.trellis.t[col*t_stride+j].block) );
  2072. +                                h->mb.trellis.t[col*t_stride+j].costDR = ( row == h->mb.i_mb_height - 1 ) ? 0 : ( col == h->mb.i_mb_width - 2 ) ? cost_mb( h, row+1, col+1, 3, &(h->mb.trellis.mb[offDR+T_P_BEST]), &(h->mb.trellis.mb[offDC+T_P_BEST]), &(h->mb.trellis.mb[offCR+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+k].block) ) : 0;
  2073. +
  2074. +                                cost = h->mb.trellis.t[(col-1)*t_stride+k].accCost - h->mb.trellis.t[(col-1)*t_stride+k].costDC - h->mb.trellis.t[(col-1)*t_stride+k].costDR + h->mb.trellis.t[col*t_stride+j].costCC + h->mb.trellis.t[col*t_stride+j].costDC + h->mb.trellis.t[col*t_stride+j].costDL + h->mb.trellis.t[col*t_stride+j].costDR;
  2075. +                                if ( cost < min_cost ) {
  2076. +                                    min_cost = cost;
  2077. +                                    best = m;
  2078. +                                    h->mb.trellis.t[col*t_stride+j].parent = k;
  2079. +                                }
  2080. +                            }
  2081. +                        }
  2082. +                        h->mb.trellis.t[col*t_stride+j].accCost = min_cost;
  2083. +                        if ( class == P_8x8 || ( class == P_L0 && part == D_8x16 ) )
  2084. +                            CP_MV( &(h->mb.trellis.t[col*t_stride+j].block.mv[i_adj]), &(alt[T_P_BEST]) );
  2085. +                    }
  2086. +
  2087. +                    offCC += mb_stride_col;
  2088. +                    offCL += mb_stride_col;
  2089. +                    offCR += mb_stride_col;
  2090. +                    offTC += mb_stride_col;
  2091. +                    offDC += mb_stride_col;
  2092. +                    offTL += mb_stride_col;
  2093. +                    offDL += mb_stride_col;
  2094. +                    offTR += mb_stride_col;
  2095. +                    offDR += mb_stride_col;
  2096. +                }
  2097. +
  2098. +                min_cost = INT_MAX;
  2099. +                k = 0;
  2100. +                for ( j = 0; j < h->mb.trellis.breadth; j++ ) {
  2101. +                    if ( h->mb.trellis.t[(h->mb.trellis.length-1)*t_stride+j].accCost < min_cost ) {
  2102. +                        min_cost = h->mb.trellis.t[(h->mb.trellis.length-1)*t_stride+j].accCost;
  2103. +                        k = j;
  2104. +                    }
  2105. +                }
  2106. +
  2107. +                offCC = (row+1) * mb_stride_row - mb_stride_col;
  2108. +                for ( col = h->mb.trellis.length-1; col >= 0; col-- ) {
  2109. +                    t_type = TRELLIS_TYPE( h->mb.trellis.t[col*t_stride+k].block );
  2110. +                    CP_MB( &(h->mb.trellis.mb[offCC+t_type]), &(h->mb.trellis.t[col*t_stride+k].block) );
  2111. +                    CP_MB( &(h->mb.trellis.mb[offCC+T_P_BEST]), &(h->mb.trellis.t[col*t_stride+k].block) );
  2112. +                    k = h->mb.trellis.t[col*t_stride+k].parent;
  2113. +                    offCC -= mb_stride_col;
  2114. +                }
  2115. +
  2116. +                frameCostMVNew = mv_cost( h );
  2117. +                frameCostSADNew = sad_cost( h );
  2118. +
  2119. +                if ( ( frameCostMVNew + frameCostSADNew ) > ( frameCostMVOld + frameCostSADOld ) ) {
  2120. +                    offCC = row * mb_stride_row;
  2121. +                    for ( col = 0; col < h->mb.i_mb_width; col++ ) {
  2122. +                        for ( j = 0; j <= T_P_BEST; j++ )
  2123. +                            CP_MB( &(h->mb.trellis.mb[offCC+j]), &(h->mb.trellis.backup[col*mb_stride_col+j]) );
  2124. +                        offCC += mb_stride_col;
  2125. +                    }
  2126. +                }
  2127. +
  2128. +            }
  2129. +        }
  2130. +    }
  2131. +
  2132. +}
  2133. +
  2134. +void scale_trellis_mv( x264_t *h ) {
  2135. +    mb_t *block;
  2136. +    for ( int row = 0; row < h->mb.i_mb_height; row++ ) {
  2137. +        for ( int col = 0; col < h->mb.i_mb_width; col++ ) {
  2138. +            block = &(h->mb.trellis.mb[row*h->mb.trellis.mb_stride_row+col*h->mb.trellis.mb_stride_col+T_P_BEST]);
  2139. +            if ( ! IS_INTRA( block->class ) ) {
  2140. +                block->mv[0][0][0] <<= 2;
  2141. +                block->mv[0][0][1] <<= 2;
  2142. +                if ( block->part[0] != D_16x16 ) {
  2143. +                    block->mv[0][1][0] <<= 2;
  2144. +                    block->mv[0][1][1] <<= 2;
  2145. +                    if ( block->part[0] == D_8x8 ) {
  2146. +                        block->mv[0][2][0] <<= 2;
  2147. +                        block->mv[0][2][1] <<= 2;
  2148. +                        block->mv[0][3][0] <<= 2;
  2149. +                        block->mv[0][3][1] <<= 2;
  2150. +                    }
  2151. +                }
  2152. +            }
  2153. +        }
  2154. +    }
  2155. +}
  2156. +
  2157. +void write_back_trellis_mv( x264_t *h, int i_mb ) {
  2158. +    mb_t *block = &(h->mb.trellis.mb[i_mb*h->mb.trellis.mb_stride_col+T_P_BEST]);
  2159. +    h->mb.b_skip_mc = 0;
  2160. +
  2161. +    if ( IS_INTRA( block->class ) ) {
  2162. +        h->mb.i_type = block->class;    
  2163. +    }
  2164. +    else {
  2165. +        h->mb.i_type = block->class;
  2166. +        h->mb.i_partition = block->part[0];
  2167. +        
  2168. +        if ( block->class == P_8x8 ) {
  2169. +            h->mb.i_sub_partition[0] = D_L0_8x8;
  2170. +            h->mb.i_sub_partition[1] = D_L0_8x8;
  2171. +            h->mb.i_sub_partition[2] = D_L0_8x8;
  2172. +            h->mb.i_sub_partition[3] = D_L0_8x8;
  2173. +            
  2174. +            x264_macroblock_cache_ref( h, 0, 0, 2, 2, 0, block->ref[0][0] );
  2175. +            x264_macroblock_cache_ref( h, 2, 0, 2, 2, 0, block->ref[1][0] );
  2176. +            x264_macroblock_cache_ref( h, 0, 2, 2, 2, 0, block->ref[2][0] );
  2177. +            x264_macroblock_cache_ref( h, 2, 2, 2, 2, 0, block->ref[3][0] );
  2178. +            x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 2, 0, block->mv[0][0] );
  2179. +            x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 2, 0, block->mv[0][1] );
  2180. +            x264_macroblock_cache_mv_ptr( h, 0, 2, 2, 2, 0, block->mv[0][2] );
  2181. +            x264_macroblock_cache_mv_ptr( h, 2, 2, 2, 2, 0, block->mv[0][3] );
  2182. +        }
  2183. +        else {
  2184. +                
  2185. +            switch ( block->part[0] ) {
  2186. +                
  2187. +                case D_16x16 :
  2188. +                
  2189. +                    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, block->ref[0][0] );
  2190. +                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, block->mv[0][0] );
  2191. +                    break;
  2192. +                    
  2193. +                case D_16x8 :
  2194. +                
  2195. +                    x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, block->ref[0][0] );
  2196. +                    x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, block->ref[1][0] );
  2197. +                    x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 2, 0, block->mv[0][0] );
  2198. +                    x264_macroblock_cache_mv_ptr( h, 0, 2, 4, 2, 0, block->mv[0][1] );
  2199. +                    break;
  2200. +                    
  2201. +                case D_8x16 :
  2202. +                
  2203. +                    x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, block->ref[0][0] );
  2204. +                    x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, block->ref[1][0] );
  2205. +                    x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 4, 0, block->mv[0][0] );
  2206. +                    x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 4, 0, block->mv[0][1] );
  2207. +                    break;
  2208. +                
  2209. +            }
  2210. +            
  2211. +        }
  2212. +        
  2213. +    }
  2214. +    
  2215. +}
  2216. diff --git a/extras/avisynth_c.h b/extras/avisynth_c.h
  2217. old mode 100644
  2218. new mode 100755
  2219. diff --git a/extras/getopt.c b/extras/getopt.c
  2220. old mode 100644
  2221. new mode 100755
  2222. diff --git a/extras/getopt.h b/extras/getopt.h
  2223. old mode 100644
  2224. new mode 100755
  2225. diff --git a/extras/inttypes.h b/extras/inttypes.h
  2226. old mode 100644
  2227. new mode 100755
  2228. diff --git a/extras/stdint.h b/extras/stdint.h
  2229. old mode 100644
  2230. new mode 100755
  2231. diff --git a/filters/filters.c b/filters/filters.c
  2232. old mode 100644
  2233. new mode 100755
  2234. diff --git a/filters/filters.h b/filters/filters.h
  2235. old mode 100644
  2236. new mode 100755
  2237. diff --git a/filters/video/cache.c b/filters/video/cache.c
  2238. old mode 100644
  2239. new mode 100755
  2240. diff --git a/filters/video/crop.c b/filters/video/crop.c
  2241. old mode 100644
  2242. new mode 100755
  2243. diff --git a/filters/video/depth.c b/filters/video/depth.c
  2244. old mode 100644
  2245. new mode 100755
  2246. diff --git a/filters/video/fix_vfr_pts.c b/filters/video/fix_vfr_pts.c
  2247. old mode 100644
  2248. new mode 100755
  2249. diff --git a/filters/video/internal.c b/filters/video/internal.c
  2250. old mode 100644
  2251. new mode 100755
  2252. diff --git a/filters/video/internal.h b/filters/video/internal.h
  2253. old mode 100644
  2254. new mode 100755
  2255. diff --git a/filters/video/resize.c b/filters/video/resize.c
  2256. old mode 100644
  2257. new mode 100755
  2258. diff --git a/filters/video/select_every.c b/filters/video/select_every.c
  2259. old mode 100644
  2260. new mode 100755
  2261. diff --git a/filters/video/source.c b/filters/video/source.c
  2262. old mode 100644
  2263. new mode 100755
  2264. diff --git a/filters/video/video.c b/filters/video/video.c
  2265. old mode 100644
  2266. new mode 100755
  2267. diff --git a/filters/video/video.h b/filters/video/video.h
  2268. old mode 100644
  2269. new mode 100755
  2270. diff --git a/input/avs.c b/input/avs.c
  2271. old mode 100644
  2272. new mode 100755
  2273. diff --git a/input/ffms.c b/input/ffms.c
  2274. old mode 100644
  2275. new mode 100755
  2276. diff --git a/input/input.c b/input/input.c
  2277. old mode 100644
  2278. new mode 100755
  2279. diff --git a/input/input.h b/input/input.h
  2280. old mode 100644
  2281. new mode 100755
  2282. diff --git a/input/lavf.c b/input/lavf.c
  2283. old mode 100644
  2284. new mode 100755
  2285. diff --git a/input/raw.c b/input/raw.c
  2286. old mode 100644
  2287. new mode 100755
  2288. diff --git a/input/thread.c b/input/thread.c
  2289. old mode 100644
  2290. new mode 100755
  2291. diff --git a/input/timecode.c b/input/timecode.c
  2292. old mode 100644
  2293. new mode 100755
  2294. diff --git a/input/y4m.c b/input/y4m.c
  2295. old mode 100644
  2296. new mode 100755
  2297. diff --git a/output/flv.c b/output/flv.c
  2298. old mode 100644
  2299. new mode 100755
  2300. diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
  2301. old mode 100644
  2302. new mode 100755
  2303. diff --git a/output/flv_bytestream.h b/output/flv_bytestream.h
  2304. old mode 100644
  2305. new mode 100755
  2306. diff --git a/output/matroska.c b/output/matroska.c
  2307. old mode 100644
  2308. new mode 100755
  2309. diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
  2310. old mode 100644
  2311. new mode 100755
  2312. diff --git a/output/matroska_ebml.h b/output/matroska_ebml.h
  2313. old mode 100644
  2314. new mode 100755
  2315. diff --git a/output/mp4.c b/output/mp4.c
  2316. old mode 100644
  2317. new mode 100755
  2318. diff --git a/output/output.h b/output/output.h
  2319. old mode 100644
  2320. new mode 100755
  2321. diff --git a/output/raw.c b/output/raw.c
  2322. old mode 100644
  2323. new mode 100755
  2324. diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm
  2325. old mode 100644
  2326. new mode 100755
  2327. diff --git a/tools/checkasm.c b/tools/checkasm.c
  2328. old mode 100644
  2329. new mode 100755
  2330. diff --git a/tools/digress/__init__.py b/tools/digress/__init__.py
  2331. old mode 100644
  2332. new mode 100755
  2333. diff --git a/tools/digress/cli.py b/tools/digress/cli.py
  2334. old mode 100644
  2335. new mode 100755
  2336. diff --git a/tools/digress/comparers.py b/tools/digress/comparers.py
  2337. old mode 100644
  2338. new mode 100755
  2339. diff --git a/tools/digress/constants.py b/tools/digress/constants.py
  2340. old mode 100644
  2341. new mode 100755
  2342. diff --git a/tools/digress/errors.py b/tools/digress/errors.py
  2343. old mode 100644
  2344. new mode 100755
  2345. diff --git a/tools/digress/scm/__init__.py b/tools/digress/scm/__init__.py
  2346. old mode 100644
  2347. new mode 100755
  2348. diff --git a/tools/digress/scm/dummy.py b/tools/digress/scm/dummy.py
  2349. old mode 100644
  2350. new mode 100755
  2351. diff --git a/tools/digress/scm/git.py b/tools/digress/scm/git.py
  2352. old mode 100644
  2353. new mode 100755
  2354. diff --git a/tools/digress/testing.py b/tools/digress/testing.py
  2355. old mode 100644
  2356. new mode 100755
  2357. diff --git a/tools/q_matrix_jvt.cfg b/tools/q_matrix_jvt.cfg
  2358. old mode 100644
  2359. new mode 100755
  2360. diff --git a/tools/xyuv.c b/tools/xyuv.c
  2361. old mode 100644
  2362. new mode 100755
  2363. diff --git a/x264.c b/x264.c
  2364. old mode 100644
  2365. new mode 100755
  2366. index 3864bf7..b6a0def
  2367. --- a/x264.c
  2368. +++ b/x264.c
  2369. @@ -966,6 +966,7 @@ static struct option long_options[] =
  2370.      { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
  2371.      { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
  2372.      { "dts-compress",      no_argument, NULL, OPT_DTS_COMPRESSION },
  2373. +    { "me-trellis",  required_argument, NULL, 0 },
  2374.      {0, 0, 0, 0}
  2375.  };
  2376.  
  2377. @@ -1675,7 +1676,7 @@ static int encode( x264_param_t *param, cli_opt_t *opt )
  2378.      FAIL_IF_ERROR2( !h, "x264_encoder_open failed\n" );
  2379.  
  2380.      x264_encoder_parameters( h, param );
  2381. -
  2382. +    
  2383.      FAIL_IF_ERROR2( output.set_param( opt->hout, param ), "can't set outfile param\n" );
  2384.  
  2385.      i_start = x264_mdate();
  2386. diff --git a/x264.h b/x264.h
  2387. old mode 100644
  2388. new mode 100755
  2389. index 9d663f1..37fcd8f
  2390. --- a/x264.h
  2391. +++ b/x264.h
  2392. @@ -162,6 +162,10 @@ typedef struct
  2393.  #define X264_B_PYRAMID_NORMAL        2
  2394.  #define X264_KEYINT_MIN_AUTO         0
  2395.  #define X264_KEYINT_MAX_INFINITE     (1<<30)
  2396. +#define X264_ME_TRELLIS_NONE         0
  2397. +#define X264_ME_TRELLIS_DIA          1
  2398. +#define X264_ME_TRELLIS_ESA          2
  2399. +#define X264_ME_TRELLIS_COMBINED_DIA 3
  2400.  
  2401.  static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
  2402.  static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
  2403. @@ -332,6 +336,8 @@ typedef struct x264_param_t
  2404.          float        f_psy_rd; /* Psy RD strength */
  2405.          float        f_psy_trellis; /* Psy trellis strength */
  2406.          int          b_psy; /* Toggle all psy optimizations */
  2407. +        int          i_me_trellis; /* Type of trellis to use for me */
  2408. +        int          b_cache_sads; /* Cache SAD scores during motion estimation */
  2409.  
  2410.          /* the deadzone size that will be used in luma quantization */
  2411.          int          i_luma_deadzone[2]; /* {inter, intra} */
  2412. diff --git a/x264cli.h b/x264cli.h
  2413. old mode 100644
  2414. new mode 100755
  2415. diff --git a/x264dll.c b/x264dll.c
  2416. old mode 100644
  2417. new mode 100755
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement