Advertisement
Guest User

Untitled

a guest
Jun 30th, 2017
491
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 49.18 KB | None | 0 0
  1. From ee75acd55e1a89eb9a8c2f7d14c923b57e08ceb3 Mon Sep 17 00:00:00 2001
  2. From: Sean McGovern <gseanmcg@gmail.com>
  3. Date: Sun, 10 Oct 2010 19:34:18 -0400
  4. Subject: [PATCH 1/9] Fix build on SPARC Solaris 10
  5.  
  6. ---
  7. common/pixel.c |    6 +++---
  8.  configure      |   29 +++++++++++++++++------------
  9.  2 files changed, 20 insertions(+), 15 deletions(-)
  10.  
  11. diff --git a/common/pixel.c b/common/pixel.c
  12. index 1e21550..7fa497c 100644
  13. --- a/common/pixel.c
  14. +++ b/common/pixel.c
  15. @@ -36,7 +36,7 @@
  16.  #if ARCH_ARM
  17.  #   include "arm/pixel.h"
  18.  #endif
  19. -#if ARCH_UltraSparc
  20. +#if ARCH_UltraSPARC
  21.  #   include "sparc/pixel.h"
  22.  #endif
  23.  
  24. @@ -443,7 +443,7 @@ SAD_X( 4x8 )
  25.  SAD_X( 4x4 )
  26.  
  27.  #if !HIGH_BIT_DEPTH
  28. -#if ARCH_UltraSparc
  29. +#if ARCH_UltraSPARC
  30.  SAD_X( 16x16_vis )
  31.  SAD_X( 16x8_vis )
  32.  SAD_X( 8x16_vis )
  33. @@ -1063,7 +1063,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  34.      }
  35.  #endif
  36.  #if !HIGH_BIT_DEPTH
  37. -#if ARCH_UltraSparc
  38. +#if ARCH_UltraSPARC
  39.      INIT4( sad, _vis );
  40.      INIT4( sad_x3, _vis );
  41.      INIT4( sad_x4, _vis );
  42. diff --git a/configure b/configure
  43. index 2f38154..fd62337 100755
  44. --- a/configure
  45. +++ b/configure
  46. @@ -392,15 +392,20 @@ case $host_cpu in
  47.      fi
  48.      ;;
  49.    sparc)
  50. -    if [ $asm = auto ] && test "$(uname -m)" = "sun4u"; then
  51. -      ARCH="UltraSparc"
  52. -      CFLAGS="$CFLAGS -mcpu=ultrasparc"
  53. -      LDFLAGS="$LDFLAGS -mcpu=ultrasparc"
  54. -      AS="${AS-${cross_prefix}as}"
  55. -      ASFLAGS="$ASFLAGS -xarch=v8plusa"
  56. -    else
  57. -      ARCH="Sparc"
  58. -    fi
  59. +    ARCH="SPARC"
  60. +    case $(uname -m) in
  61. +      sun4u|sun4v)
  62. +        if [ $asm = auto ]; then
  63. +          ARCH="UltraSPARC"
  64. +          if ! echo $CFLAGS | grep -Eq '\-mcpu' ; then
  65. +            CFLAGS="$CFLAGS -mcpu=ultrasparc"
  66. +            LDFLAGS="$LDFLAGS -mcpu=ultrasparc"
  67. +          fi
  68. +          AS="${AS-${cross_prefix}as}"
  69. +          ASFLAGS="$ASFLAGS -xarch=v8plusa"
  70. +        fi
  71. +        ;;
  72. +    esac
  73.      ;;
  74.    mips|mipsel|mips64|mips64el)
  75.      ARCH="MIPS"
  76. @@ -497,11 +502,11 @@ fi
  77.  define ARCH_$ARCH
  78.  define SYS_$SYS
  79.  
  80. -echo "int i = 0x42494745; double f = 0x1.0656e6469616ep+102;" > conftest.c
  81. +echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
  82.  $CC $CFLAGS conftest.c -c -o conftest.o 2>$DEVNULL || die "endian test failed"
  83. -if grep -q BIGE conftest.o && grep -q FPendian conftest.o ; then
  84. +if (strings -a conftest.o | grep -q BIGE) && (strings -a conftest.o | grep -q FPendian) ; then
  85.      define WORDS_BIGENDIAN
  86. -elif !(grep -q EGIB conftest.o && grep -q naidnePF conftest.o) ; then
  87. +elif !(strings -a conftest.o | grep -q EGIB && strings -a conftest.o | grep -q naidnePF) ; then
  88.      die "endian test failed"
  89.  fi
  90.  
  91. --
  92. 1.7.3.2.146.gca209
  93.  
  94.  
  95. From b38fd04c376bac31544782fabe03471567a8badf Mon Sep 17 00:00:00 2001
  96. From: Sean McGovern <gseanmcg@gmail.com>
  97. Date: Sun, 21 Nov 2010 01:59:33 -0500
  98. Subject: [PATCH 2/9] Only build SPARC VIS asm if high bit-depth is disabled
  99.  
  100. ---
  101. Makefile |    2 ++
  102.  1 files changed, 2 insertions(+), 0 deletions(-)
  103.  
  104. diff --git a/Makefile b/Makefile
  105. index 8a3a327..21f57e7 100644
  106. --- a/Makefile
  107. +++ b/Makefile
  108. @@ -110,9 +110,11 @@ endif
  109.  
  110.  # VIS optims
  111.  ifeq ($(ARCH),UltraSparc)
  112. +ifeq ($(findstring HIGH_BIT_DEPTH, $(CONFIG)),)
  113.  ASMSRC += common/sparc/pixel.asm
  114.  OBJASM  = $(ASMSRC:%.asm=%.o)
  115.  endif
  116. +endif
  117.  
  118.  ifneq ($(HAVE_GETOPT_LONG),1)
  119.  SRCCLI += extras/getopt.c
  120. --
  121. 1.7.3.2.146.gca209
  122.  
  123.  
  124. From d9a2a3c5fc53d3c129a32b8401ea115ea860b098 Mon Sep 17 00:00:00 2001
  125. From: Steven Walters <kemuri9@gmail.com>
  126. Date: Mon, 22 Nov 2010 10:31:05 +0900
  127. Subject: [PATCH 3/9] Fix configure so that boolean configuration options are 1/0
  128.  
  129. There are many cases of 1/undef, not 1/0.
  130. ---
  131. Makefile            |   14 +++++++-------
  132.  common/arm/asm.S    |    2 +-
  133.  common/bitstream.c  |    4 ++--
  134.  common/macroblock.h |    2 +-
  135.  configure           |   16 +++++++++-------
  136.  5 files changed, 20 insertions(+), 18 deletions(-)
  137.  
  138. diff --git a/Makefile b/Makefile
  139. index 21f57e7..6344ca4 100644
  140. --- a/Makefile
  141. +++ b/Makefile
  142. @@ -25,34 +25,34 @@ SRCSO =
  143.  CONFIG := $(shell cat config.h)
  144.  
  145.  # GPL-only files
  146. -ifeq ($(GPL),yes)
  147. +ifneq ($(findstring HAVE_GPL 1, $(CONFIG)),)
  148.  SRCCLI +=
  149.  endif
  150.  
  151.  # Optional module sources
  152. -ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
  153. +ifneq ($(findstring HAVE_AVS 1, $(CONFIG)),)
  154.  SRCCLI += input/avs.c
  155.  endif
  156.  
  157. -ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
  158. +ifneq ($(findstring HAVE_PTHREAD 1, $(CONFIG)),)
  159.  SRCCLI += input/thread.c
  160.  SRCS   += common/threadpool.c
  161.  endif
  162.  
  163. -ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
  164. +ifneq ($(findstring HAVE_LAVF 1, $(CONFIG)),)
  165.  SRCCLI += input/lavf.c
  166.  endif
  167.  
  168. -ifneq ($(findstring HAVE_FFMS, $(CONFIG)),)
  169. +ifneq ($(findstring HAVE_FFMS 1, $(CONFIG)),)
  170.  SRCCLI += input/ffms.c
  171.  endif
  172.  
  173. -ifneq ($(findstring HAVE_GPAC, $(CONFIG)),)
  174. +ifneq ($(findstring HAVE_GPAC 1, $(CONFIG)),)
  175.  SRCCLI += output/mp4.c
  176.  endif
  177.  
  178.  # Visualization sources
  179. -ifeq ($(VIS),yes)
  180. +ifneq ($(findstring HAVE_VISUALIZE 1, $(CONFIG)),)
  181.  SRCS   += common/visualize.c common/display-x11.c
  182.  endif
  183.  
  184. diff --git a/common/arm/asm.S b/common/arm/asm.S
  185. index 7434262..92e3b14 100644
  186. --- a/common/arm/asm.S
  187. +++ b/common/arm/asm.S
  188. @@ -65,7 +65,7 @@ ELF     .type   \name, %function
  189.          .endm
  190.  
  191.  .macro movconst rd, val
  192. -#ifdef HAVE_ARMV6T2
  193. +#if HAVE_ARMV6T2
  194.      movw        \rd, #:lower16:\val
  195.  .if \val >> 16
  196.      movt        \rd, #:upper16:\val
  197. diff --git a/common/bitstream.c b/common/bitstream.c
  198. index 8350fb3..0f2bc9f 100644
  199. --- a/common/bitstream.c
  200. +++ b/common/bitstream.c
  201. @@ -39,7 +39,7 @@ static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
  202.      return dst;
  203.  }
  204.  
  205. -#ifdef HAVE_MMX
  206. +#if HAVE_MMX
  207.  uint8_t *x264_nal_escape_mmxext( uint8_t *dst, uint8_t *src, uint8_t *end );
  208.  uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
  209.  #endif
  210. @@ -88,7 +88,7 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
  211.  void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
  212.  {
  213.      pf->nal_escape = x264_nal_escape_c;
  214. -#ifdef HAVE_MMX
  215. +#if HAVE_MMX
  216.      if( cpu&X264_CPU_MMXEXT )
  217.          pf->nal_escape = x264_nal_escape_mmxext;
  218.      if( (cpu&X264_CPU_SSE2) && (cpu&X264_CPU_SSE2_IS_FAST) )
  219. diff --git a/common/macroblock.h b/common/macroblock.h
  220. index 7562948..ce4ead9 100644
  221. --- a/common/macroblock.h
  222. +++ b/common/macroblock.h
  223. @@ -364,7 +364,7 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
  224.  }
  225.  static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
  226.  {
  227. -#ifdef WORDS_BIGENDIAN
  228. +#if WORDS_BIGENDIAN
  229.     return b + ((uint64_t)a<<32);
  230.  #else
  231.     return a + ((uint64_t)b<<32);
  232. diff --git a/configure b/configure
  233. index fd62337..8ca3298 100755
  234. --- a/configure
  235. +++ b/configure
  236. @@ -171,6 +171,9 @@ cross_prefix=""
  237.  
  238.  EXE=""
  239.  
  240. +# list of all preprocessor HAVE values we can define
  241. +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON PTHREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL"
  242. +
  243.  # parse options
  244.  
  245.  for opt do
  246. @@ -735,11 +738,12 @@ fi
  247.  define BIT_DEPTH $bit_depth
  248.  ASFLAGS="$ASFLAGS -DBIT_DEPTH=$bit_depth"
  249.  
  250. -if [ $gpl = yes ]; then
  251. -    define HAVE_GPL 1
  252. -else
  253. -    define HAVE_GPL 0
  254. -fi
  255. +[ $gpl = yes ] && define HAVE_GPL
  256. +
  257. +# define every HAVE_* option not already set to 1 as 0
  258. +for var in $CONFIG_HAVE; do
  259. +    grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
  260. +done
  261.  
  262.  rm -f conftest*
  263.  
  264. @@ -763,10 +767,8 @@ STRIP=$STRIP
  265.  AS=$AS
  266.  ASFLAGS=$ASFLAGS
  267.  EXE=$EXE
  268. -VIS=$vis
  269.  HAVE_GETOPT_LONG=$HAVE_GETOPT_LONG
  270.  DEVNULL=$DEVNULL
  271. -GPL=$gpl
  272.  EOF
  273.  
  274.  if [ "$shared" = "yes" ]; then
  275. --
  276. 1.7.3.2.146.gca209
  277.  
  278.  
  279. From 0a8ba9e72e50489399b8144e354a65716db689c0 Mon Sep 17 00:00:00 2001
  280. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  281. Date: Sat, 20 Nov 2010 23:30:42 -0800
  282. Subject: [PATCH 4/9] Change qpmin default to 0
  283.  There's probably no real reason to keep it at 10 anymore, and lowering it allows AQ to pick lower quantizers in really flat areas.
  284.  Might help on gradients at high quality levels.
  285.  The previous value of 10 was arbitrary anyways.
  286.  
  287. ---
  288. common/common.c |    2 +-
  289.  1 files changed, 1 insertions(+), 1 deletions(-)
  290.  
  291. diff --git a/common/common.c b/common/common.c
  292. index 6c88556..1f99e9e 100644
  293. --- a/common/common.c
  294. +++ b/common/common.c
  295. @@ -98,7 +98,7 @@ void x264_param_default( x264_param_t *param )
  296.      param->rc.f_vbv_buffer_init = 0.9;
  297.      param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
  298.      param->rc.f_rf_constant = 23;
  299. -    param->rc.i_qp_min = 10;
  300. +    param->rc.i_qp_min = 0;
  301.      param->rc.i_qp_max = QP_MAX;
  302.      param->rc.i_qp_step = 4;
  303.      param->rc.f_ip_factor = 1.4;
  304. --
  305. 1.7.3.2.146.gca209
  306.  
  307.  
  308. From f712ab404a164df63f7049f0bbaf81a73809ac8a Mon Sep 17 00:00:00 2001
  309. From: Yasuhiro Ikeda <wipple625@gmail.com>
  310. Date: Mon, 22 Nov 2010 11:01:57 +0900
  311. Subject: [PATCH 5/9] Add some more info to `x264 --version`
  312.  
  313. ---
  314. x264.c |   23 ++++++++++++++++++++---
  315.  1 files changed, 20 insertions(+), 3 deletions(-)
  316.  
  317. diff --git a/x264.c b/x264.c
  318. index bba17b8..38af46b 100644
  319. --- a/x264.c
  320. +++ b/x264.c
  321. @@ -55,6 +55,14 @@
  322.  #include <libavutil/pixdesc.h>
  323.  #endif
  324.  
  325. +#if HAVE_SWSCALE
  326. +#include <libswscale/swscale.h>
  327. +#endif
  328. +
  329. +#if HAVE_FFMS
  330. +#include <ffms.h>
  331. +#endif
  332. +
  333.  /* Ctrl-C handler */
  334.  static volatile int b_ctrl_c = 0;
  335.  static int          b_exit_on_ctrl_c = 0;
  336. @@ -208,6 +216,15 @@ static void print_version_info()
  337.  #else
  338.      printf( "x264 0.%d.X\n", X264_BUILD );
  339.  #endif
  340. +#if HAVE_SWSCALE
  341. +    printf( "(libswscale %d.%d.%d)\n", LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO );
  342. +#endif
  343. +#if HAVE_LAVF
  344. +    printf( "(libavformat %d.%d.%d)\n", LIBAVFORMAT_VERSION_MAJOR, LIBAVFORMAT_VERSION_MINOR, LIBAVFORMAT_VERSION_MICRO );
  345. +#endif
  346. +#if HAVE_FFMS
  347. +    printf( "(ffmpegsource %d.%d.%d.%d)\n", FFMS_VERSION >> 24, (FFMS_VERSION & 0xff0000) >> 16, (FFMS_VERSION & 0xff00) >> 8, FFMS_VERSION & 0xff );
  348. +#endif
  349.      printf( "built on " __DATE__ ", " );
  350.  #ifdef __GNUC__
  351.      printf( "gcc: " __VERSION__ "\n" );
  352. @@ -221,9 +238,9 @@ static void print_version_info()
  353.  #else
  354.      printf( "Non-GPL commercial\n" );
  355.  #endif
  356. -#if HAVE_LAVF
  357. -    const char *license = avformat_license();
  358. -    printf( "libavformat license: %s\n", license );
  359. +#if HAVE_SWSCALE
  360. +    const char *license = swscale_license();
  361. +    printf( "libswscale%s%s license: %s\n", HAVE_LAVF ? "/libavformat" : "", HAVE_FFMS ? "/ffmpegsource" : "" , license );
  362.      if( !strcmp( license, "nonfree and unredistributable" ) ||
  363.         (!HAVE_GPL && (!strcmp( license, "GPL version 2 or later" )
  364.                    ||  !strcmp( license, "GPL version 3 or later" ))))
  365. --
  366. 1.7.3.2.146.gca209
  367.  
  368.  
  369. From 52d39c53efc94c1114a8416851c763be2d9aac22 Mon Sep 17 00:00:00 2001
  370. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  371. Date: Fri, 19 Nov 2010 16:58:38 -0800
  372. Subject: [PATCH 6/9] Add API function to return max number of delayed frames
  373.  
  374. ---
  375. encoder/encoder.c |    8 +++++---
  376.  x264.h            |    6 +++++-
  377.  2 files changed, 10 insertions(+), 4 deletions(-)
  378.  
  379. diff --git a/encoder/encoder.c b/encoder/encoder.c
  380. index 8b14b41..2d5c778 100644
  381. --- a/encoder/encoder.c
  382. +++ b/encoder/encoder.c
  383. @@ -3320,9 +3320,6 @@ void    x264_encoder_close  ( x264_t *h )
  384.      }
  385.  }
  386.  
  387. -/****************************************************************************
  388. - * x264_encoder_delayed_frames:
  389. - ****************************************************************************/
  390.  int x264_encoder_delayed_frames( x264_t *h )
  391.  {
  392.      int delayed_frames = 0;
  393. @@ -3343,3 +3340,8 @@ int x264_encoder_delayed_frames( x264_t *h )
  394.      x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
  395.      return delayed_frames;
  396.  }
  397. +
  398. +int x264_encoder_maximum_delayed_frames( x264_t *h )
  399. +{
  400. +    return h->frames.i_delay;
  401. +}
  402. diff --git a/x264.h b/x264.h
  403. index ce79d40..e144e51 100644
  404. --- a/x264.h
  405. +++ b/x264.h
  406. @@ -39,7 +39,7 @@
  407.  
  408.  #include <stdarg.h>
  409.  
  410. -#define X264_BUILD 108
  411. +#define X264_BUILD 109
  412.  
  413.  /* x264_t:
  414.   *      opaque handler for encoder */
  415. @@ -766,6 +766,10 @@ void    x264_encoder_close  ( x264_t * );
  416.   *      return the number of currently delayed (buffered) frames
  417.   *      this should be used at the end of the stream, to know when you have all the encoded frames. */
  418.  int     x264_encoder_delayed_frames( x264_t * );
  419. +/* x264_encoder_maximum_delayed_frames( x264_t *h ):
  420. + *      return the maximum number of delayed (buffered) frames that can occur with the current
  421. + *      parameters. */
  422. +int     x264_encoder_maximum_delayed_frames( x264_t *h );
  423.  /* x264_encoder_intra_refresh:
  424.   *      If an intra refresh is not in progress, begin one with the next P-frame.
  425.   *      If an intra refresh is in progress, begin one as soon as the current one finishes.
  426. --
  427. 1.7.3.2.146.gca209
  428.  
  429.  
  430. From 1de18f2aaa9f1feeb1b0002d4fb4f2abcc77b3b7 Mon Sep 17 00:00:00 2001
  431. From: Anton Mitrofanov <BugMaster@narod.ru>
  432. Date: Tue, 23 Nov 2010 23:06:51 +0300
  433. Subject: [PATCH 7/9] Clean up of weights analyse function
  434.  
  435. ---
  436. encoder/slicetype.c |   42 +++++++++++++++++++-----------------------
  437.  1 files changed, 19 insertions(+), 23 deletions(-)
  438.  
  439. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  440. index edf74c4..dd6c360 100644
  441. --- a/encoder/slicetype.c
  442. +++ b/encoder/slicetype.c
  443. @@ -57,7 +57,7 @@ static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
  444.  }
  445.  
  446.  /* makes a non-h264 weight (i.e. fix7), into an h264 weight */
  447. -static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_weight_t *w )
  448. +static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w )
  449.  {
  450.      w->i_offset = offset;
  451.      w->i_denom = 7;
  452. @@ -221,39 +221,37 @@ static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *f
  453.  
  454.  void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
  455.  {
  456. -    float fenc_mean, ref_mean, fenc_var, ref_var;
  457. -    int offset_search;
  458. -    int minoff, minscale, mindenom;
  459. -    unsigned int minscore, origscore;
  460.      int i_delta_index = fenc->i_frame - ref->i_frame - 1;
  461.      /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
  462. -    const float epsilon = 1.0/128.0;
  463. -    float guess_scale;
  464. -    int found;
  465. +    const float epsilon = 1.f/128.f;
  466.      x264_weight_t *weights = fenc->weight[0];
  467. +    SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  468.      SET_WEIGHT( weights[1], 0, 1, 0, 0 );
  469.      SET_WEIGHT( weights[2], 0, 1, 0, 0 );
  470.      /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
  471.      for( int plane = 0; plane <= 2  && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
  472.      {
  473. -        fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
  474. -        ref_var  = round( sqrt(  ref->i_pixel_ssd[plane] ) );
  475. -        fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  476. -        ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  477. +        int offset_search;
  478. +        int minoff, minscale, mindenom;
  479. +        unsigned int minscore, origscore;
  480. +        int found;
  481. +        float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
  482. +        float ref_var  =  ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
  483. +        float guess_scale = sqrtf( fenc_var / ref_var );
  484. +        float fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  485. +        float ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  486.  
  487.          //early termination
  488. -        if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
  489. +        if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale ) < epsilon )
  490.          {
  491.              SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  492.              continue;
  493.          }
  494.  
  495. -        guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
  496. -
  497.          if( plane )
  498.          {
  499.              weights[plane].i_denom = 6;
  500. -            weights[plane].i_scale = x264_clip3( round(guess_scale * 64.0), 0, 255 );
  501. +            weights[plane].i_scale = x264_clip3( (int)(guess_scale * 64.f + 0.5f), 0, 255 );
  502.              if( weights[plane].i_scale > 127 )
  503.              {
  504.                  weights[1].weightfn = weights[2].weightfn = NULL;
  505. @@ -261,9 +259,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  506.              }
  507.          }
  508.          else
  509. -            x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
  510. -        if( weights[plane].weightfn )
  511. -            h->mc.weight_cache( h, &weights[plane] );
  512. +            x264_weight_get_h264( (int)(guess_scale * 128.f + 0.5f), 0, &weights[plane] );
  513.  
  514.          found = 0;
  515.          mindenom = weights[plane].i_denom;
  516. @@ -280,7 +276,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  517.                  x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  518.              }
  519.              mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  520. -            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
  521. +            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, NULL );
  522.          }
  523.          else
  524.          {
  525. @@ -290,7 +286,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  526.              if( plane == 1 )
  527.                  x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
  528.              mcbuf = plane == 1 ? dstu : dstv;
  529. -            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0 );
  530. +            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
  531.          }
  532.  
  533.          if( !minscore )
  534. @@ -299,7 +295,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  535.          // This gives a slight improvement due to rounding errors but only tests
  536.          // one offset on lookahead.
  537.          // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  538. -        offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  539. +        offset_search = x264_clip3( (int)(fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f * b_lookahead), -128, 126 );
  540.          for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  541.          {
  542.              SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
  543. @@ -314,7 +310,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  544.  
  545.          /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  546.          /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  547. -        if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  548. +        if( !found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f )
  549.          {
  550.              SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  551.              continue;
  552. --
  553. 1.7.3.2.146.gca209
  554.  
  555.  
  556. From 5cd2f79169b688db93d3922f618fde2b26ca94b7 Mon Sep 17 00:00:00 2001
  557. From: Daniel Kang <daniel.d.kang@gmail.com>
  558. Date: Tue, 23 Nov 2010 20:29:37 -0500
  559. Subject: [PATCH 8/9] SSE version of high-bit-depth add4x4_idct_sse2
  560.  ~6.3x faster than C.
  561.  Our first Google Code-In patch!
  562.  
  563. ---
  564. common/dct.c           |    4 +
  565.  common/x86/const-a.asm |    1 +
  566.  common/x86/dct-32.asm  |   36 ++++++------
  567.  common/x86/dct-64.asm  |   38 ++++++------
  568.  common/x86/dct-a.asm   |   51 ++++++++++++++---
  569.  common/x86/dct.h       |    1 +
  570.  common/x86/pixel-a.asm |   16 +++---
  571.  common/x86/x86util.asm |  146 ++++++++++++++++++++++++-----------------------
  572.  8 files changed, 168 insertions(+), 125 deletions(-)
  573.  
  574. diff --git a/common/dct.c b/common/dct.c
  575. index 975afef..1b3d87b 100644
  576. --- a/common/dct.c
  577. +++ b/common/dct.c
  578. @@ -429,6 +429,10 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
  579.          dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
  580.          dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
  581.      }
  582. +    if( cpu&X264_CPU_SSE2 )
  583. +    {
  584. +        dctf->add4x4_idct   = x264_add4x4_idct_sse2;
  585. +    }
  586.  #endif // HAVE_MMX
  587.  #else // !HIGH_BIT_DEPTH
  588.  #if HAVE_MMX
  589. diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
  590. index 32579e3..d6e621e 100644
  591. --- a/common/x86/const-a.asm
  592. +++ b/common/x86/const-a.asm
  593. @@ -50,6 +50,7 @@ const pw_3fff,     times 8 dw 0x3fff
  594.  const pw_pixel_max,times 8 dw ((1 << BIT_DEPTH)-1)
  595.  
  596.  const pd_1,        times 4 dd 1
  597. +const pd_32,       times 4 dd 32
  598.  const pd_128,      times 4 dd 128
  599.  const pw_00ff,     times 8 dw 0x00ff
  600.  const pw_ff00,     times 8 dw 0xff00
  601. diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
  602. index 24e6efc..21e70c1 100644
  603. --- a/common/x86/dct-32.asm
  604. +++ b/common/x86/dct-32.asm
  605. @@ -38,13 +38,13 @@ cextern hsub_mul
  606.  ; in: m0..m7
  607.  ; out: 0,4,6 in mem, rest in regs
  608.  %macro DCT8_1D 9
  609. -    SUMSUB_BA  m%8, m%1      ; %8 = s07, %1 = d07
  610. -    SUMSUB_BA  m%7, m%2      ; %7 = s16, %2 = d16
  611. -    SUMSUB_BA  m%6, m%3      ; %6 = s25, %3 = d25
  612. -    SUMSUB_BA  m%5, m%4      ; %5 = s34, %4 = d34
  613. -    SUMSUB_BA  m%5, m%8      ; %5 = a0,  %8 = a2
  614. -    SUMSUB_BA  m%6, m%7      ; %6 = a1,  %7 = a3
  615. -    SUMSUB_BA  m%6, m%5      ; %6 = dst0, %5 = dst4
  616. +    SUMSUB_BA  w, m%8, m%1      ; %8 = s07, %1 = d07
  617. +    SUMSUB_BA  w, m%7, m%2      ; %7 = s16, %2 = d16
  618. +    SUMSUB_BA  w, m%6, m%3      ; %6 = s25, %3 = d25
  619. +    SUMSUB_BA  w, m%5, m%4      ; %5 = s34, %4 = d34
  620. +    SUMSUB_BA  w, m%5, m%8      ; %5 = a0,  %8 = a2
  621. +    SUMSUB_BA  w, m%6, m%7      ; %6 = a1,  %7 = a3
  622. +    SUMSUB_BA  w, m%6, m%5      ; %6 = dst0, %5 = dst4
  623.      mova    [%9+0x00], m%6
  624.      mova    [%9+0x40], m%5
  625.      mova    m%6, m%7         ; a3
  626. @@ -127,13 +127,13 @@ cextern hsub_mul
  627.      psubw     m%2, m%1
  628.      mova      m%1, [%9+0x00]
  629.      mova      m%6, [%9+0x40]
  630. -    SUMSUB_BA m%6, m%1
  631. -    SUMSUB_BA m%7, m%6
  632. -    SUMSUB_BA m%3, m%1
  633. -    SUMSUB_BA m%5, m%7
  634. -    SUMSUB_BA m%2, m%3
  635. -    SUMSUB_BA m%8, m%1
  636. -    SUMSUB_BA m%4, m%6
  637. +    SUMSUB_BA w, m%6, m%1
  638. +    SUMSUB_BA w, m%7, m%6
  639. +    SUMSUB_BA w, m%3, m%1
  640. +    SUMSUB_BA w, m%5, m%7
  641. +    SUMSUB_BA w, m%2, m%3
  642. +    SUMSUB_BA w, m%8, m%1
  643. +    SUMSUB_BA w, m%4, m%6
  644.      SWAP %1, %5, %6
  645.      SWAP %3, %8, %7
  646.  %endmacro
  647. @@ -434,18 +434,18 @@ global add8x8_idct_sse2.skip_prologue
  648.      SBUTTERFLY qdq, 4, 5, 0
  649.      SBUTTERFLY qdq, 6, 7, 0
  650.      UNSPILL r1,0
  651. -    IDCT4_1D 0,1,2,3,r1
  652. +    IDCT4_1D w,0,1,2,3,r1
  653.      SPILL r1, 4
  654.      TRANSPOSE2x4x4W 0,1,2,3,4
  655.      UNSPILL r1, 4
  656. -    IDCT4_1D 4,5,6,7,r1
  657. +    IDCT4_1D w,4,5,6,7,r1
  658.      SPILL r1, 0
  659.      TRANSPOSE2x4x4W 4,5,6,7,0
  660.      UNSPILL r1, 0
  661.      paddw m0, [pw_32]
  662. -    IDCT4_1D 0,1,2,3,r1
  663. +    IDCT4_1D w,0,1,2,3,r1
  664.      paddw m4, [pw_32]
  665. -    IDCT4_1D 4,5,6,7,r1
  666. +    IDCT4_1D w,4,5,6,7,r1
  667.      SPILL r1, 6,7
  668.      pxor m7, m7
  669.      DIFFx2 m0, m1, m6, m7, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]; m5
  670. diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
  671. index 5e43b9c..1d6911e 100644
  672. --- a/common/x86/dct-64.asm
  673. +++ b/common/x86/dct-64.asm
  674. @@ -36,13 +36,13 @@ cextern hsub_mul
  675.  INIT_XMM
  676.  
  677.  %macro DCT8_1D 10
  678. -    SUMSUB_BA  m%5, m%4 ; %5=s34, %4=d34
  679. -    SUMSUB_BA  m%6, m%3 ; %6=s25, %3=d25
  680. -    SUMSUB_BA  m%7, m%2 ; %7=s16, %2=d16
  681. -    SUMSUB_BA  m%8, m%1 ; %8=s07, %1=d07
  682. +    SUMSUB_BA  w, m%5, m%4 ; %5=s34, %4=d34
  683. +    SUMSUB_BA  w, m%6, m%3 ; %6=s25, %3=d25
  684. +    SUMSUB_BA  w, m%7, m%2 ; %7=s16, %2=d16
  685. +    SUMSUB_BA  w, m%8, m%1 ; %8=s07, %1=d07
  686.  
  687. -    SUMSUB_BA  m%6, m%7, m%10 ; %6=a1, %7=a3
  688. -    SUMSUB_BA  m%5, m%8, m%10 ; %5=a0, %8=a2
  689. +    SUMSUB_BA  w, m%6, m%7, m%10 ; %6=a1, %7=a3
  690. +    SUMSUB_BA  w, m%5, m%8, m%10 ; %5=a0, %8=a2
  691.  
  692.      movdqa  m%9, m%1
  693.      psraw   m%9, 1
  694. @@ -56,7 +56,7 @@ INIT_XMM
  695.      paddw   m%10, m%2
  696.      psubw   m%10, m%3 ; %10=a7
  697.  
  698. -    SUMSUB_BA  m%4, m%1
  699. +    SUMSUB_BA  w, m%4, m%1
  700.      psubw   m%1, m%3
  701.      psubw   m%4, m%2
  702.      psraw   m%3, 1
  703. @@ -70,7 +70,7 @@ INIT_XMM
  704.      psraw   m%9, 2
  705.      psubw   m%9, m%10 ; %9=b7
  706.  
  707. -    SUMSUB_BA  m%6, m%5, m%10 ; %6=b0, %5=b4
  708. +    SUMSUB_BA  w, m%6, m%5, m%10 ; %6=b0, %5=b4
  709.  
  710.      movdqa  m%3, m%7
  711.      psraw   m%3, 1
  712. @@ -88,7 +88,7 @@ INIT_XMM
  713.  %endmacro
  714.  
  715.  %macro IDCT8_1D 10
  716. -    SUMSUB_BA  m%5, m%1, m%9 ; %5=a0, %1=a2
  717. +    SUMSUB_BA  w, m%5, m%1, m%9 ; %5=a0, %1=a2
  718.  
  719.      movdqa  m%9, m%2
  720.      psraw   m%9, 1
  721. @@ -123,8 +123,8 @@ INIT_XMM
  722.      psraw   m%6, 2
  723.      psubw   m%9, m%6 ; %9=b7
  724.  
  725. -    SUMSUB_BA m%7, m%5, m%6 ; %7=b0, %5=b6
  726. -    SUMSUB_BA m%3, m%1, m%6; %3=b2, %1=b4
  727. +    SUMSUB_BA w, m%7, m%5, m%6 ; %7=b0, %5=b6
  728. +    SUMSUB_BA w, m%3, m%1, m%6 ; %3=b2, %1=b4
  729.  
  730.      movdqa  m%8, m%10
  731.      psraw   m%8, 2
  732. @@ -132,10 +132,10 @@ INIT_XMM
  733.      psraw   m%2, 2
  734.      psubw   m%2, m%10 ; %2=b5
  735.  
  736. -    SUMSUB_BA m%9, m%7, m%6 ; %9=c0, %7=c7
  737. -    SUMSUB_BA m%2, m%3, m%6 ; %2=c1, %3=c6
  738. -    SUMSUB_BA m%8, m%1, m%6 ; %8=c2, %1=c5
  739. -    SUMSUB_BA m%4, m%5, m%6 ; %4=c3, %5=c4
  740. +    SUMSUB_BA w, m%9, m%7, m%6 ; %9=c0, %7=c7
  741. +    SUMSUB_BA w, m%2, m%3, m%6 ; %2=c1, %3=c6
  742. +    SUMSUB_BA w, m%8, m%1, m%6 ; %8=c2, %1=c5
  743. +    SUMSUB_BA w, m%4, m%5, m%6 ; %4=c3, %5=c4
  744.  
  745.      SWAP %1, %9, %6
  746.      SWAP %3, %8, %7
  747. @@ -263,14 +263,14 @@ global add8x8_idct_sse2.skip_prologue
  748.      mova   m7, [r1+112]
  749.      SBUTTERFLY qdq, 4, 5, 8
  750.      SBUTTERFLY qdq, 6, 7, 8
  751. -    IDCT4_1D 0,1,2,3,8,10
  752. +    IDCT4_1D w,0,1,2,3,8,10
  753.      TRANSPOSE2x4x4W 0,1,2,3,8
  754. -    IDCT4_1D 4,5,6,7,8,10
  755. +    IDCT4_1D w,4,5,6,7,8,10
  756.      TRANSPOSE2x4x4W 4,5,6,7,8
  757.      paddw m0, [pw_32]
  758. -    IDCT4_1D 0,1,2,3,8,10
  759. +    IDCT4_1D w,0,1,2,3,8,10
  760.      paddw m4, [pw_32]
  761. -    IDCT4_1D 4,5,6,7,8,10
  762. +    IDCT4_1D w,4,5,6,7,8,10
  763.      DIFFx2 m0, m1, m8, m9, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]
  764.      DIFFx2 m2, m3, m8, m9, [r0-2*FDEC_STRIDE], [r0-1*FDEC_STRIDE]
  765.      DIFFx2 m4, m5, m8, m9, [r0+0*FDEC_STRIDE], [r0+1*FDEC_STRIDE]
  766. diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
  767. index 67fa34a..f0163a3 100644
  768. --- a/common/x86/dct-a.asm
  769. +++ b/common/x86/dct-a.asm
  770. @@ -52,13 +52,15 @@ SECTION .text
  771.  cextern pw_32_0
  772.  cextern pw_32
  773.  cextern pw_8000
  774. +cextern pw_pixel_max
  775.  cextern hsub_mul
  776.  cextern pb_1
  777.  cextern pw_1
  778. +cextern pd_32
  779.  
  780.  %macro WALSH4_1D 5
  781. -    SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
  782. -    SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
  783. +    SUMSUB_BADC w, m%4, m%3, m%2, m%1, m%5
  784. +    SUMSUB_BADC w, m%4, m%2, m%3, m%1, m%5
  785.      SWAP %1, %4, %3
  786.  %endmacro
  787.  
  788. @@ -86,7 +88,7 @@ cglobal dct4x4dc_mmx, 1,1
  789.      movq   m7, [pw_8000] ; convert to unsigned and back, so that pavgw works
  790.      WALSH4_1D  0,1,2,3,4
  791.      TRANSPOSE4x4W 0,1,2,3,4
  792. -    SUMSUB_BADC m1, m0, m3, m2, m4
  793. +    SUMSUB_BADC w, m1, m0, m3, m2, m4
  794.      SWAP 0, 1
  795.      SWAP 2, 3
  796.      SUMSUB_17BIT 0,2,4,7
  797. @@ -164,10 +166,41 @@ SUB_DCT4 mmx
  798.  SUB_DCT4 ssse3
  799.  %endif ; !HIGH_BIT_DEPTH
  800.  
  801. -%ifndef HIGH_BIT_DEPTH
  802.  ;-----------------------------------------------------------------------------
  803.  ; void add4x4_idct( uint8_t *p_dst, int16_t dct[4][4] )
  804.  ;-----------------------------------------------------------------------------
  805. +%ifdef HIGH_BIT_DEPTH
  806. +%macro STORE_DIFFx2 6
  807. +    psrad     %1, 6
  808. +    psrad     %2, 6
  809. +    packssdw  %1, %2
  810. +    movq      %3, %5
  811. +    movhps    %3, %6
  812. +    paddsw    %1, %3
  813. +    pxor      %4, %4
  814. +    CLIPW     %1, %4, [pw_pixel_max]
  815. +    movq      %5, %1
  816. +    movhps    %6, %1
  817. +%endmacro
  818. +
  819. +INIT_XMM
  820. +cglobal add4x4_idct_sse2, 2,2,7
  821. +    pxor  m6, m6
  822. +.skip_prologue:
  823. +    mova  m1, [r1+16]
  824. +    mova  m3, [r1+48]
  825. +    mova  m2, [r1+32]
  826. +    mova  m0, [r1+ 0]
  827. +    IDCT4_1D d,0,1,2,3,4,5
  828. +    TRANSPOSE4x4D 0,1,2,3,4
  829. +    paddd m0, [pd_32]
  830. +    IDCT4_1D d,0,1,2,3,4,5
  831. +    STORE_DIFFx2 m0, m1, m4, m6, [r0+0*FDEC_STRIDE], [r0+2*FDEC_STRIDE]
  832. +    STORE_DIFFx2 m2, m3, m4, m6, [r0+4*FDEC_STRIDE], [r0+6*FDEC_STRIDE]
  833. +    RET
  834. +
  835. +%else
  836. +
  837.  cglobal add4x4_idct_mmx, 2,2
  838.      pxor m7, m7
  839.  .skip_prologue:
  840. @@ -175,10 +208,10 @@ cglobal add4x4_idct_mmx, 2,2
  841.      movq  m3, [r1+24]
  842.      movq  m2, [r1+16]
  843.      movq  m0, [r1+ 0]
  844. -    IDCT4_1D 0,1,2,3,4,5
  845. +    IDCT4_1D w,0,1,2,3,4,5
  846.      TRANSPOSE4x4W 0,1,2,3,4
  847.      paddw m0, [pw_32]
  848. -    IDCT4_1D 0,1,2,3,4,5
  849. +    IDCT4_1D w,0,1,2,3,4,5
  850.      STORE_DIFF  m0, m4, m7, [r0+0*FDEC_STRIDE]
  851.      STORE_DIFF  m1, m4, m7, [r0+1*FDEC_STRIDE]
  852.      STORE_DIFF  m2, m4, m7, [r0+2*FDEC_STRIDE]
  853. @@ -198,7 +231,7 @@ cglobal add4x4_idct_sse4, 2,2,6
  854.      psubw     m0, m3            ; row1>>1-row3/row0-2
  855.      paddw     m2, m1            ; row3>>1+row1/row0+2
  856.      SBUTTERFLY2 wd, 0, 2, 1
  857. -    SUMSUB_BA m2, m0, m1
  858. +    SUMSUB_BA w, m2, m0, m1
  859.      pshuflw   m1, m2, 10110001b
  860.      pshufhw   m2, m2, 10110001b
  861.      punpckldq m1, m0
  862. @@ -215,7 +248,7 @@ cglobal add4x4_idct_sse4, 2,2,6
  863.      psubw     m0, m3            ; row1>>1-row3/row0-2
  864.      paddw     m2, m1            ; row3>>1+row1/row0+2
  865.      SBUTTERFLY2 qdq, 0, 2, 1
  866. -    SUMSUB_BA m2, m0, m1
  867. +    SUMSUB_BA w, m2, m0, m1
  868.  
  869.      movd      m4, [r0+FDEC_STRIDE*0]
  870.      movd      m1, [r0+FDEC_STRIDE*1]
  871. @@ -236,7 +269,7 @@ cglobal add4x4_idct_sse4, 2,2,6
  872.      movd     [r0+FDEC_STRIDE*2], m0
  873.      pextrd   [r0+FDEC_STRIDE*3], m0, 1
  874.      RET
  875. -%endif ; !HIGH_BIT_DEPTH
  876. +%endif ; HIGH_BIT_DEPTH
  877.  
  878.  INIT_MMX
  879.  ;-----------------------------------------------------------------------------
  880. diff --git a/common/x86/dct.h b/common/x86/dct.h
  881. index 58b9d17..ec6ec25 100644
  882. --- a/common/x86/dct.h
  883. +++ b/common/x86/dct.h
  884. @@ -40,6 +40,7 @@ void x264_sub8x8_dct_dc_mmxext( int16_t dct    [ 4], uint8_t *pix1, uint8_t *pix
  885.  void x264_sub8x8_dct_dc_sse2  ( int16_t dct    [ 4], uint8_t *pix1, uint8_t *pix2 );
  886.  
  887.  void x264_add4x4_idct_mmx       ( uint8_t *p_dst, int16_t dct    [16] );
  888. +void x264_add4x4_idct_sse2     ( uint16_t *p_dst, int32_t dct    [16] );
  889.  void x264_add4x4_idct_sse4      ( uint8_t *p_dst, int16_t dct    [16] );
  890.  void x264_add8x8_idct_mmx       ( uint8_t *p_dst, int16_t dct[ 4][16] );
  891.  void x264_add8x8_idct_dc_mmx    ( uint8_t *p_dst, int16_t dct    [ 4] );
  892. diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
  893. index acb3612..6cd79e1 100644
  894. --- a/common/x86/pixel-a.asm
  895. +++ b/common/x86/pixel-a.asm
  896. @@ -881,7 +881,7 @@ cglobal pixel_var2_8x8_ssse3, 5,6,8
  897.      DEINTB %1, %2, %3, %4, %5
  898.      psubw m%1, m%3
  899.      psubw m%2, m%4
  900. -    SUMSUB_BA m%1, m%2, m%3
  901. +    SUMSUB_BA w, m%1, m%2, m%3
  902.  %endmacro
  903.  
  904.  %macro LOAD_SUMSUB_16x4P 10-13 r0, r2, none
  905. @@ -1278,10 +1278,10 @@ cglobal pixel_sa8d_8x8_internal_%1
  906.  %else ; non-sse2
  907.      HADAMARD4_V m0, m1, m2, m8, m6
  908.      HADAMARD4_V m4, m5, m3, m9, m6
  909. -    SUMSUB_BADC m0, m4, m1, m5, m6
  910. +    SUMSUB_BADC w, m0, m4, m1, m5, m6
  911.      HADAMARD 2, sumsub, 0, 4, 6, 11
  912.      HADAMARD 2, sumsub, 1, 5, 6, 11
  913. -    SUMSUB_BADC m2, m3, m8, m9, m6
  914. +    SUMSUB_BADC w, m2, m3, m8, m9, m6
  915.      HADAMARD 2, sumsub, 2, 3, 6, 11
  916.      HADAMARD 2, sumsub, 8, 9, 6, 11
  917.      HADAMARD 1, amax, 0, 4, 6, 11
  918. @@ -1379,7 +1379,7 @@ cglobal pixel_sa8d_8x8_internal_%1
  919.      mova spill0, m6
  920.      mova spill1, m7
  921.      HADAMARD4_V m0, m1, m2, m3, m7
  922. -    SUMSUB_BADC m0, m4, m1, m5, m7
  923. +    SUMSUB_BADC w, m0, m4, m1, m5, m7
  924.      HADAMARD 2, sumsub, 0, 4, 7, 6
  925.      HADAMARD 2, sumsub, 1, 5, 7, 6
  926.      HADAMARD 1, amax, 0, 4, 7, 6
  927. @@ -1387,7 +1387,7 @@ cglobal pixel_sa8d_8x8_internal_%1
  928.      mova m6, spill0
  929.      mova m7, spill1
  930.      paddw m0, m1
  931. -    SUMSUB_BADC m2, m6, m3, m7, m4
  932. +    SUMSUB_BADC w, m2, m6, m3, m7, m4
  933.      HADAMARD 2, sumsub, 2, 6, 4, 5
  934.      HADAMARD 2, sumsub, 3, 7, 4, 5
  935.      HADAMARD 1, amax, 2, 6, 4, 5
  936. @@ -1994,7 +1994,7 @@ cglobal hadamard_ac_2x2max_mmxext
  937.      mova      m2, [r3+0x40]
  938.      mova      m3, [r3+0x60]
  939.      sub       r3, 8
  940. -    SUMSUB_BADC m0, m1, m2, m3, m4
  941. +    SUMSUB_BADC w, m0, m1, m2, m3, m4
  942.      ABS4 m0, m2, m1, m3, m4, m5
  943.      HADAMARD 0, max, 0, 2, 4, 5
  944.      HADAMARD 0, max, 1, 3, 4, 5
  945. @@ -2059,7 +2059,7 @@ cglobal hadamard_ac_8x8_mmxext
  946.      mova      m1, [r3+0x20]
  947.      mova      m2, [r3+0x40]
  948.      mova      m3, [r3+0x60]
  949. -    SUMSUB_BADC m0, m1, m2, m3, m4
  950. +    SUMSUB_BADC w, m0, m1, m2, m3, m4
  951.      HADAMARD 0, sumsub, 0, 2, 4, 5
  952.      ABS4 m1, m3, m0, m2, m4, m5
  953.      HADAMARD 0, max, 1, 3, 4, 5
  954. @@ -2266,7 +2266,7 @@ cglobal hadamard_ac_8x8_%1
  955.      ABS_MOV   m2, m4
  956.      ABS_MOV   m3, m5
  957.      paddw     m1, m2
  958. -    SUMSUB_BA m0, m4; m2
  959. +    SUMSUB_BA w, m0, m4; m2
  960.  %if vertical
  961.      pand      m1, [mask_ac4]
  962.  %else
  963. diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
  964. index 9acaa3d..510d609 100644
  965. --- a/common/x86/x86util.asm
  966. +++ b/common/x86/x86util.asm
  967. @@ -241,44 +241,44 @@
  968.      psrlw  m%4, 8   ; src .. y7 .. y5
  969.  %endmacro
  970.  
  971. -%macro SUMSUB_BA 2-3
  972. -%if %0==2
  973. -    paddw   %1, %2
  974. -    paddw   %2, %2
  975. -    psubw   %2, %1
  976. +%macro SUMSUB_BA 3-4
  977. +%if %0==3
  978. +    padd%1  %2, %3
  979. +    padd%1  %3, %3
  980. +    psub%1  %3, %2
  981.  %else
  982. -    mova    %3, %1
  983. -    paddw   %1, %2
  984. -    psubw   %2, %3
  985. +    mova    %4, %2
  986. +    padd%1  %2, %3
  987. +    psub%1  %3, %4
  988.  %endif
  989.  %endmacro
  990.  
  991. -%macro SUMSUB_BADC 4-5
  992. -%if %0==5
  993. -    SUMSUB_BA %1, %2, %5
  994. -    SUMSUB_BA %3, %4, %5
  995. +%macro SUMSUB_BADC 5-6
  996. +%if %0==6
  997. +    SUMSUB_BA %1, %2, %3, %6
  998. +    SUMSUB_BA %1, %4, %5, %6
  999.  %else
  1000. -    paddw   %1, %2
  1001. -    paddw   %3, %4
  1002. -    paddw   %2, %2
  1003. -    paddw   %4, %4
  1004. -    psubw   %2, %1
  1005. -    psubw   %4, %3
  1006. +    padd%1  %2, %3
  1007. +    padd%1  %4, %5
  1008. +    padd%1  %3, %3
  1009. +    padd%1  %5, %5
  1010. +    psub%1  %3, %2
  1011. +    psub%1  %5, %4
  1012.  %endif
  1013.  %endmacro
  1014.  
  1015.  %macro HADAMARD4_V 4+
  1016. -    SUMSUB_BADC %1, %2, %3, %4
  1017. -    SUMSUB_BADC %1, %3, %2, %4
  1018. +    SUMSUB_BADC w, %1, %2, %3, %4
  1019. +    SUMSUB_BADC w, %1, %3, %2, %4
  1020.  %endmacro
  1021.  
  1022.  %macro HADAMARD8_V 8+
  1023. -    SUMSUB_BADC %1, %2, %3, %4
  1024. -    SUMSUB_BADC %5, %6, %7, %8
  1025. -    SUMSUB_BADC %1, %3, %2, %4
  1026. -    SUMSUB_BADC %5, %7, %6, %8
  1027. -    SUMSUB_BADC %1, %5, %2, %6
  1028. -    SUMSUB_BADC %3, %7, %4, %8
  1029. +    SUMSUB_BADC w, %1, %2, %3, %4
  1030. +    SUMSUB_BADC w, %5, %6, %7, %8
  1031. +    SUMSUB_BADC w, %1, %3, %2, %4
  1032. +    SUMSUB_BADC w, %5, %7, %6, %8
  1033. +    SUMSUB_BADC w, %1, %5, %2, %6
  1034. +    SUMSUB_BADC w, %3, %7, %4, %8
  1035.  %endmacro
  1036.  
  1037.  %macro TRANS_SSE2 5-6
  1038. @@ -363,7 +363,7 @@
  1039.      %endif
  1040.  %endif
  1041.  %ifidn %2, sumsub
  1042. -    SUMSUB_BA m%3, m%4, m%5
  1043. +    SUMSUB_BA w, m%3, m%4, m%5
  1044.  %else
  1045.      %ifidn %2, amax
  1046.          %if %0==6
  1047. @@ -426,67 +426,71 @@
  1048.  %endif
  1049.  %endmacro
  1050.  
  1051. -%macro SUMSUB2_AB 3
  1052. -    mova    %3, %1
  1053. -    paddw   %1, %1
  1054. -    paddw   %1, %2
  1055. -    psubw   %3, %2
  1056. -    psubw   %3, %2
  1057. +%macro SUMSUB2_AB 4
  1058. +    mova    %4, %2
  1059. +    padd%1  %2, %2
  1060. +    padd%1  %2, %3
  1061. +    psub%1  %4, %3
  1062. +    psub%1  %4, %3
  1063.  %endmacro
  1064.  
  1065. -%macro SUMSUB2_BA 3
  1066. -    mova    m%3, m%1
  1067. -    paddw   m%1, m%2
  1068. -    paddw   m%1, m%2
  1069. -    psubw   m%2, m%3
  1070. -    psubw   m%2, m%3
  1071. +%macro SUMSUB2_BA 4
  1072. +    mova    m%4, m%2
  1073. +    padd%1  m%2, m%3
  1074. +    padd%1  m%2, m%3
  1075. +    psub%1  m%3, m%4
  1076. +    psub%1  m%3, m%4
  1077.  %endmacro
  1078.  
  1079. -%macro SUMSUBD2_AB 4
  1080. -    mova    %4, %1
  1081. -    mova    %3, %2
  1082. -    psraw   %2, 1  ; %2: %2>>1
  1083. -    psraw   %1, 1  ; %1: %1>>1
  1084. -    paddw   %2, %4 ; %2: %2>>1+%1
  1085. -    psubw   %1, %3 ; %1: %1>>1-%2
  1086. +%macro SUMSUBD2_AB 5
  1087. +    mova    %5, %2
  1088. +    mova    %4, %3
  1089. +    psra%1  %3, 1  ; %3: %3>>1
  1090. +    psra%1  %2, 1  ; %2: %2>>1
  1091. +    padd%1  %3, %5 ; %3: %3>>1+%2
  1092. +    psub%1  %2, %4 ; %2: %2>>1-%3
  1093.  %endmacro
  1094.  
  1095.  %macro DCT4_1D 5
  1096.  %ifnum %5
  1097. -    SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
  1098. -    SUMSUB_BA   m%3, m%4, m%5
  1099. -    SUMSUB2_AB  m%1, m%2, m%5
  1100. +    SUMSUB_BADC w, m%4, m%1, m%3, m%2, m%5
  1101. +    SUMSUB_BA   w, m%3, m%4, m%5
  1102. +    SUMSUB2_AB  w, m%1, m%2, m%5
  1103.      SWAP %1, %3, %4, %5, %2
  1104.  %else
  1105. -    SUMSUB_BADC m%4, m%1, m%3, m%2
  1106. -    SUMSUB_BA   m%3, m%4
  1107. +    SUMSUB_BADC w, m%4, m%1, m%3, m%2
  1108. +    SUMSUB_BA   w, m%3, m%4
  1109.      mova       [%5], m%2
  1110. -    SUMSUB2_AB m%1, [%5], m%2
  1111. +    SUMSUB2_AB w, m%1, [%5], m%2
  1112.      SWAP %1, %3, %4, %2
  1113.  %endif
  1114.  %endmacro
  1115.  
  1116. -%macro IDCT4_1D 5-6
  1117. -%ifnum %5
  1118. -    SUMSUBD2_AB m%2, m%4, m%6, m%5
  1119. -    ; %2: %2>>1-%4 %4: %2+%4>>1
  1120. -    SUMSUB_BA   m%3, m%1, m%6
  1121. -    ; %3: %1+%3 %1: %1-%3
  1122. -    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
  1123. -    ; %4: %1+%3 + (%2+%4>>1)
  1124. -    ; %3: %1+%3 - (%2+%4>>1)
  1125. -    ; %2: %1-%3 + (%2>>1-%4)
  1126. -    ; %1: %1-%3 - (%2>>1-%4)
  1127. +%macro IDCT4_1D 6-7
  1128. +%ifnum %6
  1129. +    SUMSUBD2_AB %1, m%3, m%5, m%7, m%6
  1130. +    ; %3: %3>>1-%5 %5: %3+%5>>1
  1131. +    SUMSUB_BA   %1, m%4, m%2, m%7
  1132. +    ; %4: %2+%4 %2: %2-%4
  1133. +    SUMSUB_BADC %1, m%5, m%4, m%3, m%2, m%7
  1134. +    ; %5: %2+%4 + (%3+%5>>1)
  1135. +    ; %4: %2+%4 - (%3+%5>>1)
  1136. +    ; %3: %2-%4 + (%3>>1-%5)
  1137. +    ; %2: %2-%4 - (%3>>1-%5)
  1138. +%else
  1139. +%ifidn %1, w
  1140. +    SUMSUBD2_AB %1, m%3, m%5, [%6], [%6+16]
  1141.  %else
  1142. -    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
  1143. -    SUMSUB_BA   m%3, m%1
  1144. -    SUMSUB_BADC m%4, m%3, m%2, m%1
  1145. +    SUMSUBD2_AB %1, m%3, m%5, [%6], [%6+32]
  1146. +%endif
  1147. +    SUMSUB_BA   %1, m%4, m%2
  1148. +    SUMSUB_BADC %1, m%5, m%4, m%3, m%2
  1149.  %endif
  1150. -    SWAP %1, %4, %3
  1151. -    ; %1: %1+%3 + (%2+%4>>1) row0
  1152. -    ; %2: %1-%3 + (%2>>1-%4) row1
  1153. -    ; %3: %1-%3 - (%2>>1-%4) row2
  1154. -    ; %4: %1+%3 - (%2+%4>>1) row3
  1155. +    SWAP %2, %5, %4
  1156. +    ; %2: %2+%4 + (%3+%5>>1) row0
  1157. +    ; %3: %2-%4 + (%3>>1-%5) row1
  1158. +    ; %4: %2-%4 - (%3>>1-%5) row2
  1159. +    ; %5: %2+%4 - (%3+%5>>1) row3
  1160.  %endmacro
  1161.  
  1162.  
  1163. --
  1164. 1.7.3.2.146.gca209
  1165.  
  1166.  
  1167. From 4b3f4d2d32c4e6867e5b3eaaa03e796fc66979f6 Mon Sep 17 00:00:00 2001
  1168. From: Alex Wright <alexw0885@gmail.com>
  1169. Date: Wed, 24 Nov 2010 02:19:51 -0800
  1170. Subject: [PATCH 9/9] Make --weightp 1 a better speed tradeoff
  1171.  Since fade analysis is now so fast, weightp 1 now does fade analysis but no reference duplication.
  1172.  This is the opposite of what it used to do (reference duplication but no fade analysis).
  1173.  This also gives weightp's better fade quality to faster presets (up to superfast).
  1174.  
  1175. ---
  1176. common/common.c       |    7 ++++---
  1177.  common/macroblock.c   |    8 +++-----
  1178.  encoder/encoder.c     |   23 ++++++++---------------
  1179.  encoder/ratecontrol.c |    4 ++--
  1180.  encoder/slicetype.c   |    5 ++---
  1181.  x264.c                |    4 ++--
  1182.  x264.h                |    4 ++--
  1183.  7 files changed, 23 insertions(+), 32 deletions(-)
  1184.  
  1185. diff --git a/common/common.c b/common/common.c
  1186. index 1f99e9e..1845e3b 100644
  1187. --- a/common/common.c
  1188. +++ b/common/common.c
  1189. @@ -204,7 +204,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1190.          param->analyse.b_mixed_references = 0;
  1191.          param->analyse.i_trellis = 0;
  1192.          param->rc.b_mb_tree = 0;
  1193. -        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1194. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1195.          param->rc.i_lookahead = 0;
  1196.      }
  1197.      else if( !strcasecmp( preset, "veryfast" ) )
  1198. @@ -214,7 +214,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1199.          param->i_frame_reference = 1;
  1200.          param->analyse.b_mixed_references = 0;
  1201.          param->analyse.i_trellis = 0;
  1202. -        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1203. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1204.          param->rc.i_lookahead = 10;
  1205.      }
  1206.      else if( !strcasecmp( preset, "faster" ) )
  1207. @@ -222,13 +222,14 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1208.          param->analyse.b_mixed_references = 0;
  1209.          param->i_frame_reference = 2;
  1210.          param->analyse.i_subpel_refine = 4;
  1211. -        param->analyse.i_weighted_pred = X264_WEIGHTP_BLIND;
  1212. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1213.          param->rc.i_lookahead = 20;
  1214.      }
  1215.      else if( !strcasecmp( preset, "fast" ) )
  1216.      {
  1217.          param->i_frame_reference = 2;
  1218.          param->analyse.i_subpel_refine = 6;
  1219. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1220.          param->rc.i_lookahead = 30;
  1221.      }
  1222.      else if( !strcasecmp( preset, "medium" ) )
  1223. diff --git a/common/macroblock.c b/common/macroblock.c
  1224. index 5c76d3f..9075efc9 100644
  1225. --- a/common/macroblock.c
  1226. +++ b/common/macroblock.c
  1227. @@ -239,8 +239,6 @@ int x264_macroblock_cache_allocate( x264_t *h )
  1228.          int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  1229.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1230.              i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
  1231. -        else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  1232. -            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1); //blind weights add one duplicate frame
  1233.  
  1234.          for( int j = !i; j < i_refs; j++ )
  1235.          {
  1236. @@ -277,7 +275,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
  1237.                  //SMART can weight one ref and one offset -1
  1238.                  numweightbuf = 2;
  1239.              else
  1240. -                //blind only has one weighted copy (offset -1)
  1241. +                //simple only has one weighted ref
  1242.                  numweightbuf = 1;
  1243.          }
  1244.  
  1245. @@ -398,7 +396,7 @@ void x264_macroblock_slice_init( x264_t *h )
  1246.      {
  1247.          memset( h->mb.cache.skip, 0, sizeof( h->mb.cache.skip ) );
  1248.  
  1249. -        if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred )
  1250. +        if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1251.          {
  1252.              deblock_ref_table(-2) = -2;
  1253.              deblock_ref_table(-1) = -1;
  1254. @@ -999,7 +997,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  1255.          h->mb.i_neighbour = new_neighbour;
  1256.      }
  1257.  
  1258. -    if( h->param.analyse.i_weighted_pred && h->sh.i_type == SLICE_TYPE_P )
  1259. +    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.i_type == SLICE_TYPE_P )
  1260.      {
  1261.          /* Handle reference frame duplicates */
  1262.          int i8 = x264_scan8[0] - 8;
  1263. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1264. index 2d5c778..591f527 100644
  1265. --- a/encoder/encoder.c
  1266. +++ b/encoder/encoder.c
  1267. @@ -772,8 +772,6 @@ static int x264_validate_parameters( x264_t *h )
  1268.      h->param.analyse.i_weighted_pred = x264_clip3( h->param.analyse.i_weighted_pred, X264_WEIGHTP_NONE, X264_WEIGHTP_SMART );
  1269.      if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
  1270.          h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;
  1271. -    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND && BIT_DEPTH > 8 )
  1272. -        h->param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1273.  
  1274.      if( h->i_thread_frames > 1 )
  1275.      {
  1276. @@ -996,7 +994,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  1277.            || h->param.i_bframe_adaptive
  1278.            || h->param.i_scenecut_threshold
  1279.            || h->param.rc.b_mb_tree
  1280. -          || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART );
  1281. +          || h->param.analyse.i_weighted_pred );
  1282.      h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
  1283.      h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  1284.  
  1285. @@ -1432,6 +1430,10 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t
  1286.      if( i <= 1 ) /* empty list, definitely can't duplicate frame */
  1287.          return -1;
  1288.  
  1289. +    //Duplication is only used in X264_WEIGHTP_SMART
  1290. +    if( h->param.analyse.i_weighted_pred != X264_WEIGHTP_SMART )
  1291. +        return -1;
  1292. +
  1293.      /* Duplication is a hack to compensate for crappy rounding in motion compensation.
  1294.       * With high bit depth, it's not worth doing, so turn it off except in the case of
  1295.       * unweighted dupes. */
  1296. @@ -1609,7 +1611,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  1297.      if( h->fenc->i_type == X264_TYPE_P )
  1298.      {
  1299.          int idx = -1;
  1300. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1301. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1302.          {
  1303.              x264_weight_t w[3];
  1304.              w[1].weightfn = w[2].weightfn = NULL;
  1305. @@ -1638,15 +1640,6 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  1306.                  }
  1307.              }
  1308.          }
  1309. -        else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  1310. -        {
  1311. -            //weighted offset=-1
  1312. -            x264_weight_t w[3];
  1313. -            SET_WEIGHT( w[0], 1, 1, 0, -1 );
  1314. -            h->fenc->weight[0][0].i_denom = 0;
  1315. -            w[1].weightfn = w[2].weightfn = NULL;
  1316. -            idx = x264_weighted_reference_duplicate( h, 0, w );
  1317. -        }
  1318.          h->mb.ref_blind_dupe = idx;
  1319.      }
  1320.  
  1321. @@ -2876,7 +2869,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  1322.      if( h->sh.i_type == SLICE_TYPE_P )
  1323.      {
  1324.          h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
  1325. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1326. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1327.          {
  1328.              h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn;
  1329.              h->stat.i_wpred[1] += !!h->sh.weight[0][1].weightfn || !!h->sh.weight[0][2].weightfn;
  1330. @@ -3225,7 +3218,7 @@ void    x264_encoder_close  ( x264_t *h )
  1331.                        fixed_pred_modes[3][2] * 100.0 / sum_pred_modes[3],
  1332.                        fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] );
  1333.  
  1334. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  1335. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  1336.              x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
  1337.                        h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
  1338.                        h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  1339. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1340. index e949e24..780c0e1 100644
  1341. --- a/encoder/ratecontrol.c
  1342. +++ b/encoder/ratecontrol.c
  1343. @@ -284,7 +284,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
  1344.              }
  1345.          }
  1346.          /* Need variance data for weighted prediction */
  1347. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1348. +        if( h->param.analyse.i_weighted_pred )
  1349.          {
  1350.              for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
  1351.                  for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
  1352. @@ -1558,7 +1558,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  1353.                  goto fail;
  1354.          }
  1355.  
  1356. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
  1357. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->sh.weight[0][0].weightfn )
  1358.          {
  1359.              if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d",
  1360.                           h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  1361. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  1362. index dd6c360..4f47710 100644
  1363. --- a/encoder/slicetype.c
  1364. +++ b/encoder/slicetype.c
  1365. @@ -647,8 +647,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  1366.          do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
  1367.          if( do_search[0] )
  1368.          {
  1369. -            if( ( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART ||
  1370. -                  h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE ) && b == p1 )
  1371. +            if( h->param.analyse.i_weighted_pred && b == p1 )
  1372.              {
  1373.                  x264_emms();
  1374.                  x264_weights_analyse( h, frames[b], frames[p0], 1 );
  1375. @@ -1549,7 +1548,7 @@ void x264_slicetype_decide( x264_t *h )
  1376.  
  1377.      /* Analyse for weighted P frames */
  1378.      if( !h->param.rc.b_stat_read && h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
  1379. -        && h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1380. +        && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1381.      {
  1382.          x264_emms();
  1383.          x264_weights_analyse( h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 0 );
  1384. diff --git a/x264.c b/x264.c
  1385. index 38af46b..f0a9f13 100644
  1386. --- a/x264.c
  1387. +++ b/x264.c
  1388. @@ -609,8 +609,8 @@ static void help( x264_param_t *defaults, int longhelp )
  1389.      H2( "      --no-weightb            Disable weighted prediction for B-frames\n" );
  1390.      H1( "      --weightp <integer>     Weighted prediction for P-frames [%d]\n"
  1391.          "                                  - 0: Disabled\n"
  1392. -        "                                  - 1: Blind offset\n"
  1393. -        "                                  - 2: Smart analysis\n", defaults->analyse.i_weighted_pred );
  1394. +        "                                  - 1: Weighted refs\n"
  1395. +        "                                  - 2: Weighted refs + Duplicates\n", defaults->analyse.i_weighted_pred );
  1396.      H1( "      --me <string>           Integer pixel motion estimation method [\"%s\"]\n",
  1397.                                         strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) );
  1398.      H2( "                                  - dia: diamond search, radius 1 (fast)\n"
  1399. diff --git a/x264.h b/x264.h
  1400. index e144e51..bfe478b 100644
  1401. --- a/x264.h
  1402. +++ b/x264.h
  1403. @@ -39,7 +39,7 @@
  1404.  
  1405.  #include <stdarg.h>
  1406.  
  1407. -#define X264_BUILD 109
  1408. +#define X264_BUILD 110
  1409.  
  1410.  /* x264_t:
  1411.   *      opaque handler for encoder */
  1412. @@ -151,7 +151,7 @@ typedef struct
  1413.  #define X264_B_ADAPT_FAST            1
  1414.  #define X264_B_ADAPT_TRELLIS         2
  1415.  #define X264_WEIGHTP_NONE            0
  1416. -#define X264_WEIGHTP_BLIND           1
  1417. +#define X264_WEIGHTP_SIMPLE          1
  1418.  #define X264_WEIGHTP_SMART           2
  1419.  #define X264_B_PYRAMID_NONE          0
  1420.  #define X264_B_PYRAMID_STRICT        1
  1421. --
  1422. 1.7.3.2.146.gca209
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement