Advertisement
Guest User

Untitled

a guest
Jun 30th, 2017
548
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 48.48 KB | None | 0 0
  1. From ee75acd55e1a89eb9a8c2f7d14c923b57e08ceb3 Mon Sep 17 00:00:00 2001
  2. From: Sean McGovern <gseanmcg@gmail.com>
  3. Date: Sun, 10 Oct 2010 19:34:18 -0400
  4. Subject: [PATCH 1/9] Fix build on SPARC Solaris 10
  5.  
  6. ---
  7. common/pixel.c |    6 +++---
  8.  configure      |   29 +++++++++++++++++------------
  9.  2 files changed, 20 insertions(+), 15 deletions(-)
  10.  
  11. diff --git a/common/pixel.c b/common/pixel.c
  12. index 1e21550..7fa497c 100644
  13. --- a/common/pixel.c
  14. +++ b/common/pixel.c
  15. @@ -36,7 +36,7 @@
  16.  #if ARCH_ARM
  17.  #   include "arm/pixel.h"
  18.  #endif
  19. -#if ARCH_UltraSparc
  20. +#if ARCH_UltraSPARC
  21.  #   include "sparc/pixel.h"
  22.  #endif
  23.  
  24. @@ -443,7 +443,7 @@ SAD_X( 4x8 )
  25.  SAD_X( 4x4 )
  26.  
  27.  #if !HIGH_BIT_DEPTH
  28. -#if ARCH_UltraSparc
  29. +#if ARCH_UltraSPARC
  30.  SAD_X( 16x16_vis )
  31.  SAD_X( 16x8_vis )
  32.  SAD_X( 8x16_vis )
  33. @@ -1063,7 +1063,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  34.      }
  35.  #endif
  36.  #if !HIGH_BIT_DEPTH
  37. -#if ARCH_UltraSparc
  38. +#if ARCH_UltraSPARC
  39.      INIT4( sad, _vis );
  40.      INIT4( sad_x3, _vis );
  41.      INIT4( sad_x4, _vis );
  42. diff --git a/configure b/configure
  43. index 2f38154..fd62337 100755
  44. --- a/configure
  45. +++ b/configure
  46. @@ -392,15 +392,20 @@ case $host_cpu in
  47.      fi
  48.      ;;
  49.    sparc)
  50. -    if [ $asm = auto ] && test "$(uname -m)" = "sun4u"; then
  51. -      ARCH="UltraSparc"
  52. -      CFLAGS="$CFLAGS -mcpu=ultrasparc"
  53. -      LDFLAGS="$LDFLAGS -mcpu=ultrasparc"
  54. -      AS="${AS-${cross_prefix}as}"
  55. -      ASFLAGS="$ASFLAGS -xarch=v8plusa"
  56. -    else
  57. -      ARCH="Sparc"
  58. -    fi
  59. +    ARCH="SPARC"
  60. +    case $(uname -m) in
  61. +      sun4u|sun4v)
  62. +        if [ $asm = auto ]; then
  63. +          ARCH="UltraSPARC"
  64. +          if ! echo $CFLAGS | grep -Eq '\-mcpu' ; then
  65. +            CFLAGS="$CFLAGS -mcpu=ultrasparc"
  66. +            LDFLAGS="$LDFLAGS -mcpu=ultrasparc"
  67. +          fi
  68. +          AS="${AS-${cross_prefix}as}"
  69. +          ASFLAGS="$ASFLAGS -xarch=v8plusa"
  70. +        fi
  71. +        ;;
  72. +    esac
  73.      ;;
  74.    mips|mipsel|mips64|mips64el)
  75.      ARCH="MIPS"
  76. @@ -497,11 +502,11 @@ fi
  77.  define ARCH_$ARCH
  78.  define SYS_$SYS
  79.  
  80. -echo "int i = 0x42494745; double f = 0x1.0656e6469616ep+102;" > conftest.c
  81. +echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c
  82.  $CC $CFLAGS conftest.c -c -o conftest.o 2>$DEVNULL || die "endian test failed"
  83. -if grep -q BIGE conftest.o && grep -q FPendian conftest.o ; then
  84. +if (strings -a conftest.o | grep -q BIGE) && (strings -a conftest.o | grep -q FPendian) ; then
  85.      define WORDS_BIGENDIAN
  86. -elif !(grep -q EGIB conftest.o && grep -q naidnePF conftest.o) ; then
  87. +elif !(strings -a conftest.o | grep -q EGIB && strings -a conftest.o | grep -q naidnePF) ; then
  88.      die "endian test failed"
  89.  fi
  90.  
  91. --
  92. 1.7.3.2.146.gca209
  93.  
  94.  
  95. From b38fd04c376bac31544782fabe03471567a8badf Mon Sep 17 00:00:00 2001
  96. From: Sean McGovern <gseanmcg@gmail.com>
  97. Date: Sun, 21 Nov 2010 01:59:33 -0500
  98. Subject: [PATCH 2/9] Only build SPARC VIS asm if high bit-depth is disabled
  99.  
  100. ---
  101. Makefile |    2 ++
  102.  1 files changed, 2 insertions(+), 0 deletions(-)
  103.  
  104. diff --git a/Makefile b/Makefile
  105. index 8a3a327..21f57e7 100644
  106. --- a/Makefile
  107. +++ b/Makefile
  108. @@ -110,9 +110,11 @@ endif
  109.  
  110.  # VIS optims
  111.  ifeq ($(ARCH),UltraSparc)
  112. +ifeq ($(findstring HIGH_BIT_DEPTH, $(CONFIG)),)
  113.  ASMSRC += common/sparc/pixel.asm
  114.  OBJASM  = $(ASMSRC:%.asm=%.o)
  115.  endif
  116. +endif
  117.  
  118.  ifneq ($(HAVE_GETOPT_LONG),1)
  119.  SRCCLI += extras/getopt.c
  120. --
  121. 1.7.3.2.146.gca209
  122.  
  123.  
  124. From bb45211ef98ae8b382a15d4b03c2dc5c8d8f67e0 Mon Sep 17 00:00:00 2001
  125. From: Steven Walters <kemuri9@gmail.com>
  126. Date: Mon, 22 Nov 2010 10:31:05 +0900
  127. Subject: [PATCH 3/9] Fix configure so that boolean configuration options are 1/0
  128.  
  129. Many boolean options were defined as 1 when enabled but left undefined when disabled, instead of being defined as 0.
  130. ---
  131. Makefile            |   12 ++++++------
  132.  common/arm/asm.S    |    2 +-
  133.  common/bitstream.c  |    4 ++--
  134.  common/macroblock.h |    2 +-
  135.  configure           |   15 +++++++++------
  136.  5 files changed, 19 insertions(+), 16 deletions(-)
  137.  
  138. diff --git a/Makefile b/Makefile
  139. index 21f57e7..82f0bbc 100644
  140. --- a/Makefile
  141. +++ b/Makefile
  142. @@ -25,29 +25,29 @@ SRCSO =
  143.  CONFIG := $(shell cat config.h)
  144.  
  145.  # GPL-only files
  146. -ifeq ($(GPL),yes)
  147. +ifneq ($(findstring HAVE_GPL 1, $(CONFIG)),)
  148.  SRCCLI +=
  149.  endif
  150.  
  151.  # Optional module sources
  152. -ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
  153. +ifneq ($(findstring HAVE_AVS 1, $(CONFIG)),)
  154.  SRCCLI += input/avs.c
  155.  endif
  156.  
  157. -ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
  158. +ifneq ($(findstring HAVE_PTHREAD 1, $(CONFIG)),)
  159.  SRCCLI += input/thread.c
  160.  SRCS   += common/threadpool.c
  161.  endif
  162.  
  163. -ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
  164. +ifneq ($(findstring HAVE_LAVF 1, $(CONFIG)),)
  165.  SRCCLI += input/lavf.c
  166.  endif
  167.  
  168. -ifneq ($(findstring HAVE_FFMS, $(CONFIG)),)
  169. +ifneq ($(findstring HAVE_FFMS 1, $(CONFIG)),)
  170.  SRCCLI += input/ffms.c
  171.  endif
  172.  
  173. -ifneq ($(findstring HAVE_GPAC, $(CONFIG)),)
  174. +ifneq ($(findstring HAVE_GPAC 1, $(CONFIG)),)
  175.  SRCCLI += output/mp4.c
  176.  endif
  177.  
  178. diff --git a/common/arm/asm.S b/common/arm/asm.S
  179. index 7434262..92e3b14 100644
  180. --- a/common/arm/asm.S
  181. +++ b/common/arm/asm.S
  182. @@ -65,7 +65,7 @@ ELF     .type   \name, %function
  183.          .endm
  184.  
  185.  .macro movconst rd, val
  186. -#ifdef HAVE_ARMV6T2
  187. +#if HAVE_ARMV6T2
  188.      movw        \rd, #:lower16:\val
  189.  .if \val >> 16
  190.      movt        \rd, #:upper16:\val
  191. diff --git a/common/bitstream.c b/common/bitstream.c
  192. index 8350fb3..0f2bc9f 100644
  193. --- a/common/bitstream.c
  194. +++ b/common/bitstream.c
  195. @@ -39,7 +39,7 @@ static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
  196.      return dst;
  197.  }
  198.  
  199. -#ifdef HAVE_MMX
  200. +#if HAVE_MMX
  201.  uint8_t *x264_nal_escape_mmxext( uint8_t *dst, uint8_t *src, uint8_t *end );
  202.  uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
  203.  #endif
  204. @@ -88,7 +88,7 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
  205.  void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
  206.  {
  207.      pf->nal_escape = x264_nal_escape_c;
  208. -#ifdef HAVE_MMX
  209. +#if HAVE_MMX
  210.      if( cpu&X264_CPU_MMXEXT )
  211.          pf->nal_escape = x264_nal_escape_mmxext;
  212.      if( (cpu&X264_CPU_SSE2) && (cpu&X264_CPU_SSE2_IS_FAST) )
  213. diff --git a/common/macroblock.h b/common/macroblock.h
  214. index 7562948..ce4ead9 100644
  215. --- a/common/macroblock.h
  216. +++ b/common/macroblock.h
  217. @@ -364,7 +364,7 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
  218.  }
  219.  static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
  220.  {
  221. -#ifdef WORDS_BIGENDIAN
  222. +#if WORDS_BIGENDIAN
  223.     return b + ((uint64_t)a<<32);
  224.  #else
  225.     return a + ((uint64_t)b<<32);
  226. diff --git a/configure b/configure
  227. index fd62337..ef33323 100755
  228. --- a/configure
  229. +++ b/configure
  230. @@ -171,6 +171,9 @@ cross_prefix=""
  231.  
  232.  EXE=""
  233.  
  234. +# list of all preprocessor HAVE values we can define
  235. +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON PTHREAD LOG2F VISUALIZE SWSCALE LAVF FFMS GPAC GF_MALLOC AVS GPL"
  236. +
  237.  # parse options
  238.  
  239.  for opt do
  240. @@ -735,11 +738,12 @@ fi
  241.  define BIT_DEPTH $bit_depth
  242.  ASFLAGS="$ASFLAGS -DBIT_DEPTH=$bit_depth"
  243.  
  244. -if [ $gpl = yes ]; then
  245. -    define HAVE_GPL 1
  246. -else
  247. -    define HAVE_GPL 0
  248. -fi
  249. +[ $gpl = yes ] && define HAVE_GPL
  250. +
  251. +# Define every HAVE_* macro in CONFIG_HAVE that is not already set to 1 as 0
  252. +for var in $CONFIG_HAVE; do
  253. +    grep -q "HAVE_$var 1" config.h || define HAVE_$var 0
  254. +done
  255.  
  256.  rm -f conftest*
  257.  
  258. @@ -766,7 +770,6 @@ EXE=$EXE
  259.  VIS=$vis
  260.  HAVE_GETOPT_LONG=$HAVE_GETOPT_LONG
  261.  DEVNULL=$DEVNULL
  262. -GPL=$gpl
  263.  EOF
  264.  
  265.  if [ "$shared" = "yes" ]; then
  266. --
  267. 1.7.3.2.146.gca209
  268.  
  269.  
  270. From 8363900cb951db4acc1ed82fac2ede3996c4703f Mon Sep 17 00:00:00 2001
  271. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  272. Date: Sat, 20 Nov 2010 23:30:42 -0800
  273. Subject: [PATCH 4/9] Change qpmin default to 0
  274.  There's probably no real reason to keep it at 10 anymore, and lowering it allows AQ to pick lower quantizers in really flat areas.
  275.  Might help on gradients at high quality levels.
  276.  The previous value of 10 was arbitrary anyway.
  277.  
  278. ---
  279. common/common.c |    2 +-
  280.  1 files changed, 1 insertions(+), 1 deletions(-)
  281.  
  282. diff --git a/common/common.c b/common/common.c
  283. index 6c88556..1f99e9e 100644
  284. --- a/common/common.c
  285. +++ b/common/common.c
  286. @@ -98,7 +98,7 @@ void x264_param_default( x264_param_t *param )
  287.      param->rc.f_vbv_buffer_init = 0.9;
  288.      param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
  289.      param->rc.f_rf_constant = 23;
  290. -    param->rc.i_qp_min = 10;
  291. +    param->rc.i_qp_min = 0;
  292.      param->rc.i_qp_max = QP_MAX;
  293.      param->rc.i_qp_step = 4;
  294.      param->rc.f_ip_factor = 1.4;
  295. --
  296. 1.7.3.2.146.gca209
  297.  
  298.  
  299. From 07c6b3fd5028057ad8f6511e0773506a2bba69b9 Mon Sep 17 00:00:00 2001
  300. From: Yasuhiro Ikeda <wipple625@gmail.com>
  301. Date: Mon, 22 Nov 2010 11:01:57 +0900
  302. Subject: [PATCH 5/9] Add some more info to `x264 --version`
  303.  
  304. ---
  305. x264.c |   23 ++++++++++++++++++++---
  306.  1 files changed, 20 insertions(+), 3 deletions(-)
  307.  
  308. diff --git a/x264.c b/x264.c
  309. index bba17b8..f9a1c6a 100644
  310. --- a/x264.c
  311. +++ b/x264.c
  312. @@ -55,6 +55,14 @@
  313.  #include <libavutil/pixdesc.h>
  314.  #endif
  315.  
  316. +#if HAVE_SWSCALE
  317. +#include <libswscale/swscale.h>
  318. +#endif
  319. +
  320. +#if HAVE_FFMS
  321. +#include <ffms.h>
  322. +#endif
  323. +
  324.  /* Ctrl-C handler */
  325.  static volatile int b_ctrl_c = 0;
  326.  static int          b_exit_on_ctrl_c = 0;
  327. @@ -208,6 +216,15 @@ static void print_version_info()
  328.  #else
  329.      printf( "x264 0.%d.X\n", X264_BUILD );
  330.  #endif
  331. +#if HAVE_SWSCALE
  332. +    printf( "(libswscale %d.%d.%d)\n", LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO );
  333. +#endif
  334. +#if HAVE_LAVF
  335. +    printf( "(libavformat %d.%d.%d)\n", LIBAVFORMAT_VERSION_MAJOR, LIBAVFORMAT_VERSION_MINOR, LIBAVFORMAT_VERSION_MICRO );
  336. +#endif
  337. +#if HAVE_FFMS
  338. +    printf( "(ffmpegsource %d.%d.%d.%d)\n", FFMS_VERSION >> 24, (FFMS_VERSION & 0xff0000) >> 16, (FFMS_VERSION & 0xff00) >> 8, FFMS_VERSION & 0xff );
  339. +#endif
  340.      printf( "built on " __DATE__ ", " );
  341.  #ifdef __GNUC__
  342.      printf( "gcc: " __VERSION__ "\n" );
  343. @@ -221,9 +238,9 @@ static void print_version_info()
  344.  #else
  345.      printf( "Non-GPL commercial\n" );
  346.  #endif
  347. -#if HAVE_LAVF
  348. -    const char *license = avformat_license();
  349. -    printf( "libavformat license: %s\n", license );
  350. +#if HAVE_SWSCALE
  351. +    const char *license = swscale_license();
  352. +    printf( "libswscale%s%s license: %s\n",HAVE_LAVF ? "/libavformat" : "", HAVE_FFMS ? "/ffmpegsource" : "" , license );
  353.      if( !strcmp( license, "nonfree and unredistributable" ) ||
  354.         (!HAVE_GPL && (!strcmp( license, "GPL version 2 or later" )
  355.                    ||  !strcmp( license, "GPL version 3 or later" ))))
  356. --
  357. 1.7.3.2.146.gca209
  358.  
  359.  
  360. From b4650b65511b1f5c348d298abda02db99e7fcd16 Mon Sep 17 00:00:00 2001
  361. From: Jason Garrett-Glaser <darkshikari@gmail.com>
  362. Date: Fri, 19 Nov 2010 16:58:38 -0800
  363. Subject: [PATCH 6/9] Add API function to return max number of delayed frames
  364.  
  365. ---
  366. encoder/encoder.c |    8 +++++---
  367.  x264.h            |    6 +++++-
  368.  2 files changed, 10 insertions(+), 4 deletions(-)
  369.  
  370. diff --git a/encoder/encoder.c b/encoder/encoder.c
  371. index 8b14b41..2d5c778 100644
  372. --- a/encoder/encoder.c
  373. +++ b/encoder/encoder.c
  374. @@ -3320,9 +3320,6 @@ void    x264_encoder_close  ( x264_t *h )
  375.      }
  376.  }
  377.  
  378. -/****************************************************************************
  379. - * x264_encoder_delayed_frames:
  380. - ****************************************************************************/
  381.  int x264_encoder_delayed_frames( x264_t *h )
  382.  {
  383.      int delayed_frames = 0;
  384. @@ -3343,3 +3340,8 @@ int x264_encoder_delayed_frames( x264_t *h )
  385.      x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
  386.      return delayed_frames;
  387.  }
  388. +
  389. +int x264_encoder_maximum_delayed_frames( x264_t *h )
  390. +{
  391. +    return h->frames.i_delay;
  392. +}
  393. diff --git a/x264.h b/x264.h
  394. index ce79d40..e144e51 100644
  395. --- a/x264.h
  396. +++ b/x264.h
  397. @@ -39,7 +39,7 @@
  398.  
  399.  #include <stdarg.h>
  400.  
  401. -#define X264_BUILD 108
  402. +#define X264_BUILD 109
  403.  
  404.  /* x264_t:
  405.   *      opaque handler for encoder */
  406. @@ -766,6 +766,10 @@ void    x264_encoder_close  ( x264_t * );
  407.   *      return the number of currently delayed (buffered) frames
  408.   *      this should be used at the end of the stream, to know when you have all the encoded frames. */
  409.  int     x264_encoder_delayed_frames( x264_t * );
  410. +/* x264_encoder_maximum_delayed_frames( x264_t *h ):
  411. + *      return the maximum number of delayed (buffered) frames that can occur with the current
  412. + *      parameters. */
  413. +int     x264_encoder_maximum_delayed_frames( x264_t *h );
  414.  /* x264_encoder_intra_refresh:
  415.   *      If an intra refresh is not in progress, begin one with the next P-frame.
  416.   *      If an intra refresh is in progress, begin one as soon as the current one finishes.
  417. --
  418. 1.7.3.2.146.gca209
  419.  
  420.  
  421. From 3797b68a8201c74a321883f90536ce30a251e187 Mon Sep 17 00:00:00 2001
  422. From: Anton Mitrofanov <BugMaster@narod.ru>
  423. Date: Tue, 23 Nov 2010 23:06:51 +0300
  424. Subject: [PATCH 7/9] Clean up of weights analyse function
  425.  
  426. ---
  427. encoder/slicetype.c |   42 +++++++++++++++++++-----------------------
  428.  1 files changed, 19 insertions(+), 23 deletions(-)
  429.  
  430. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  431. index edf74c4..dd6c360 100644
  432. --- a/encoder/slicetype.c
  433. +++ b/encoder/slicetype.c
  434. @@ -57,7 +57,7 @@ static void x264_lowres_context_init( x264_t *h, x264_mb_analysis_t *a )
  435.  }
  436.  
  437.  /* makes a non-h264 weight (i.e. fix7), into an h264 weight */
  438. -static void x264_weight_get_h264( unsigned int weight_nonh264, int offset, x264_weight_t *w )
  439. +static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t *w )
  440.  {
  441.      w->i_offset = offset;
  442.      w->i_denom = 7;
  443. @@ -221,39 +221,37 @@ static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *f
  444.  
  445.  void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
  446.  {
  447. -    float fenc_mean, ref_mean, fenc_var, ref_var;
  448. -    int offset_search;
  449. -    int minoff, minscale, mindenom;
  450. -    unsigned int minscore, origscore;
  451.      int i_delta_index = fenc->i_frame - ref->i_frame - 1;
  452.      /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
  453. -    const float epsilon = 1.0/128.0;
  454. -    float guess_scale;
  455. -    int found;
  456. +    const float epsilon = 1.f/128.f;
  457.      x264_weight_t *weights = fenc->weight[0];
  458. +    SET_WEIGHT( weights[0], 0, 1, 0, 0 );
  459.      SET_WEIGHT( weights[1], 0, 1, 0, 0 );
  460.      SET_WEIGHT( weights[2], 0, 1, 0, 0 );
  461.      /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
  462.      for( int plane = 0; plane <= 2  && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
  463.      {
  464. -        fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
  465. -        ref_var  = round( sqrt(  ref->i_pixel_ssd[plane] ) );
  466. -        fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  467. -        ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  468. +        int offset_search;
  469. +        int minoff, minscale, mindenom;
  470. +        unsigned int minscore, origscore;
  471. +        int found;
  472. +        float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
  473. +        float ref_var  =  ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
  474. +        float guess_scale = sqrtf( fenc_var / ref_var );
  475. +        float fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  476. +        float ref_mean  = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
  477.  
  478.          //early termination
  479. -        if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
  480. +        if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale ) < epsilon )
  481.          {
  482.              SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  483.              continue;
  484.          }
  485.  
  486. -        guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
  487. -
  488.          if( plane )
  489.          {
  490.              weights[plane].i_denom = 6;
  491. -            weights[plane].i_scale = x264_clip3( round(guess_scale * 64.0), 0, 255 );
  492. +            weights[plane].i_scale = x264_clip3( (int)(guess_scale * 64.f + 0.5f), 0, 255 );
  493.              if( weights[plane].i_scale > 127 )
  494.              {
  495.                  weights[1].weightfn = weights[2].weightfn = NULL;
  496. @@ -261,9 +259,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  497.              }
  498.          }
  499.          else
  500. -            x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
  501. -        if( weights[plane].weightfn )
  502. -            h->mc.weight_cache( h, &weights[plane] );
  503. +            x264_weight_get_h264( (int)(guess_scale * 128.f + 0.5f), 0, &weights[plane] );
  504.  
  505.          found = 0;
  506.          mindenom = weights[plane].i_denom;
  507. @@ -280,7 +276,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  508.                  x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
  509.              }
  510.              mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
  511. -            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
  512. +            origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, NULL );
  513.          }
  514.          else
  515.          {
  516. @@ -290,7 +286,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  517.              if( plane == 1 )
  518.                  x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
  519.              mcbuf = plane == 1 ? dstu : dstv;
  520. -            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0 );
  521. +            origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, NULL );
  522.          }
  523.  
  524.          if( !minscore )
  525. @@ -299,7 +295,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  526.          // This gives a slight improvement due to rounding errors but only tests
  527.          // one offset on lookahead.
  528.          // TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
  529. -        offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
  530. +        offset_search = x264_clip3( (int)(fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f * b_lookahead), -128, 126 );
  531.          for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
  532.          {
  533.              SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
  534. @@ -314,7 +310,7 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
  535.  
  536.          /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
  537.          /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
  538. -        if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
  539. +        if( !found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f )
  540.          {
  541.              SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
  542.              continue;
  543. --
  544. 1.7.3.2.146.gca209
  545.  
  546.  
  547. From a72ed07c56c9a456c9fa38db0722a37d0eaac101 Mon Sep 17 00:00:00 2001
  548. From: Daniel Kang <daniel.d.kang@gmail.com>
  549. Date: Tue, 23 Nov 2010 20:29:37 -0500
  550. Subject: [PATCH 8/9] SSE version of high-bit-depth add4x4_idct_sse2
  551.  ~6.3x faster than C.
  552.  Our first Google Code-In patch!
  553.  
  554. ---
  555. common/dct.c           |    4 +
  556.  common/x86/const-a.asm |    1 +
  557.  common/x86/dct-32.asm  |   36 ++++++------
  558.  common/x86/dct-64.asm  |   38 ++++++------
  559.  common/x86/dct-a.asm   |   47 +++++++++++++--
  560.  common/x86/dct.h       |    1 +
  561.  common/x86/pixel-a.asm |   16 +++---
  562.  common/x86/x86util.asm |  147 +++++++++++++++++++++++++-----------------------
  563.  8 files changed, 167 insertions(+), 123 deletions(-)
  564.  
  565. diff --git a/common/dct.c b/common/dct.c
  566. index 975afef..1b3d87b 100644
  567. --- a/common/dct.c
  568. +++ b/common/dct.c
  569. @@ -429,6 +429,10 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
  570.          dctf->sub8x8_dct    = x264_sub8x8_dct_mmx;
  571.          dctf->sub16x16_dct  = x264_sub16x16_dct_mmx;
  572.      }
  573. +    if( cpu&X264_CPU_SSE2 )
  574. +    {
  575. +        dctf->add4x4_idct   = x264_add4x4_idct_sse2;
  576. +    }
  577.  #endif // HAVE_MMX
  578.  #else // !HIGH_BIT_DEPTH
  579.  #if HAVE_MMX
  580. diff --git a/common/x86/const-a.asm b/common/x86/const-a.asm
  581. index 32579e3..d6e621e 100644
  582. --- a/common/x86/const-a.asm
  583. +++ b/common/x86/const-a.asm
  584. @@ -50,6 +50,7 @@ const pw_3fff,     times 8 dw 0x3fff
  585.  const pw_pixel_max,times 8 dw ((1 << BIT_DEPTH)-1)
  586.  
  587.  const pd_1,        times 4 dd 1
  588. +const pd_32,       times 4 dd 32
  589.  const pd_128,      times 4 dd 128
  590.  const pw_00ff,     times 8 dw 0x00ff
  591.  const pw_ff00,     times 8 dw 0xff00
  592. diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
  593. index 24e6efc..21e70c1 100644
  594. --- a/common/x86/dct-32.asm
  595. +++ b/common/x86/dct-32.asm
  596. @@ -38,13 +38,13 @@ cextern hsub_mul
  597.  ; in: m0..m7
  598.  ; out: 0,4,6 in mem, rest in regs
  599.  %macro DCT8_1D 9
  600. -    SUMSUB_BA  m%8, m%1      ; %8 = s07, %1 = d07
  601. -    SUMSUB_BA  m%7, m%2      ; %7 = s16, %2 = d16
  602. -    SUMSUB_BA  m%6, m%3      ; %6 = s25, %3 = d25
  603. -    SUMSUB_BA  m%5, m%4      ; %5 = s34, %4 = d34
  604. -    SUMSUB_BA  m%5, m%8      ; %5 = a0,  %8 = a2
  605. -    SUMSUB_BA  m%6, m%7      ; %6 = a1,  %7 = a3
  606. -    SUMSUB_BA  m%6, m%5      ; %6 = dst0, %5 = dst4
  607. +    SUMSUB_BA  w, m%8, m%1      ; %8 = s07, %1 = d07
  608. +    SUMSUB_BA  w, m%7, m%2      ; %7 = s16, %2 = d16
  609. +    SUMSUB_BA  w, m%6, m%3      ; %6 = s25, %3 = d25
  610. +    SUMSUB_BA  w, m%5, m%4      ; %5 = s34, %4 = d34
  611. +    SUMSUB_BA  w, m%5, m%8      ; %5 = a0,  %8 = a2
  612. +    SUMSUB_BA  w, m%6, m%7      ; %6 = a1,  %7 = a3
  613. +    SUMSUB_BA  w, m%6, m%5      ; %6 = dst0, %5 = dst4
  614.      mova    [%9+0x00], m%6
  615.      mova    [%9+0x40], m%5
  616.      mova    m%6, m%7         ; a3
  617. @@ -127,13 +127,13 @@ cextern hsub_mul
  618.      psubw     m%2, m%1
  619.      mova      m%1, [%9+0x00]
  620.      mova      m%6, [%9+0x40]
  621. -    SUMSUB_BA m%6, m%1
  622. -    SUMSUB_BA m%7, m%6
  623. -    SUMSUB_BA m%3, m%1
  624. -    SUMSUB_BA m%5, m%7
  625. -    SUMSUB_BA m%2, m%3
  626. -    SUMSUB_BA m%8, m%1
  627. -    SUMSUB_BA m%4, m%6
  628. +    SUMSUB_BA w, m%6, m%1
  629. +    SUMSUB_BA w, m%7, m%6
  630. +    SUMSUB_BA w, m%3, m%1
  631. +    SUMSUB_BA w, m%5, m%7
  632. +    SUMSUB_BA w, m%2, m%3
  633. +    SUMSUB_BA w, m%8, m%1
  634. +    SUMSUB_BA w, m%4, m%6
  635.      SWAP %1, %5, %6
  636.      SWAP %3, %8, %7
  637.  %endmacro
  638. @@ -434,18 +434,18 @@ global add8x8_idct_sse2.skip_prologue
  639.      SBUTTERFLY qdq, 4, 5, 0
  640.      SBUTTERFLY qdq, 6, 7, 0
  641.      UNSPILL r1,0
  642. -    IDCT4_1D 0,1,2,3,r1
  643. +    IDCT4_1D w,0,1,2,3,r1
  644.      SPILL r1, 4
  645.      TRANSPOSE2x4x4W 0,1,2,3,4
  646.      UNSPILL r1, 4
  647. -    IDCT4_1D 4,5,6,7,r1
  648. +    IDCT4_1D w,4,5,6,7,r1
  649.      SPILL r1, 0
  650.      TRANSPOSE2x4x4W 4,5,6,7,0
  651.      UNSPILL r1, 0
  652.      paddw m0, [pw_32]
  653. -    IDCT4_1D 0,1,2,3,r1
  654. +    IDCT4_1D w,0,1,2,3,r1
  655.      paddw m4, [pw_32]
  656. -    IDCT4_1D 4,5,6,7,r1
  657. +    IDCT4_1D w,4,5,6,7,r1
  658.      SPILL r1, 6,7
  659.      pxor m7, m7
  660.      DIFFx2 m0, m1, m6, m7, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]; m5
  661. diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
  662. index 5e43b9c..70edcbd 100644
  663. --- a/common/x86/dct-64.asm
  664. +++ b/common/x86/dct-64.asm
  665. @@ -36,13 +36,13 @@ cextern hsub_mul
  666.  INIT_XMM
  667.  
  668.  %macro DCT8_1D 10
  669. -    SUMSUB_BA  m%5, m%4 ; %5=s34, %4=d34
  670. -    SUMSUB_BA  m%6, m%3 ; %6=s25, %3=d25
  671. -    SUMSUB_BA  m%7, m%2 ; %7=s16, %2=d16
  672. -    SUMSUB_BA  m%8, m%1 ; %8=s07, %1=d07
  673. +    SUMSUB_BA  w, m%5, m%4 ; %5=s34, %4=d34
  674. +    SUMSUB_BA  w, m%6, m%3 ; %6=s25, %3=d25
  675. +    SUMSUB_BA  w, m%7, m%2 ; %7=s16, %2=d16
  676. +    SUMSUB_BA  w, m%8, m%1 ; %8=s07, %1=d07
  677.  
  678. -    SUMSUB_BA  m%6, m%7, m%10 ; %6=a1, %7=a3
  679. -    SUMSUB_BA  m%5, m%8, m%10 ; %5=a0, %8=a2
  680. +    SUMSUB_BA  w, m%6, m%7, m%10 ; %6=a1, %7=a3
  681. +    SUMSUB_BA  w, m%5, m%8, m%10 ; %5=a0, %8=a2
  682.  
  683.      movdqa  m%9, m%1
  684.      psraw   m%9, 1
  685. @@ -56,7 +56,7 @@ INIT_XMM
  686.      paddw   m%10, m%2
  687.      psubw   m%10, m%3 ; %10=a7
  688.  
  689. -    SUMSUB_BA  m%4, m%1
  690. +    SUMSUB_BA  w, m%4, m%1
  691.      psubw   m%1, m%3
  692.      psubw   m%4, m%2
  693.      psraw   m%3, 1
  694. @@ -70,7 +70,7 @@ INIT_XMM
  695.      psraw   m%9, 2
  696.      psubw   m%9, m%10 ; %9=b7
  697.  
  698. -    SUMSUB_BA  m%6, m%5, m%10 ; %6=b0, %5=b4
  699. +    SUMSUB_BA  w, m%6, m%5, m%10 ; %6=b0, %5=b4
  700.  
  701.      movdqa  m%3, m%7
  702.      psraw   m%3, 1
  703. @@ -88,7 +88,7 @@ INIT_XMM
  704.  %endmacro
  705.  
  706.  %macro IDCT8_1D 10
  707. -    SUMSUB_BA  m%5, m%1, m%9 ; %5=a0, %1=a2
  708. +    SUMSUB_BA  w, m%5, m%1, m%9 ; %5=a0, %1=a2
  709.  
  710.      movdqa  m%9, m%2
  711.      psraw   m%9, 1
  712. @@ -123,8 +123,8 @@ INIT_XMM
  713.      psraw   m%6, 2
  714.      psubw   m%9, m%6 ; %9=b7
  715.  
  716. -    SUMSUB_BA m%7, m%5, m%6 ; %7=b0, %5=b6
  717. -    SUMSUB_BA m%3, m%1, m%6; %3=b2, %1=b4
  718. +    SUMSUB_BA w, m%7, m%5, m%6 ; %7=b0, %5=b6
  719. +    SUMSUB_BA w, m%3, m%1, m%6; %3=b2, %1=b4
  720.  
  721.      movdqa  m%8, m%10
  722.      psraw   m%8, 2
  723. @@ -132,10 +132,10 @@ INIT_XMM
  724.      psraw   m%2, 2
  725.      psubw   m%2, m%10 ; %2=b5
  726.  
  727. -    SUMSUB_BA m%9, m%7, m%6 ; %9=c0, %7=c7
  728. -    SUMSUB_BA m%2, m%3, m%6 ; %2=c1, %3=c6
  729. -    SUMSUB_BA m%8, m%1, m%6 ; %8=c2, %1=c5
  730. -    SUMSUB_BA m%4, m%5, m%6 ; %4=c3, %5=c4
  731. +    SUMSUB_BA w, m%9, m%7, m%6 ; %9=c0, %7=c7
  732. +    SUMSUB_BA w, m%2, m%3, m%6 ; %2=c1, %3=c6
  733. +    SUMSUB_BA w, m%8, m%1, m%6 ; %8=c2, %1=c5
  734. +    SUMSUB_BA w, m%4, m%5, m%6 ; %4=c3, %5=c4
  735.  
  736.      SWAP %1, %9, %6
  737.      SWAP %3, %8, %7
  738. @@ -263,14 +263,14 @@ global add8x8_idct_sse2.skip_prologue
  739.      mova   m7, [r1+112]
  740.      SBUTTERFLY qdq, 4, 5, 8
  741.      SBUTTERFLY qdq, 6, 7, 8
  742. -    IDCT4_1D 0,1,2,3,8,10
  743. +    IDCT4_1D w,0,1,2,3,8,10
  744.      TRANSPOSE2x4x4W 0,1,2,3,8
  745. -    IDCT4_1D 4,5,6,7,8,10
  746. +    IDCT4_1D w,4,5,6,7,8,10
  747.      TRANSPOSE2x4x4W 4,5,6,7,8
  748.      paddw m0, [pw_32]
  749. -    IDCT4_1D 0,1,2,3,8,10
  750. +    IDCT4_1D w,0,1,2,3,8,10
  751.      paddw m4, [pw_32]
  752. -    IDCT4_1D 4,5,6,7,8,10
  753. +    IDCT4_1D w,4,5,6,7,8,10
  754.      DIFFx2 m0, m1, m8, m9, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]
  755.      DIFFx2 m2, m3, m8, m9, [r0-2*FDEC_STRIDE], [r0-1*FDEC_STRIDE]
  756.      DIFFx2 m4, m5, m8, m9, [r0+0*FDEC_STRIDE], [r0+1*FDEC_STRIDE]
  757. diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
  758. index 67fa34a..0e4b514 100644
  759. --- a/common/x86/dct-a.asm
  760. +++ b/common/x86/dct-a.asm
  761. @@ -52,13 +52,15 @@ SECTION .text
  762.  cextern pw_32_0
  763.  cextern pw_32
  764.  cextern pw_8000
  765. +cextern pw_pixel_max
  766.  cextern hsub_mul
  767.  cextern pb_1
  768.  cextern pw_1
  769. +cextern pd_32
  770.  
  771.  %macro WALSH4_1D 5
  772. -    SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
  773. -    SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
  774. +    SUMSUB_BADC w, m%4, m%3, m%2, m%1, m%5
  775. +    SUMSUB_BADC w, m%4, m%2, m%3, m%1, m%5
  776.      SWAP %1, %4, %3
  777.  %endmacro
  778.  
  779. @@ -86,7 +88,7 @@ cglobal dct4x4dc_mmx, 1,1
  780.      movq   m7, [pw_8000] ; convert to unsigned and back, so that pavgw works
  781.      WALSH4_1D  0,1,2,3,4
  782.      TRANSPOSE4x4W 0,1,2,3,4
  783. -    SUMSUB_BADC m1, m0, m3, m2, m4
  784. +    SUMSUB_BADC w, m1, m0, m3, m2, m4
  785.      SWAP 0, 1
  786.      SWAP 2, 3
  787.      SUMSUB_17BIT 0,2,4,7
  788. @@ -175,10 +177,10 @@ cglobal add4x4_idct_mmx, 2,2
  789.      movq  m3, [r1+24]
  790.      movq  m2, [r1+16]
  791.      movq  m0, [r1+ 0]
  792. -    IDCT4_1D 0,1,2,3,4,5
  793. +    IDCT4_1D w,0,1,2,3,4,5
  794.      TRANSPOSE4x4W 0,1,2,3,4
  795.      paddw m0, [pw_32]
  796. -    IDCT4_1D 0,1,2,3,4,5
  797. +    IDCT4_1D w,0,1,2,3,4,5
  798.      STORE_DIFF  m0, m4, m7, [r0+0*FDEC_STRIDE]
  799.      STORE_DIFF  m1, m4, m7, [r0+1*FDEC_STRIDE]
  800.      STORE_DIFF  m2, m4, m7, [r0+2*FDEC_STRIDE]
  801. @@ -198,7 +200,7 @@ cglobal add4x4_idct_sse4, 2,2,6
  802.      psubw     m0, m3            ; row1>>1-row3/row0-2
  803.      paddw     m2, m1            ; row3>>1+row1/row0+2
  804.      SBUTTERFLY2 wd, 0, 2, 1
  805. -    SUMSUB_BA m2, m0, m1
  806. +    SUMSUB_BA w, m2, m0, m1
  807.      pshuflw   m1, m2, 10110001b
  808.      pshufhw   m2, m2, 10110001b
  809.      punpckldq m1, m0
  810. @@ -215,7 +217,7 @@ cglobal add4x4_idct_sse4, 2,2,6
  811.      psubw     m0, m3            ; row1>>1-row3/row0-2
  812.      paddw     m2, m1            ; row3>>1+row1/row0+2
  813.      SBUTTERFLY2 qdq, 0, 2, 1
  814. -    SUMSUB_BA m2, m0, m1
  815. +    SUMSUB_BA w, m2, m0, m1
  816.  
  817.      movd      m4, [r0+FDEC_STRIDE*0]
  818.      movd      m1, [r0+FDEC_STRIDE*1]
  819. @@ -236,6 +238,37 @@ cglobal add4x4_idct_sse4, 2,2,6
  820.      movd     [r0+FDEC_STRIDE*2], m0
  821.      pextrd   [r0+FDEC_STRIDE*3], m0, 1
  822.      RET
  823. +
  824. +%else
  825. +
  826. +%macro STORE_DIFFx2 6
  827. +    psrad     %1, 6
  828. +    psrad     %2, 6
  829. +    packssdw  %1, %2
  830. +    movq      %3, %5
  831. +    movhps    %3, %6
  832. +    paddsw    %1, %3
  833. +    pxor      %4, %4
  834. +    CLIPW     %1, %4, [pw_pixel_max]
  835. +    movq      %5, %1
  836. +    movhps    %6, %1
  837. +%endmacro
  838. +
  839. +INIT_XMM
  840. +cglobal add4x4_idct_sse2, 2,2,7
  841. +    pxor  m6, m6
  842. +.skip_prologue:
  843. +    mova  m1, [r1+16]
  844. +    mova  m3, [r1+48]
  845. +    mova  m2, [r1+32]
  846. +    mova  m0, [r1+ 0]
  847. +    IDCT4_1D d,0,1,2,3,4,5
  848. +    TRANSPOSE4x4D 0,1,2,3,4
  849. +    paddd m0, [pd_32]
  850. +    IDCT4_1D d,0,1,2,3,4,5
  851. +    STORE_DIFFx2 m0, m1, m4, m6, [r0+0*FDEC_STRIDE], [r0+2*FDEC_STRIDE]
  852. +    STORE_DIFFx2 m2, m3, m4, m6, [r0+4*FDEC_STRIDE], [r0+6*FDEC_STRIDE]
  853. +    RET
  854.  %endif ; !HIGH_BIT_DEPTH
  855.  
  856.  INIT_MMX
  857. diff --git a/common/x86/dct.h b/common/x86/dct.h
  858. index 58b9d17..ec6ec25 100644
  859. --- a/common/x86/dct.h
  860. +++ b/common/x86/dct.h
  861. @@ -40,6 +40,7 @@ void x264_sub8x8_dct_dc_mmxext( int16_t dct    [ 4], uint8_t *pix1, uint8_t *pix
  862.  void x264_sub8x8_dct_dc_sse2  ( int16_t dct    [ 4], uint8_t *pix1, uint8_t *pix2 );
  863.  
  864.  void x264_add4x4_idct_mmx       ( uint8_t *p_dst, int16_t dct    [16] );
  865. +void x264_add4x4_idct_sse2     ( uint16_t *p_dst, int32_t dct    [16] );
  866.  void x264_add4x4_idct_sse4      ( uint8_t *p_dst, int16_t dct    [16] );
  867.  void x264_add8x8_idct_mmx       ( uint8_t *p_dst, int16_t dct[ 4][16] );
  868.  void x264_add8x8_idct_dc_mmx    ( uint8_t *p_dst, int16_t dct    [ 4] );
  869. diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
  870. index acb3612..6cd79e1 100644
  871. --- a/common/x86/pixel-a.asm
  872. +++ b/common/x86/pixel-a.asm
  873. @@ -881,7 +881,7 @@ cglobal pixel_var2_8x8_ssse3, 5,6,8
  874.      DEINTB %1, %2, %3, %4, %5
  875.      psubw m%1, m%3
  876.      psubw m%2, m%4
  877. -    SUMSUB_BA m%1, m%2, m%3
  878. +    SUMSUB_BA w, m%1, m%2, m%3
  879.  %endmacro
  880.  
  881.  %macro LOAD_SUMSUB_16x4P 10-13 r0, r2, none
  882. @@ -1278,10 +1278,10 @@ cglobal pixel_sa8d_8x8_internal_%1
  883.  %else ; non-sse2
  884.      HADAMARD4_V m0, m1, m2, m8, m6
  885.      HADAMARD4_V m4, m5, m3, m9, m6
  886. -    SUMSUB_BADC m0, m4, m1, m5, m6
  887. +    SUMSUB_BADC w, m0, m4, m1, m5, m6
  888.      HADAMARD 2, sumsub, 0, 4, 6, 11
  889.      HADAMARD 2, sumsub, 1, 5, 6, 11
  890. -    SUMSUB_BADC m2, m3, m8, m9, m6
  891. +    SUMSUB_BADC w, m2, m3, m8, m9, m6
  892.      HADAMARD 2, sumsub, 2, 3, 6, 11
  893.      HADAMARD 2, sumsub, 8, 9, 6, 11
  894.      HADAMARD 1, amax, 0, 4, 6, 11
  895. @@ -1379,7 +1379,7 @@ cglobal pixel_sa8d_8x8_internal_%1
  896.      mova spill0, m6
  897.      mova spill1, m7
  898.      HADAMARD4_V m0, m1, m2, m3, m7
  899. -    SUMSUB_BADC m0, m4, m1, m5, m7
  900. +    SUMSUB_BADC w, m0, m4, m1, m5, m7
  901.      HADAMARD 2, sumsub, 0, 4, 7, 6
  902.      HADAMARD 2, sumsub, 1, 5, 7, 6
  903.      HADAMARD 1, amax, 0, 4, 7, 6
  904. @@ -1387,7 +1387,7 @@ cglobal pixel_sa8d_8x8_internal_%1
  905.      mova m6, spill0
  906.      mova m7, spill1
  907.      paddw m0, m1
  908. -    SUMSUB_BADC m2, m6, m3, m7, m4
  909. +    SUMSUB_BADC w, m2, m6, m3, m7, m4
  910.      HADAMARD 2, sumsub, 2, 6, 4, 5
  911.      HADAMARD 2, sumsub, 3, 7, 4, 5
  912.      HADAMARD 1, amax, 2, 6, 4, 5
  913. @@ -1994,7 +1994,7 @@ cglobal hadamard_ac_2x2max_mmxext
  914.      mova      m2, [r3+0x40]
  915.      mova      m3, [r3+0x60]
  916.      sub       r3, 8
  917. -    SUMSUB_BADC m0, m1, m2, m3, m4
  918. +    SUMSUB_BADC w, m0, m1, m2, m3, m4
  919.      ABS4 m0, m2, m1, m3, m4, m5
  920.      HADAMARD 0, max, 0, 2, 4, 5
  921.      HADAMARD 0, max, 1, 3, 4, 5
  922. @@ -2059,7 +2059,7 @@ cglobal hadamard_ac_8x8_mmxext
  923.      mova      m1, [r3+0x20]
  924.      mova      m2, [r3+0x40]
  925.      mova      m3, [r3+0x60]
  926. -    SUMSUB_BADC m0, m1, m2, m3, m4
  927. +    SUMSUB_BADC w, m0, m1, m2, m3, m4
  928.      HADAMARD 0, sumsub, 0, 2, 4, 5
  929.      ABS4 m1, m3, m0, m2, m4, m5
  930.      HADAMARD 0, max, 1, 3, 4, 5
  931. @@ -2266,7 +2266,7 @@ cglobal hadamard_ac_8x8_%1
  932.      ABS_MOV   m2, m4
  933.      ABS_MOV   m3, m5
  934.      paddw     m1, m2
  935. -    SUMSUB_BA m0, m4; m2
  936. +    SUMSUB_BA w, m0, m4; m2
  937.  %if vertical
  938.      pand      m1, [mask_ac4]
  939.  %else
  940. diff --git a/common/x86/x86util.asm b/common/x86/x86util.asm
  941. index 9acaa3d..d16a237 100644
  942. --- a/common/x86/x86util.asm
  943. +++ b/common/x86/x86util.asm
  944. @@ -241,44 +241,44 @@
  945.      psrlw  m%4, 8   ; src .. y7 .. y5
  946.  %endmacro
  947.  
  948. -%macro SUMSUB_BA 2-3
  949. -%if %0==2
  950. -    paddw   %1, %2
  951. -    paddw   %2, %2
  952. -    psubw   %2, %1
  953. +%macro SUMSUB_BA 3-4
  954. +%if %0==3
  955. +    padd%1  %2, %3
  956. +    padd%1  %3, %3
  957. +    psub%1  %3, %2
  958.  %else
  959. -    mova    %3, %1
  960. -    paddw   %1, %2
  961. -    psubw   %2, %3
  962. +    mova    %4, %2
  963. +    padd%1  %2, %3
  964. +    psub%1  %3, %4
  965.  %endif
  966.  %endmacro
  967.  
  968. -%macro SUMSUB_BADC 4-5
  969. -%if %0==5
  970. -    SUMSUB_BA %1, %2, %5
  971. -    SUMSUB_BA %3, %4, %5
  972. +%macro SUMSUB_BADC 5-6
  973. +%if %0==6
  974. +    SUMSUB_BA %1, %2, %3, %6
  975. +    SUMSUB_BA %1, %4, %5, %6
  976.  %else
  977. -    paddw   %1, %2
  978. -    paddw   %3, %4
  979. -    paddw   %2, %2
  980. -    paddw   %4, %4
  981. -    psubw   %2, %1
  982. -    psubw   %4, %3
  983. +    padd%1  %2, %3
  984. +    padd%1  %4, %5
  985. +    padd%1  %3, %3
  986. +    padd%1  %5, %5
  987. +    psub%1  %3, %2
  988. +    psub%1  %5, %4
  989.  %endif
  990.  %endmacro
  991.  
  992.  %macro HADAMARD4_V 4+
  993. -    SUMSUB_BADC %1, %2, %3, %4
  994. -    SUMSUB_BADC %1, %3, %2, %4
  995. +    SUMSUB_BADC w, %1, %2, %3, %4
  996. +    SUMSUB_BADC w, %1, %3, %2, %4
  997.  %endmacro
  998.  
  999.  %macro HADAMARD8_V 8+
  1000. -    SUMSUB_BADC %1, %2, %3, %4
  1001. -    SUMSUB_BADC %5, %6, %7, %8
  1002. -    SUMSUB_BADC %1, %3, %2, %4
  1003. -    SUMSUB_BADC %5, %7, %6, %8
  1004. -    SUMSUB_BADC %1, %5, %2, %6
  1005. -    SUMSUB_BADC %3, %7, %4, %8
  1006. +    SUMSUB_BADC w, %1, %2, %3, %4
  1007. +    SUMSUB_BADC w, %5, %6, %7, %8
  1008. +    SUMSUB_BADC w, %1, %3, %2, %4
  1009. +    SUMSUB_BADC w, %5, %7, %6, %8
  1010. +    SUMSUB_BADC w, %1, %5, %2, %6
  1011. +    SUMSUB_BADC w, %3, %7, %4, %8
  1012.  %endmacro
  1013.  
  1014.  %macro TRANS_SSE2 5-6
  1015. @@ -363,7 +363,7 @@
  1016.      %endif
  1017.  %endif
  1018.  %ifidn %2, sumsub
  1019. -    SUMSUB_BA m%3, m%4, m%5
  1020. +    SUMSUB_BA w, m%3, m%4, m%5
  1021.  %else
  1022.      %ifidn %2, amax
  1023.          %if %0==6
  1024. @@ -426,67 +426,72 @@
  1025.  %endif
  1026.  %endmacro
  1027.  
  1028. -%macro SUMSUB2_AB 3
  1029. -    mova    %3, %1
  1030. -    paddw   %1, %1
  1031. -    paddw   %1, %2
  1032. -    psubw   %3, %2
  1033. -    psubw   %3, %2
  1034. +%macro SUMSUB2_AB 4
  1035. +    mova    %4, %2
  1036. +    padd%1  %2, %2
  1037. +    padd%1  %2, %3
  1038. +    psub%1  %4, %3
  1039. +    psub%1  %4, %3
  1040.  %endmacro
  1041.  
  1042. -%macro SUMSUB2_BA 3
  1043. -    mova    m%3, m%1
  1044. -    paddw   m%1, m%2
  1045. -    paddw   m%1, m%2
  1046. -    psubw   m%2, m%3
  1047. -    psubw   m%2, m%3
  1048. +%macro SUMSUB2_BA 4
  1049. +    mova    m%4, m%2
  1050. +    padd%1  m%2, m%3
  1051. +    padd%1  m%2, m%3
  1052. +    psub%1  m%3, m%4
  1053. +    psub%1  m%3, m%4
  1054.  %endmacro
  1055.  
  1056. -%macro SUMSUBD2_AB 4
  1057. -    mova    %4, %1
  1058. -    mova    %3, %2
  1059. -    psraw   %2, 1  ; %2: %2>>1
  1060. -    psraw   %1, 1  ; %1: %1>>1
  1061. -    paddw   %2, %4 ; %2: %2>>1+%1
  1062. -    psubw   %1, %3 ; %1: %1>>1-%2
  1063. +%macro SUMSUBD2_AB 5
  1064. +    mova    %5, %2
  1065. +    mova    %4, %3
  1066. +    psra%1  %3, 1  ; %3: %3>>1
  1067. +    psra%1  %2, 1  ; %2: %2>>1
  1068. +    padd%1  %3, %5 ; %3: %3>>1+%2
  1069. +    psub%1  %2, %4 ; %2: %2>>1-%3
  1070.  %endmacro
  1071.  
  1072.  %macro DCT4_1D 5
  1073.  %ifnum %5
  1074. -    SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
  1075. -    SUMSUB_BA   m%3, m%4, m%5
  1076. -    SUMSUB2_AB  m%1, m%2, m%5
  1077. +    SUMSUB_BADC w, m%4, m%1, m%3, m%2; m%5
  1078. +    SUMSUB_BA   w, m%3, m%4, m%5
  1079. +    SUMSUB2_AB  w, m%1, m%2, m%5
  1080.      SWAP %1, %3, %4, %5, %2
  1081.  %else
  1082. -    SUMSUB_BADC m%4, m%1, m%3, m%2
  1083. -    SUMSUB_BA   m%3, m%4
  1084. +    SUMSUB_BADC w, m%4, m%1, m%3, m%2
  1085. +    SUMSUB_BA   w, m%3, m%4
  1086.      mova       [%5], m%2
  1087. -    SUMSUB2_AB m%1, [%5], m%2
  1088. +    SUMSUB2_AB w, m%1, [%5], m%2
  1089.      SWAP %1, %3, %4, %2
  1090.  %endif
  1091.  %endmacro
  1092.  
  1093. -%macro IDCT4_1D 5-6
  1094. -%ifnum %5
  1095. -    SUMSUBD2_AB m%2, m%4, m%6, m%5
  1096. -    ; %2: %2>>1-%4 %4: %2+%4>>1
  1097. -    SUMSUB_BA   m%3, m%1, m%6
  1098. -    ; %3: %1+%3 %1: %1-%3
  1099. -    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
  1100. -    ; %4: %1+%3 + (%2+%4>>1)
  1101. -    ; %3: %1+%3 - (%2+%4>>1)
  1102. -    ; %2: %1-%3 + (%2>>1-%4)
  1103. -    ; %1: %1-%3 - (%2>>1-%4)
  1104. +%macro IDCT4_1D 6-7
  1105. +%ifnum %6
  1106. +    SUMSUBD2_AB %1, m%3, m%5, m%7, m%6
  1107. +    ; %3: %3>>1-%5 %5: %3+%5>>1
  1108. +    SUMSUB_BA   %1, m%4, m%2, m%7
  1109. +    ; %4: %2+%4 %2: %2-%4
  1110. +    SUMSUB_BADC %1, m%5, m%4, m%3, m%2, m%7
  1111. +    ; %5: %2+%4 + (%3+%5>>1)
  1112. +    ; %4: %2+%4 - (%3+%5>>1)
  1113. +    ; %3: %2-%4 + (%3>>1-%5)
  1114. +    ; %2: %2-%4 - (%3>>1-%5)
  1115.  %else
  1116. -    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
  1117. -    SUMSUB_BA   m%3, m%1
  1118. -    SUMSUB_BADC m%4, m%3, m%2, m%1
  1119. +%ifidn %1,w
  1120. +    SUMSUBD2_AB %1, m%3, m%5, [%6], [%6+16]
  1121. +%endif
  1122. +%ifidn %1,d
  1123. +    SUMSUBD2_AB %1, m%3, m%5, [%6], [%6+32]
  1124. +%endif
  1125. +    SUMSUB_BA   %1, m%4, m%2
  1126. +    SUMSUB_BADC %1, m%5, m%4, m%3, m%2
  1127.  %endif
  1128. -    SWAP %1, %4, %3
  1129. -    ; %1: %1+%3 + (%2+%4>>1) row0
  1130. -    ; %2: %1-%3 + (%2>>1-%4) row1
  1131. -    ; %3: %1-%3 - (%2>>1-%4) row2
  1132. -    ; %4: %1+%3 - (%2+%4>>1) row3
  1133. +    SWAP %2, %5, %4
  1134. +    ; %2: %2+%4 + (%3+%5>>1) row0
  1135. +    ; %3: %2-%4 + (%3>>1-%5) row1
  1136. +    ; %4: %2-%4 - (%3>>1-%5) row2
  1137. +    ; %5: %2+%4 - (%3+%5>>1) row3
  1138.  %endmacro
  1139.  
  1140.  
  1141. --
  1142. 1.7.3.2.146.gca209
  1143.  
  1144.  
  1145. From a989eef327f86107f565e448a17ba07a06546d8d Mon Sep 17 00:00:00 2001
  1146. From: Alex Wright <alexw0885@gmail.com>
  1147. Date: Wed, 24 Nov 2010 02:19:51 -0800
  1148. Subject: [PATCH 9/9] Make --weightp 1 a better speed tradeoff
  1149.  Since fade analysis is now so fast, weightp 1 now does fade analysis but no reference duplication.
  1150.  This is the opposite of what it used to do (reference duplication but no fade analysis).
  1151.  This also gives weightp's better fade quality to faster presets (up to superfast).
  1152.  
  1153. ---
  1154. common/common.c       |    7 ++++---
  1155.  common/macroblock.c   |    8 +++-----
  1156.  encoder/encoder.c     |   23 ++++++++---------------
  1157.  encoder/ratecontrol.c |    4 ++--
  1158.  encoder/slicetype.c   |    5 ++---
  1159.  x264.c                |    4 ++--
  1160.  x264.h                |    4 ++--
  1161.  7 files changed, 23 insertions(+), 32 deletions(-)
  1162.  
  1163. diff --git a/common/common.c b/common/common.c
  1164. index 1f99e9e..1845e3b 100644
  1165. --- a/common/common.c
  1166. +++ b/common/common.c
  1167. @@ -204,7 +204,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1168.          param->analyse.b_mixed_references = 0;
  1169.          param->analyse.i_trellis = 0;
  1170.          param->rc.b_mb_tree = 0;
  1171. -        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1172. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1173.          param->rc.i_lookahead = 0;
  1174.      }
  1175.      else if( !strcasecmp( preset, "veryfast" ) )
  1176. @@ -214,7 +214,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1177.          param->i_frame_reference = 1;
  1178.          param->analyse.b_mixed_references = 0;
  1179.          param->analyse.i_trellis = 0;
  1180. -        param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1181. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1182.          param->rc.i_lookahead = 10;
  1183.      }
  1184.      else if( !strcasecmp( preset, "faster" ) )
  1185. @@ -222,13 +222,14 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
  1186.          param->analyse.b_mixed_references = 0;
  1187.          param->i_frame_reference = 2;
  1188.          param->analyse.i_subpel_refine = 4;
  1189. -        param->analyse.i_weighted_pred = X264_WEIGHTP_BLIND;
  1190. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1191.          param->rc.i_lookahead = 20;
  1192.      }
  1193.      else if( !strcasecmp( preset, "fast" ) )
  1194.      {
  1195.          param->i_frame_reference = 2;
  1196.          param->analyse.i_subpel_refine = 6;
  1197. +        param->analyse.i_weighted_pred = X264_WEIGHTP_SIMPLE;
  1198.          param->rc.i_lookahead = 30;
  1199.      }
  1200.      else if( !strcasecmp( preset, "medium" ) )
  1201. diff --git a/common/macroblock.c b/common/macroblock.c
  1202. index 5c76d3f..9075efc9 100644
  1203. --- a/common/macroblock.c
  1204. +++ b/common/macroblock.c
  1205. @@ -239,8 +239,6 @@ int x264_macroblock_cache_allocate( x264_t *h )
  1206.          int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
  1207.          if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1208.              i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
  1209. -        else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  1210. -            i_refs = X264_MIN(X264_REF_MAX, i_refs + 1); //blind weights add one duplicate frame
  1211.  
  1212.          for( int j = !i; j < i_refs; j++ )
  1213.          {
  1214. @@ -277,7 +275,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
  1215.                  //SMART can weight one ref and one offset -1
  1216.                  numweightbuf = 2;
  1217.              else
  1218. -                //blind only has one weighted copy (offset -1)
  1219. +                //simple only has one weighted ref
  1220.                  numweightbuf = 1;
  1221.          }
  1222.  
  1223. @@ -398,7 +396,7 @@ void x264_macroblock_slice_init( x264_t *h )
  1224.      {
  1225.          memset( h->mb.cache.skip, 0, sizeof( h->mb.cache.skip ) );
  1226.  
  1227. -        if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred )
  1228. +        if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1229.          {
  1230.              deblock_ref_table(-2) = -2;
  1231.              deblock_ref_table(-1) = -1;
  1232. @@ -999,7 +997,7 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
  1233.          h->mb.i_neighbour = new_neighbour;
  1234.      }
  1235.  
  1236. -    if( h->param.analyse.i_weighted_pred && h->sh.i_type == SLICE_TYPE_P )
  1237. +    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.i_type == SLICE_TYPE_P )
  1238.      {
  1239.          /* Handle reference frame duplicates */
  1240.          int i8 = x264_scan8[0] - 8;
  1241. diff --git a/encoder/encoder.c b/encoder/encoder.c
  1242. index 2d5c778..dbbe9a0 100644
  1243. --- a/encoder/encoder.c
  1244. +++ b/encoder/encoder.c
  1245. @@ -772,8 +772,6 @@ static int x264_validate_parameters( x264_t *h )
  1246.      h->param.analyse.i_weighted_pred = x264_clip3( h->param.analyse.i_weighted_pred, X264_WEIGHTP_NONE, X264_WEIGHTP_SMART );
  1247.      if( !h->param.analyse.i_weighted_pred && h->param.rc.b_mb_tree && h->param.analyse.b_psy && !h->param.b_interlaced )
  1248.          h->param.analyse.i_weighted_pred = X264_WEIGHTP_FAKE;
  1249. -    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND && BIT_DEPTH > 8 )
  1250. -        h->param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
  1251.  
  1252.      if( h->i_thread_frames > 1 )
  1253.      {
  1254. @@ -996,7 +994,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
  1255.            || h->param.i_bframe_adaptive
  1256.            || h->param.i_scenecut_threshold
  1257.            || h->param.rc.b_mb_tree
  1258. -          || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART );
  1259. +          || h->param.analyse.i_weighted_pred );
  1260.      h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
  1261.      h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
  1262.  
  1263. @@ -1432,6 +1430,10 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t
  1264.      if( i <= 1 ) /* empty list, definitely can't duplicate frame */
  1265.          return -1;
  1266.  
  1267. +    //Duplication isn't used for X264_WEIGHTP_SIMPLE
  1268. +    if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SIMPLE )
  1269. +        return -1;
  1270. +
  1271.      /* Duplication is a hack to compensate for crappy rounding in motion compensation.
  1272.       * With high bit depth, it's not worth doing, so turn it off except in the case of
  1273.       * unweighted dupes. */
  1274. @@ -1609,7 +1611,7 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  1275.      if( h->fenc->i_type == X264_TYPE_P )
  1276.      {
  1277.          int idx = -1;
  1278. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1279. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1280.          {
  1281.              x264_weight_t w[3];
  1282.              w[1].weightfn = w[2].weightfn = NULL;
  1283. @@ -1638,15 +1640,6 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
  1284.                  }
  1285.              }
  1286.          }
  1287. -        else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
  1288. -        {
  1289. -            //weighted offset=-1
  1290. -            x264_weight_t w[3];
  1291. -            SET_WEIGHT( w[0], 1, 1, 0, -1 );
  1292. -            h->fenc->weight[0][0].i_denom = 0;
  1293. -            w[1].weightfn = w[2].weightfn = NULL;
  1294. -            idx = x264_weighted_reference_duplicate( h, 0, w );
  1295. -        }
  1296.          h->mb.ref_blind_dupe = idx;
  1297.      }
  1298.  
  1299. @@ -2876,7 +2869,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
  1300.      if( h->sh.i_type == SLICE_TYPE_P )
  1301.      {
  1302.          h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
  1303. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1304. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1305.          {
  1306.              h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn;
  1307.              h->stat.i_wpred[1] += !!h->sh.weight[0][1].weightfn || !!h->sh.weight[0][2].weightfn;
  1308. @@ -3225,7 +3218,7 @@ void    x264_encoder_close  ( x264_t *h )
  1309.                        fixed_pred_modes[3][2] * 100.0 / sum_pred_modes[3],
  1310.                        fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] );
  1311.  
  1312. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  1313. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
  1314.              x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
  1315.                        h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
  1316.                        h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
  1317. diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
  1318. index e949e24..780c0e1 100644
  1319. --- a/encoder/ratecontrol.c
  1320. +++ b/encoder/ratecontrol.c
  1321. @@ -284,7 +284,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
  1322.              }
  1323.          }
  1324.          /* Need variance data for weighted prediction */
  1325. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE || h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1326. +        if( h->param.analyse.i_weighted_pred )
  1327.          {
  1328.              for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
  1329.                  for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
  1330. @@ -1558,7 +1558,7 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
  1331.                  goto fail;
  1332.          }
  1333.  
  1334. -        if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
  1335. +        if( h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE && h->sh.weight[0][0].weightfn )
  1336.          {
  1337.              if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d",
  1338.                           h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
  1339. diff --git a/encoder/slicetype.c b/encoder/slicetype.c
  1340. index dd6c360..4f47710 100644
  1341. --- a/encoder/slicetype.c
  1342. +++ b/encoder/slicetype.c
  1343. @@ -647,8 +647,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
  1344.          do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
  1345.          if( do_search[0] )
  1346.          {
  1347. -            if( ( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART ||
  1348. -                  h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE ) && b == p1 )
  1349. +            if( h->param.analyse.i_weighted_pred && b == p1 )
  1350.              {
  1351.                  x264_emms();
  1352.                  x264_weights_analyse( h, frames[b], frames[p0], 1 );
  1353. @@ -1549,7 +1548,7 @@ void x264_slicetype_decide( x264_t *h )
  1354.  
  1355.      /* Analyse for weighted P frames */
  1356.      if( !h->param.rc.b_stat_read && h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
  1357. -        && h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
  1358. +        && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
  1359.      {
  1360.          x264_emms();
  1361.          x264_weights_analyse( h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 0 );
  1362. diff --git a/x264.c b/x264.c
  1363. index f9a1c6a..b4530f8 100644
  1364. --- a/x264.c
  1365. +++ b/x264.c
  1366. @@ -609,8 +609,8 @@ static void help( x264_param_t *defaults, int longhelp )
  1367.      H2( "      --no-weightb            Disable weighted prediction for B-frames\n" );
  1368.      H1( "      --weightp <integer>     Weighted prediction for P-frames [%d]\n"
  1369.          "                                  - 0: Disabled\n"
  1370. -        "                                  - 1: Blind offset\n"
  1371. -        "                                  - 2: Smart analysis\n", defaults->analyse.i_weighted_pred );
  1372. +        "                                  - 1: Weighted refs\n"
  1373. +        "                                  - 2: Weighted refs + Duplicates\n", defaults->analyse.i_weighted_pred );
  1374.      H1( "      --me <string>           Integer pixel motion estimation method [\"%s\"]\n",
  1375.                                         strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) );
  1376.      H2( "                                  - dia: diamond search, radius 1 (fast)\n"
  1377. diff --git a/x264.h b/x264.h
  1378. index e144e51..bfe478b 100644
  1379. --- a/x264.h
  1380. +++ b/x264.h
  1381. @@ -39,7 +39,7 @@
  1382.  
  1383.  #include <stdarg.h>
  1384.  
  1385. -#define X264_BUILD 109
  1386. +#define X264_BUILD 110
  1387.  
  1388.  /* x264_t:
  1389.   *      opaque handler for encoder */
  1390. @@ -151,7 +151,7 @@ typedef struct
  1391.  #define X264_B_ADAPT_FAST            1
  1392.  #define X264_B_ADAPT_TRELLIS         2
  1393.  #define X264_WEIGHTP_NONE            0
  1394. -#define X264_WEIGHTP_BLIND           1
  1395. +#define X264_WEIGHTP_SIMPLE          1
  1396.  #define X264_WEIGHTP_SMART           2
  1397.  #define X264_B_PYRAMID_NONE          0
  1398.  #define X264_B_PYRAMID_STRICT        1
  1399. --
  1400. 1.7.3.2.146.gca209
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement