Untitled

From f098398a4069c39639882f5868194991f6c4e786 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 14:39:57 +0300
Subject: [PATCH 1/7] squash: x86-64: AVX-512 plane_copy and plane_copy_swap

---
 common/frame.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/common/frame.c b/common/frame.c
index c3f906bf..dcd1ec4d 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -153,9 +153,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
     {
         int chroma_padv = i_padv >> (i_csp == X264_CSP_NV12);
         int chroma_plane_size = (frame->i_stride[1] * (frame->i_lines[1] + 2*chroma_padv));
-        PREALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) + padh_align );
+        PREALLOC( frame->buffer[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
         if( PARAM_INTERLACED )
-            PREALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) + padh_align );
+            PREALLOC( frame->buffer_fld[1], (chroma_plane_size + padh_align) * sizeof(pixel) );
     }

     /* all 4 luma planes allocated together, since the cacheline split code
@@ -168,9 +168,9 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
             luma_plane_size *= 4;

         /* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
-        PREALLOC( frame->buffer[p], luma_plane_size * sizeof(pixel) + padh_align );
+        PREALLOC( frame->buffer[p], (luma_plane_size + padh_align) * sizeof(pixel) );
         if( PARAM_INTERLACED )
-            PREALLOC( frame->buffer_fld[p], luma_plane_size * sizeof(pixel) + padh_align );
+            PREALLOC( frame->buffer_fld[p], (luma_plane_size + padh_align) * sizeof(pixel) );
     }

     frame->b_duplicate = 0;
@@ -208,7 +208,7 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
         {
             int luma_plane_size = align_plane_size( frame->i_stride_lowres * (frame->i_lines[0]/2 + 2*PADV), disalign );

-            PREALLOC( frame->buffer_lowres, 4 * luma_plane_size * sizeof(pixel) + padh_align );
+            PREALLOC( frame->buffer_lowres, (4 * luma_plane_size + padh_align) * sizeof(pixel) );

             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
@@ -279,7 +279,6 @@ static x264_frame_t *frame_new( x264_t *h, int b_fdec )
             for( int i = 0; i < 4; i++ )
                 frame->lowres[i] = frame->buffer_lowres + frame->i_stride_lowres * PADV + PADH + padh_align + i * luma_plane_size;

-
             for( int j = 0; j <= !!h->param.i_bframe; j++ )
                 for( int i = 0; i <= h->param.i_bframe; i++ )
                     memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
--
2.13.0.windows.1


From e5476390f69c7f6fd1f70f7f733f5bd2c41df09a Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Mon, 19 Feb 2018 19:53:38 +0300
Subject: [PATCH 2/7] Fix --qpmax default value in fullhelp

---
 x264.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/x264.c b/x264.c
index abedf214..84e42fbc 100644
--- a/x264.c
+++ b/x264.c
@@ -735,7 +735,7 @@ static void help( x264_param_t *defaults, int longhelp )
     H2( "      --crf-max <float>       With CRF+VBV, limit RF to this value\n"
         "                                  May cause VBV underflows!\n" );
     H2( "      --qpmin <integer>       Set min QP [%d]\n", defaults->rc.i_qp_min );
-    H2( "      --qpmax <integer>       Set max QP [%d]\n", defaults->rc.i_qp_max );
+    H2( "      --qpmax <integer>       Set max QP [%d]\n", X264_MIN( defaults->rc.i_qp_max, QP_MAX ) );
     H2( "      --qpstep <integer>      Set max QP step [%d]\n", defaults->rc.i_qp_step );
     H2( "      --ratetol <float>       Tolerance of ABR ratecontrol and VBV [%.1f]\n", defaults->rc.f_rate_tolerance );
     H2( "      --ipratio <float>       QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor );
--
2.13.0.windows.1


From 56ed4446257ca5ef170462b72723c6ce5899dcb1 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 17:42:46 +0300
Subject: [PATCH 3/7] configure: Fix ambiguous "$(("

---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 524053bd..d0770e83 100755
--- a/configure
+++ b/configure
@@ -919,7 +919,7 @@ fi

 if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
     if ! as_check "vmovdqa32 [eax]{k1}{z}, zmm0" ; then
-        VER="$(($AS --version || echo no assembler) 2>/dev/null | head -n 1)"
+        VER="$( ($AS --version || echo no assembler) 2>/dev/null | head -n 1 )"
         echo "Found $VER"
         echo "Minimum version is nasm-2.13"
         echo "If you really want to compile without asm, configure with --disable-asm."
--
2.13.0.windows.1


From 6d29c80b7fef1b6194a83479610206f9178ca8e0 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 17:52:47 +0300
Subject: [PATCH 4/7] Fix theoretically incorrect cost_mv_fpel free

---
 encoder/analyse.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/encoder/analyse.c b/encoder/analyse.c
index 37758261..9a743602 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -208,9 +208,11 @@ void x264_analyse_free_costs( x264_t *h )
     {
         if( h->cost_mv[i] )
             x264_free( h->cost_mv[i] - 2*4*mv_range );
-        if( h->cost_mv_fpel[i][0] )
-            for( int j = 0; j < 4; j++ )
+        for( int j = 0; j < 4; j++ )
+        {
+            if( h->cost_mv_fpel[i][j] )
                 x264_free( h->cost_mv_fpel[i][j] - 2*mv_range );
+        }
     }
 }

--
2.13.0.windows.1


From 6af21f15e9962b96ace5eb75eab88e8b5bdf8d3e Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 20:34:18 +0300
Subject: [PATCH 5/7] Make bs_align_10 imply bs_flush, same as bs_align_0 and
 bs_align_1

---
 common/bitstream.h | 1 +
 encoder/set.c      | 6 ------
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/common/bitstream.h b/common/bitstream.h
index a9ae4479..40ecc7ad 100644
--- a/common/bitstream.h
+++ b/common/bitstream.h
@@ -188,6 +188,7 @@ static inline void bs_align_10( bs_t *s )
 {
     if( s->i_left&7 )
         bs_write( s, s->i_left&7, 1 << ( (s->i_left&7) - 1 ) );
+    bs_flush( s );
 }

 /* golomb functions */
diff --git a/encoder/set.c b/encoder/set.c
index f8b786a7..628518a6 100644
--- a/encoder/set.c
+++ b/encoder/set.c
@@ -548,7 +548,6 @@ void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )
     bs_write( &q, 2, 0 ); //changing_slice_group 0

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_RECOVERY_POINT );
 }
@@ -603,7 +602,6 @@ void x264_sei_buffering_period_write( x264_t *h, bs_t *s )
     }

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_BUFFERING_PERIOD );
 }
@@ -635,7 +633,6 @@ void x264_sei_pic_timing_write( x264_t *h, bs_t *s )
     }

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_PIC_TIMING );
 }
@@ -678,7 +675,6 @@ void x264_sei_frame_packing_write( x264_t *h, bs_t *s )
     bs_write1( &q, 0 );                           // frame_packing_arrangement_extension_flag

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_FRAME_PACKING );
 }
@@ -695,7 +691,6 @@ void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s )
     bs_write ( &q, 8, h->param.i_alternative_transfer ); // preferred_transfer_characteristics

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_ALTERNATIVE_TRANSFER );
 }
@@ -739,7 +734,6 @@ void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s )
     }

     bs_align_10( &q );
-    bs_flush( &q );

     x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_DEC_REF_PIC_MARKING );
 }
--
2.13.0.windows.1


From 1f337eff2cfca8f0c3d8ab8dbfa3ce1b533cc8e0 Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 20:39:30 +0300
Subject: [PATCH 6/7] Fix possible undefined behavior of right shift with
 32-bit uintptr_t

---
 common/bitstream.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/common/bitstream.h b/common/bitstream.h
index 40ecc7ad..36698089 100644
--- a/common/bitstream.h
+++ b/common/bitstream.h
@@ -89,8 +89,13 @@ static inline void bs_init( bs_t *s, void *p_data, int i_data )
     s->p       = s->p_start = (uint8_t*)p_data - offset;
     s->p_end   = (uint8_t*)p_data + i_data;
     s->i_left  = (WORD_SIZE - offset)*8;
-    s->cur_bits = endian_fix32( M32(s->p) );
-    s->cur_bits >>= (4-offset)*8;
+    if( offset )
+    {
+        s->cur_bits = endian_fix32( M32(s->p) );
+        s->cur_bits >>= (4-offset)*8;
+    }
+    else
+        s->cur_bits = 0;
 }
 static inline int bs_pos( bs_t *s )
 {
--
2.13.0.windows.1


From fcced4f78810dae6ac5195de3ff546a0b022477f Mon Sep 17 00:00:00 2001
From: Anton Mitrofanov <BugMaster@narod.ru>
Date: Sun, 1 Apr 2018 20:49:29 +0300
Subject: [PATCH 7/7] Fix missing bs_flush in AUD writing

---
 encoder/encoder.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index b581d9e2..0dee4832 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -3512,6 +3512,7 @@ int     x264_encoder_encode( x264_t *h,
         nal_start( h, NAL_AUD, NAL_PRIORITY_DISPOSABLE );
         bs_write( &h->out.bs, 3, pic_type );
         bs_rbsp_trailing( &h->out.bs );
+        bs_flush( &h->out.bs );
         if( nal_end( h ) )
             return -1;
         overhead += h->out.nal[h->out.i_nal-1].i_payload + NALU_OVERHEAD;
--
2.13.0.windows.1