Untitled

From d0974fd9e01cd4be6deb56c53dc867e876945926 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Tue, 5 Jul 2011 18:08:22 -0700
Subject: [PATCH 1/6] H.264: av_always_inline some more functions

These functions were not being inlined consistently by all gcc versions.
---
 libavcodec/h264.c        |    4 ++--
 libavcodec/h264.h        |    4 ++--
 libavcodec/h264_mvpred.h |    8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index dbac4fd..f7c0345 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1443,7 +1443,7 @@ static void decode_postinit(H264Context *h, int setup_finished){
         ff_thread_finish_setup(s->avctx);
 }

-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
+static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
     MpegEncContext * const s = &h->s;
     uint8_t *top_border;
     int top_idx = 1;
@@ -1518,7 +1518,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     }
 }

-static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
+static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                   uint8_t *src_cb, uint8_t *src_cr,
                                   int linesize, int uvlinesize,
                                   int xchg, int chroma444,
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index dad06e2..a2abab9 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -766,11 +766,11 @@ static av_always_inline uint16_t pack8to16(int a, int b){
 /**
  * gets the chroma qp.
  */
-static inline int get_chroma_qp(H264Context *h, int t, int qscale){
+static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
     return h->pps.chroma_qp_table[t][qscale];
 }

-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
+static av_always_inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);

 static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index f603e7f..c4e65b5 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -35,7 +35,7 @@
 //#undef NDEBUG
 #include <assert.h>

-static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
+static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
     const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
     MpegEncContext *s = &h->s;

@@ -92,7 +92,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
     const int index8= scan8[n];
     const int top_ref=      h->ref_cache[list][ index8 - 8 ];
     const int left_ref=     h->ref_cache[list][ index8 - 1 ];
@@ -147,7 +147,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
@@ -182,7 +182,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
  * @param mx the x component of the predicted motion vector
  * @param my the y component of the predicted motion vector
  */
-static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
+static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
     if(n==0){
         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
--
1.7.3.2.146.gca209


From e61600fa054188eea481a55233b67b2d33f41892 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Tue, 5 Jul 2011 17:55:14 -0700
Subject: [PATCH 2/6] H.264: faster P-SKIP decoding

Inline the relevant parts of fill_decode_caches into P-SKIP MV prediction to
avoid calling the whole function.
---
 libavcodec/h264.h        |    1 -
 libavcodec/h264_mvpred.h |  108 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index a2abab9..d34e6db 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -1331,7 +1331,6 @@ static void av_unused decode_mb_skip(H264Context *h){
         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

         fill_decode_neighbors(h, mb_type);
-        fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
         pred_pskip_motion(h, &mx, &my);
         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
         fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index c4e65b5..27adac6 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -213,21 +213,111 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int
     pred_motion(h, n, 2, list, ref, mx, my);
 }

-static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
-    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
-    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
-
-    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+#define FIX_MV_MBAFF(type, refn, mvn, idx)\
+    if(FRAME_MBAFF){\
+        if(MB_FIELD){\
+            if(!IS_INTERLACED(type)){\
+                refn <<= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] /= 2;\
+                mvn = mvbuf[idx];\
+            }\
+        }else{\
+            if(IS_INTERLACED(type)){\
+                refn >>= 1;\
+                AV_COPY32(mvbuf[idx], mvn);\
+                mvbuf[idx][1] <<= 1;\
+                mvn = mvbuf[idx];\
+            }\
+        }\
+    }

-    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
-       || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ]))
-       || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){
+static av_always_inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
+    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
+    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
+    MpegEncContext * const s = &h->s;
+    int8_t *ref = s->current_picture.ref_index[0];
+    int16_t (*mv)[2] = s->current_picture.motion_val[0];
+    int top_ref, left_ref, diagonal_ref, match_count;
+    const int16_t *A, *B, *C;
+    int b_stride = h->b_stride;
+
+    /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. */
+    if(USES_LIST(h->left_type[LTOP], 0)){
+        left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
+        A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
+        FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
+        if(!(left_ref | AV_RN32A(A))){
+            *mx = *my = 0;
+            return;
+        }
+    }else if(h->left_type[LTOP]){
+        left_ref = LIST_NOT_USED;
+        A = zeromv;
+    }else{
+        *mx = *my = 0;
+        return;
+    }

+    if(USES_LIST(h->top_type, 0)){
+        top_ref = ref[4*h->top_mb_xy + 2];
+        B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
+        if(!(top_ref | AV_RN32A(B))){
+            *mx = *my = 0;
+            return;
+        }
+    }else if(h->top_type){
+        top_ref = LIST_NOT_USED;
+        B = zeromv;
+    }else{
         *mx = *my = 0;
         return;
     }

-    pred_motion(h, 0, 4, 0, 0, mx, my);
+    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
+
+    if(USES_LIST(h->topright_type, 0)){
+        diagonal_ref = ref[4*h->topright_mb_xy + 2];
+        C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
+        FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
+    }else if(h->topright_type){
+        diagonal_ref = LIST_NOT_USED;
+        C = zeromv;
+    }else{
+        if(USES_LIST(h->topleft_type, 0)){
+            diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
+            C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
+            FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
+        }else if(h->topleft_type){
+            diagonal_ref = LIST_NOT_USED;
+            C = zeromv;
+        }else{
+            diagonal_ref = PART_NOT_AVAILABLE;
+            C = zeromv;
+        }
+    }
+
+    match_count= !diagonal_ref + !top_ref + !left_ref;
+    tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
+    if(match_count > 1){
+        *mx= mid_pred(A[0], B[0], C[0]);
+        *my= mid_pred(A[1], B[1], C[1]);
+    }else if(match_count==1){
+        if(!left_ref){
+            *mx= A[0];
+            *my= A[1];
+        }else if(!top_ref){
+            *mx= B[0];
+            *my= B[1];
+        }else{
+            *mx= C[0];
+            *my= C[1];
+        }
+    }else{
+        *mx= mid_pred(A[0], B[0], C[0]);
+        *my= mid_pred(A[1], B[1], C[1]);
+    }

     return;
 }
--
1.7.3.2.146.gca209


From d35a5612ef3aa50d7b91d495c0318639d2415a62 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 6 Jul 2011 07:58:50 -0700
Subject: [PATCH 3/6] H.264: merge fill_rectangle into P-SKIP MV prediction, to match B-SKIP

---
 libavcodec/h264.h        |    7 ++-----
 libavcodec/h264_mvpred.h |   42 ++++++++++++++++++++++--------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index d34e6db..e88005a 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -770,7 +770,7 @@ static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
     return h->pps.chroma_qp_table[t][qscale];
 }

-static av_always_inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
+static av_always_inline void pred_pskip_motion(H264Context * const h);

 static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
@@ -1327,13 +1327,10 @@ static void av_unused decode_mb_skip(H264Context *h){
     }
     else
     {
-        int mx, my;
         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

         fill_decode_neighbors(h, mb_type);
-        pred_pskip_motion(h, &mx, &my);
-        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
-        fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+        pred_pskip_motion(h);
     }

     write_back_motion(h, mb_type);
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index 27adac6..178eb24 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -232,31 +232,31 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int
         }\
     }

-static av_always_inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
+static av_always_inline void pred_pskip_motion(H264Context * const h){
     DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
     DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
     MpegEncContext * const s = &h->s;
     int8_t *ref = s->current_picture.ref_index[0];
     int16_t (*mv)[2] = s->current_picture.motion_val[0];
-    int top_ref, left_ref, diagonal_ref, match_count;
+    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
     const int16_t *A, *B, *C;
     int b_stride = h->b_stride;

+    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+
     /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. */
     if(USES_LIST(h->left_type[LTOP], 0)){
         left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
         A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
         FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
         if(!(left_ref | AV_RN32A(A))){
-            *mx = *my = 0;
-            return;
+            goto zeromv;
         }
     }else if(h->left_type[LTOP]){
         left_ref = LIST_NOT_USED;
         A = zeromv;
     }else{
-        *mx = *my = 0;
-        return;
+        goto zeromv;
     }

     if(USES_LIST(h->top_type, 0)){
@@ -264,15 +264,13 @@ static av_always_inline void pred_pskip_motion(H264Context * const h, int * cons
         B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
         FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
         if(!(top_ref | AV_RN32A(B))){
-            *mx = *my = 0;
-            return;
+            goto zeromv;
         }
     }else if(h->top_type){
         top_ref = LIST_NOT_USED;
         B = zeromv;
     }else{
-        *mx = *my = 0;
-        return;
+        goto zeromv;
     }

     tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
@@ -301,24 +299,28 @@ static av_always_inline void pred_pskip_motion(H264Context * const h, int * cons
     match_count= !diagonal_ref + !top_ref + !left_ref;
     tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
     if(match_count > 1){
-        *mx= mid_pred(A[0], B[0], C[0]);
-        *my= mid_pred(A[1], B[1], C[1]);
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
     }else if(match_count==1){
         if(!left_ref){
-            *mx= A[0];
-            *my= A[1];
+            mx = A[0];
+            my = A[1];
         }else if(!top_ref){
-            *mx= B[0];
-            *my= B[1];
+            mx = B[0];
+            my = B[1];
         }else{
-            *mx= C[0];
-            *my= C[1];
+            mx = C[0];
+            my = C[1];
         }
     }else{
-        *mx= mid_pred(A[0], B[0], C[0]);
-        *my= mid_pred(A[1], B[1], C[1]);
+        mx = mid_pred(A[0], B[0], C[0]);
+        my = mid_pred(A[1], B[1], C[1]);
     }

+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
+    return;
+zeromv:
+    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     return;
 }

--
1.7.3.2.146.gca209


From eda7f1ca1b9748a2027dfe81259b9140bddbdd61 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 6 Jul 2011 12:26:04 -0700
Subject: [PATCH 4/6] H.264: Remove redundant hl_motion_16/8 code

---
 libavcodec/h264.c |   52 ++++++++++------------------------------------------
 1 files changed, 10 insertions(+), 42 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index f7c0345..f79f4d1 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -778,24 +778,6 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
     prefetch_motion(h, 1, pixel_shift, chroma444);
 }

-#define hl_motion_fn(sh, bits) \
-static av_always_inline void hl_motion_ ## bits(H264Context *h, \
-                                       uint8_t *dest_y, \
-                                       uint8_t *dest_cb, uint8_t *dest_cr, \
-                                       qpel_mc_func (*qpix_put)[16], \
-                                       h264_chroma_mc_func (*chroma_put), \
-                                       qpel_mc_func (*qpix_avg)[16], \
-                                       h264_chroma_mc_func (*chroma_avg), \
-                                       h264_weight_func *weight_op, \
-                                       h264_biweight_func *weight_avg, \
-                                       int chroma444) \
-{ \
-    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \
-              qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \
-}
-hl_motion_fn(0, 8);
-hl_motion_fn(1, 16);
-
 static void free_tables(H264Context *h, int free_rbsp){
     int i;
     H264Context *hx;
@@ -1876,18 +1858,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
         }else if(is_h264){
-            if (pixel_shift) {
-                hl_motion_16(h, dest_y, dest_cb, dest_cr,
-                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                             h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab, 0);
-            } else
-                hl_motion_8(h, dest_y, dest_cb, dest_cr,
-                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                            h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab, 0);
+            hl_motion(h, dest_y, dest_cb, dest_cr,
+                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                      h->h264dsp.weight_h264_pixels_tab,
+                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0);
         }

         hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
@@ -2017,18 +1992,11 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
         }else{
-            if (pixel_shift) {
-                hl_motion_16(h, dest[0], dest[1], dest[2],
-                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                             h->h264dsp.weight_h264_pixels_tab,
-                             h->h264dsp.biweight_h264_pixels_tab, 1);
-            } else
-                hl_motion_8(h, dest[0], dest[1], dest[2],
-                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
-                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
-                            h->h264dsp.weight_h264_pixels_tab,
-                            h->h264dsp.biweight_h264_pixels_tab, 1);
+            hl_motion(h, dest[0], dest[1], dest[2],
+                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
+                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+                      h->h264dsp.weight_h264_pixels_tab,
+                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1);
         }

         for (p = 0; p < plane_count; p++)
--
1.7.3.2.146.gca209


From 50f29cfa15d6ddafd7ebc41f91b9131deebdfa03 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 6 Jul 2011 13:25:13 -0700
Subject: [PATCH 5/6] H.264: use fill_rectangle in CABAC decoding

---
 libavcodec/h264_cabac.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 6dacf7a..390a7b6 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1818,8 +1818,7 @@ static av_always_inline void decode_cabac_luma_residual( H264Context *h, const u
                     }
                 }
             } else {
-                uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+16*p] ];
-                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                fill_rectangle(&h->non_zero_count_cache[scan8[4*i8x8+16*p]], 2, 2, 8, 0, 1);
             }
         }
     }
--
1.7.3.2.146.gca209


From 1644142d17d231cd9a077db600587785dd8a354c Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 6 Jul 2011 14:08:30 -0700
Subject: [PATCH 6/6] H.264: improve qp_thresh check

Eliminate the redundant check in filter_mb_fast and take bit depth into
account when calculating qp_thresh.
---
 libavcodec/h264.c            |    4 +++-
 libavcodec/h264_loopfilter.c |    6 +-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index f79f4d1..33c9527 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2931,7 +2931,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             }
         }
     }
-    h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
+    h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
+                 - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
+                 + 6 * (h->sps.bit_depth_luma - 8);

 #if 0 //FMO
     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index c716347..5de9f78 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -216,7 +216,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     MpegEncContext * const s = &h->s;
     int mb_xy;
     int mb_type, left_type, top_type;
-    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+    int qp, qp0, qp1, qpc, qpc0, qpc1;
     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
     int chroma444 = CHROMA444;

@@ -241,10 +241,6 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     qp1 = (qp + qp1 + 1) >> 1;
     qpc0 = (qpc + qpc0 + 1) >> 1;
     qpc1 = (qpc + qpc1 + 1) >> 1;
-    qp_thresh = 15+52 - h->slice_alpha_c0_offset;
-    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
-       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
-        return;

     if( IS_INTRA(mb_type) ) {
         static const int16_t bS4[4] = {4,4,4,4};
--
1.7.3.2.146.gca209