Untitled

From ea071fec1c4637db5e7cef2bcd1d6334061d775f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
Date: Sat, 27 Aug 2011 23:25:40 +0300
Subject: [PATCH 2/2] Correct the limited-range 8->10 conversion algorithm. Now
 follows BT.709 precisely.

---
 filters/video/depth.c |   45 ++++++++++++++++++++++++++++++---------------
 input/raw.c           |   13 +++++++++++--
 2 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/filters/video/depth.c b/filters/video/depth.c
index 9ea2cbc..dddc889 100644
--- a/filters/video/depth.c
+++ b/filters/video/depth.c
@@ -36,6 +36,7 @@ typedef struct

     int bit_depth;
     int dst_csp;
+    int full_range;
     cli_pic_t buffer;
     int16_t *error_buf;
 } depth_hnd_t;
@@ -65,7 +66,7 @@ static int csp_num_interleaved( int csp, int plane )
  * depth again is lossless. */
 #define DITHER_PLANE( pitch ) \
 static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
-                                        int width, int height, int16_t *errors ) \
+                                        int width, int height, int16_t *errors, int full_range ) \
 { \
     const int lshift = 16-BIT_DEPTH; \
     const int rshift = 2*BIT_DEPTH-16; \
@@ -79,7 +80,7 @@ static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int
         { \
             err = err*2 + errors[x] + errors[x+1]; \
             dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
-            errors[x] = err = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); \
+            errors[x] = err = src[x*pitch] - (dst[x*pitch] << lshift) - (full_range*dst[x*pitch] >> rshift); \
         } \
     } \
 }
@@ -87,7 +88,7 @@ static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int
 DITHER_PLANE( 1 )
 DITHER_PLANE( 2 )

-static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf )
+static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf, int full_range )
 {
     int csp_mask = img->csp & X264_CSP_MASK;
     for( int i = 0; i < img->planes; i++ )
@@ -98,7 +99,7 @@ static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf

 #define CALL_DITHER_PLANE( pitch, off ) \
         dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
-                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf )
+                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf, full_range )

         if( num_interleaved == 1 )
         {
@@ -112,7 +113,7 @@ static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf
     }
 }

-static void scale_image( cli_image_t *output, cli_image_t *img )
+static void scale_image( cli_image_t *output, cli_image_t *img, int full_range )
 {
     /* this function mimics how swscale does upconversion. 8-bit is converted
      * to 16-bit through left shifting the orginal value with 8 and then adding
@@ -120,7 +121,8 @@ static void scale_image( cli_image_t *output, cli_image_t *img )
      * while also being fast. for n-bit we basically do the same thing, but we
      * discard the lower 16-n bits. */
     int csp_mask = img->csp & X264_CSP_MASK;
-    const int shift = 16-BIT_DEPTH;
+    /* Shift count: full range shifts right by 16-BIT_DEPTH, limited range shifts left by BIT_DEPTH-8 */
+    const int shift = full_range ? 16 - BIT_DEPTH : BIT_DEPTH - 8;
     for( int i = 0; i < img->planes; i++ )
     {
         uint8_t *src = img->plane[i];
@@ -128,14 +130,25 @@ static void scale_image( cli_image_t *output, cli_image_t *img )
         int height = x264_cli_csps[csp_mask].height[i] * img->height;
         int width = x264_cli_csps[csp_mask].width[i] * img->width;

-        for( int j = 0; j < height; j++ )
-        {
-            for( int k = 0; k < width; k++ )
-                dst[k] = ((src[k] << 8) + src[k]) >> shift;
+#define LOOP(math)                              \
+    do {                                        \
+        for( int j = 0; j < height; j++ )       \
+        {                                       \
+            for( int k = 0; k < width; k++ )    \
+                dst[k] = math;                  \
+            src += img->stride[i];              \
+            dst += output->stride[i]/2;         \
+        }                                       \
+    } while (0)
+
+        if( full_range )
+            /* x264's original algorithm */
+            LOOP( ((src[k] << 8) + src[k]) >> shift );
+        else
+            /* Limited range algorithm mentioned in BT.709, Part 2 */
+            LOOP( src[k] << shift );

-            src += img->stride[i];
-            dst += output->stride[i]/2;
-        }
+#undef LOOP
     }
 }

@@ -148,12 +161,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )

     if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH )
     {
-        dither_image( &h->buffer.img, &output->img, h->error_buf );
+        dither_image( &h->buffer.img, &output->img, h->error_buf, h->full_range );
         output->img = h->buffer.img;
     }
     else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) )
     {
-        scale_image( &h->buffer.img, &output->img );
+        scale_image( &h->buffer.img, &output->img, h->full_range );
         output->img = h->buffer.img;
     }
     return 0;
@@ -180,6 +193,7 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
     int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
     int csp = ~(~info->csp ^ change_fmt);
     int bit_depth = 8*x264_cli_csp_depth_factor( csp );
+    int full_range = info->full_range == 1;

     if( opt_string )
     {
@@ -214,6 +228,7 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,

         h->error_buf = (int16_t*)(h + 1);
         h->dst_csp = csp;
+        h->full_range = full_range;
         h->bit_depth = bit_depth;
         h->prev_hnd = *handle;
         h->prev_filter = *filter;
diff --git a/input/raw.c b/input/raw.c
index 6d4bb28..9b08ae0 100644
--- a/input/raw.c
+++ b/input/raw.c
@@ -35,6 +35,7 @@ typedef struct
     uint64_t plane_size[4];
     uint64_t frame_size;
     int bit_depth;
+    int full_range;
 } raw_hnd_t;

 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
@@ -43,6 +44,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
     if( !h )
         return -1;

+    h->full_range = info->full_range == 1;
+
     if( !opt->resolution )
     {
         /* try to parse the file name */
@@ -114,8 +117,14 @@ static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
             uint64_t pixel_count = h->plane_size[i];
             int lshift = 16 - h->bit_depth;
             int rshift = 2*h->bit_depth - 16;
-            for( uint64_t j = 0; j < pixel_count; j++ )
-                plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
+            if( h->full_range )
+                for( uint64_t j = 0; j < pixel_count; j++ )
+                    /* x264's original algorithm */
+                    plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
+            else
+                for( uint64_t j = 0; j < pixel_count; j++ )
+                    /* Limited range algorithm mentioned in BT.709, Part 2 */
+                    plane[j] = plane[j] << lshift;
         }
     }
     return error;
--
1.7.5.1