Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From: James Darnley <james.darnley@gmail.com>
- Subject: [PATCH 1/3] Add hqdn3d filter
- Makefile | 2 +-
- configure | 2 +-
- filters/video/hqdn3d.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++
- filters/video/video.c | 1 +
- 4 files changed, 292 insertions(+), 2 deletions(-)
- create mode 100644 filters/video/hqdn3d.c
- Index: Makefile
- ===================================================================
- --- Makefile_orig
- +++ Makefile
- @@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
- output/flv.c output/flv_bytestream.c filters/filters.c \
- filters/video/video.c filters/video/source.c filters/video/internal.c \
- filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
- - filters/video/select_every.c filters/video/crop.c
- + filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c
- SRCSO =
- Index: configure
- ===================================================================
- --- configure_orig
- +++ configure
- @@ -770,7 +770,7 @@ Libs: $pclibs
- Cflags: -I$includedir
- EOF
- -filters="crop select_every"
- +filters="crop select_every hqdn3d"
- [ $swscale = yes ] && filters="resize $filters"
- cat > conftest.log <<EOF
- Index: filters/video/hqdn3d.c
- ===================================================================
- --- /dev/null
- +++ filters/video/hqdn3d.c
- @@ -0,0 +1,289 @@
- +/*****************************************************************************
- + * hqdn3d.c: x264 hqdn3d filter
- + *****************************************************************************
- + * Copyright (C) 2003 Daniel Moreno <comac@comac.darktech.org>
- + * Avisynth port (C) 2005 Loren Merritt <lorenm@u.washington.edu>
- + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#include <math.h>
- +#include "video.h"
- +#define NAME "hqdn3d"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
- +
- +#define PARAM1_DEFAULT 4.0
- +#define PARAM2_DEFAULT 3.0
- +#define PARAM3_DEFAULT 6.0
- +
- +cli_vid_filter_t hqdn3d_filter;
- +
- +typedef struct
- +{
- + hnd_t prev_hnd;
- + cli_vid_filter_t prev_filter;
- + int coefs[4][512*16];
- + unsigned int *line;
- + unsigned short *frame[3];
- + int w, h, cw, ch, first_frame;
- +} hqdn3d_hnd_t;
- +
- +static void help( int longhelp )
- +{
- + printf( " "NAME":ls,cs,lt,ct\n" );
- + if(!longhelp)
- + return;
- + printf( " Denoises the image using mplayer's hqdn3d filter\n"
- + " The four arguments are floats and are optional\n"
- + " If any options are omitted, they will assume a\n"
- + " value based on previous options that you did specify\n"
- + " - ls = luma spatial filter strength [%.1lf]\n"
- + " - cs = chroma spatial filter strength [%.1lf]\n"
- + " - lt = luma temporal filter strength [%.1lf]\n"
- + " - ct = chroma temporal filter strength [%.1lf]\n",
- + PARAM1_DEFAULT, PARAM2_DEFAULT, PARAM3_DEFAULT,
- + PARAM3_DEFAULT * PARAM2_DEFAULT / PARAM1_DEFAULT);
- +}
- +
- +#define ABS(A) ( (A) > 0 ? (A) : -(A) )
- +
- +static void precalc_coefs(int *ct, double dist25)
- +{
- + //int i;
- + double gamma_d, simil, c;
- +
- + gamma_d = log(0.25) / log(1.0 - dist25/255.0 - 0.00001);
- +
- + for (int i = -255*16; i < 256*16; i++)
- + {
- + simil = 1.0 - ABS(i) / (16*255.0);
- + c = pow(simil, gamma_d) * 65536.0 * (double)i / 16.0;
- + ct[16*256+i] = (int)((c<0) ? (c-0.5) : (c+0.5));
- + }
- +}
- +
- +/* Create the hqdn3d filter and splice it onto the end of the filter chain.
- + * opt_string may hold up to four comma-separated strengths "ls,cs,lt,ct";
- + * any omitted trailing value is derived from the values that were given
- + * (or from the defaults when none were). On success *handle and *filter are
- + * replaced with this filter's handle and callbacks and 0 is returned.
- + * An unsupported colourspace or a failed allocation fails without leaking. */
- +static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
- +                 x264_param_t *param, char *opt_string )
- +{
- +    /* Defaults; overridden below by whatever the option string supplies. */
- +    double lum_spac = PARAM1_DEFAULT;
- +    double chrom_spac = PARAM2_DEFAULT;
- +    double lum_tmp = PARAM3_DEFAULT;
- +    double chrom_tmp;
- +    double param1, param2, param3, param4;
- +    int parsed = 0;
- +
- +    /* Reject unsupported colourspaces before allocating anything, so this
- +     * error path has nothing to clean up. */
- +    FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
- +                  || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
- +                   "Only planar YUV images supported\n" )
- +
- +    hqdn3d_hnd_t *h = calloc( 1, sizeof(*h) );
- +    FAIL_IF_ERROR( !h, "Memory allocation error (hqdn3d.c:%d)\n", __LINE__ )
- +
- +    /* Luma and chroma plane dimensions for this colourspace. */
- +    h->w = info->width*x264_cli_csps[info->csp].width[0];
- +    h->h = info->height*x264_cli_csps[info->csp].height[0];
- +    h->cw = info->width*x264_cli_csps[info->csp].width[1];
- +    h->ch = info->height*x264_cli_csps[info->csp].height[1];
- +
- +    /* line: one spatially filtered row, shared by all three planes.
- +     * frame[i]: previous filtered output of plane i (temporal reference). */
- +    h->line = calloc( info->width, sizeof(*h->line) );
- +    h->frame[0] = malloc( h->w * h->h * sizeof(short) );
- +    h->frame[1] = malloc( h->cw * h->ch * sizeof(short) );
- +    h->frame[2] = malloc( h->cw * h->ch * sizeof(short) );
- +    if( !h->line || !h->frame[0] || !h->frame[1] || !h->frame[2] )
- +    {
- +        /* Release whatever was allocated so a failed init does not leak. */
- +        free( h->line );
- +        for( int i = 0; i < 3; i++ )
- +            free( h->frame[i] );
- +        free( h );
- +        FAIL_IF_ERROR( 1, "Memory allocation error(hqdn3d.c:%d)\n", __LINE__ )
- +        return -1;
- +    }
- +
- +    /* sscanf reports how many strengths were read; 0 or EOF keeps the defaults. */
- +    if( opt_string )
- +        parsed = sscanf( opt_string, "%lf,%lf,%lf,%lf",
- +                         &param1, &param2, &param3, &param4 );
- +
- +    if( parsed >= 1 )
- +    {
- +        /* Values not given explicitly scale with the luma spatial strength. */
- +        lum_spac = param1;
- +        chrom_spac = parsed >= 2 ? param2 : PARAM2_DEFAULT * param1 / PARAM1_DEFAULT;
- +        lum_tmp = parsed >= 3 ? param3 : PARAM3_DEFAULT * param1 / PARAM1_DEFAULT;
- +    }
- +
- +    /* Chroma temporal keeps the luma temporal/spatial ratio unless given. */
- +    chrom_tmp = parsed >= 4 ? param4 : lum_tmp * chrom_spac / lum_spac;
- +
- +    /* coefs: [0] luma spatial, [1] luma temporal, [2] chroma spatial,
- +     * [3] chroma temporal. */
- +    precalc_coefs(h->coefs[0], lum_spac);
- +    precalc_coefs(h->coefs[1], lum_tmp);
- +    precalc_coefs(h->coefs[2], chrom_spac);
- +    precalc_coefs(h->coefs[3], chrom_tmp);
- +
- +    x264_cli_log( NAME, X264_LOG_INFO,
- +                  "using strengths %.1lf,%.1lf,%.1lf,%.1lf\n",
- +                  lum_spac, chrom_spac, lum_tmp, chrom_tmp );
- +
- +    /* Remember the upstream filter, then install ourselves in its place. */
- +    h->first_frame = 1;
- +    h->prev_filter = *filter;
- +    h->prev_hnd = *handle;
- +    *handle = h;
- +    *filter = hqdn3d_filter;
- +    return 0;
- +}
- +
- +static inline unsigned int low_pass_mul(unsigned int prev_mul, unsigned int curr_mul, int* coef)
- +{
- +// int d_mul= (prev_mul&0xFFFFFF)-(curr_mul&0xFFFFFF);
- + int d_mul= prev_mul-curr_mul;
- + int d=((d_mul+0x10007FF)/(65536/16));
- + return curr_mul + coef[d];
- +}
- +
- +static void denoise(const unsigned char *frame, // source plane, rows stride_src apart
- + unsigned char *frame_dest, // destination plane, rows stride_dest apart
- + unsigned int *line_ant, // spatially filtered previous row: w entries at pixel<<16 scale
- + unsigned short *frame_ant, // previous output at pixel<<8 scale, packed w entries per row
- + int w, int h, int stride_src, int stride_dest,
- + int *horizontal, int *vertical, int *temporal) // coefficient tables handed to low_pass_mul
- +{
- + // Filters one plane: each pixel is low-passed against its left neighbour, the row above (line_ant) and the previous output (frame_ant).
- + int line_offs_src = 0, line_offs_dest = 0;
- + unsigned int pixel_ant;
- + int pixel_dst;
- +
- + /* First pixel has no left or top neighbour: only the previous frame is used */
- + line_ant[0] = pixel_ant = frame[0]<<16;
- + pixel_dst = low_pass_mul(frame_ant[0]<<8, pixel_ant, temporal);
- + frame_ant[0] = ((pixel_dst+0x1000007F)/256);
- + frame_dest[0]= ((pixel_dst+0x10007FFF)/65536);
- +
- + /* First line has no top neighbour: each pixel uses only its left
- + * neighbour and the previous frame */
- + for (int x = 1; x < w; x++){
- + line_ant[x] = pixel_ant = low_pass_mul(pixel_ant, frame[x]<<16, horizontal);
- + pixel_dst = low_pass_mul(frame_ant[x]<<8, pixel_ant, temporal);
- + frame_ant[x] = ((pixel_dst+0x1000007F)/256);
- + frame_dest[x]= ((pixel_dst+0x10007FFF)/65536);
- + }
- +
- + for (int y = 1; y < h; y++){
- + // frame_ant rows are packed w entries apart, while frame and frame_dest advance by their strides
- + unsigned short* LinePrev=&frame_ant[y*w];
- + line_offs_src += stride_src, line_offs_dest += stride_dest;
- + /* First pixel on each line has no left neighbour: vertical and temporal passes only */
- + pixel_ant = frame[line_offs_src]<<16;
- + line_ant[0] = low_pass_mul(line_ant[0], pixel_ant, vertical);
- + pixel_dst = low_pass_mul(LinePrev[0]<<8, line_ant[0], temporal);
- + LinePrev[0] = ((pixel_dst+0x1000007F)/256);
- + frame_dest[line_offs_dest]= ((pixel_dst+0x10007FFF)/65536);
- +
- + for (int x = 1; x < w; x++){
- + // horizontal pass, then vertical pass, then temporal pass for this pixel
- + /* Remaining pixels have left, top and previous-frame neighbours */
- + pixel_ant = low_pass_mul(pixel_ant, frame[line_offs_src+x]<<16, horizontal);
- + line_ant[x] = low_pass_mul(line_ant[x], pixel_ant, vertical);
- + pixel_dst = low_pass_mul(LinePrev[x]<<8, line_ant[x], temporal);
- + LinePrev[x] = ((pixel_dst+0x1000007F)/256);
- + frame_dest[line_offs_dest+x]= ((pixel_dst+0x10007FFF)/65536);
- + }
- + }
- +}
- +
- +static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
- +{
- + hqdn3d_hnd_t *h = handle;
- +
- + if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
- + return -1;
- +
- + if( h->first_frame )
- + {
- + int width = h->w;
- + int height = h->h;
- + int stride = output->img.stride[0];
- + for(int y = 0; y<height; y++)
- + for(int x = 0; x<width; x++)
- + h->frame[0][y*width+x] = output->img.plane[0][y*stride+x] << 8;
- +
- + width = h->cw;
- + height = h->ch;
- + stride = output->img.stride[1];
- + for(int y = 0; y<height; y++)
- + for(int x = 0; x<width; x++)
- + h->frame[1][y*width+x] = output->img.plane[1][y*stride+x] << 8;
- +
- + stride = output->img.stride[2];
- + for(int y = 0; y<height; y++)
- + for(int x = 0; x<width; x++)
- + h->frame[2][y*width+x] = output->img.plane[2][y*stride+x] << 8;
- +
- + h->first_frame = 0;
- + }
- +
- + denoise(output->img.plane[0],
- + output->img.plane[0],
- + h->line, h->frame[0],
- + h->w, h->h,
- + output->img.stride[0], output->img.stride[0],
- + h->coefs[0], h->coefs[0], h->coefs[1]);
- + denoise(output->img.plane[1],
- + output->img.plane[1],
- + h->line, h->frame[1],
- + h->cw, h->ch,
- + output->img.stride[1], output->img.stride[1],
- + h->coefs[2], h->coefs[2], h->coefs[3]);
- + denoise(output->img.plane[2],
- + output->img.plane[2],
- + h->line, h->frame[2],
- + h->cw, h->ch,
- + output->img.stride[2], output->img.stride[2],
- + h->coefs[2], h->coefs[2], h->coefs[3]);
- + return 0;
- +}
- +
- +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
- +{
- + hqdn3d_hnd_t *h = handle;
- + return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
- +}
- +
- +static void free_filter( hnd_t handle )
- +{
- + hqdn3d_hnd_t *h = handle;
- + h->prev_filter.free( h->prev_hnd );
- + free( h->line );
- + for(int i = 0; i<3; i++)
- + free( h->frame[i] );
- + free( h );
- +}
- +
- +cli_vid_filter_t hqdn3d_filter = { NAME, help, init, get_frame, release_frame, free_filter, NULL };
- Index: filters/video/video.c
- ===================================================================
- --- filters/video/video_orig.c
- +++ filters/video/video.c
- @@ -46,6 +46,7 @@ void x264_register_vid_filters()
- REGISTER_VFILTER( fix_vfr_pts );
- REGISTER_VFILTER( resize );
- REGISTER_VFILTER( select_every );
- + REGISTER_VFILTER( hqdn3d );
- }
- int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
- From: James Darnley <james.darnley@gmail.com>
- Subject: [PATCH 2/3] Add yadif filter
- Makefile | 3 +-
- configure | 2 +-
- filters/video/avs_vf_yadif_template.h | 245 ++++++++++++++++++++++
- filters/video/video.c | 1 +
- filters/video/yadif.c | 230 +++++++++++++++++++++
- filters/video/yadif_filter_line.c | 358 +++++++++++++++++++++++++++++++++
- filters/video/yadif_filter_line.h | 27 +++
- 7 files changed, 864 insertions(+), 2 deletions(-)
- create mode 100644 filters/video/avs_vf_yadif_template.h
- create mode 100644 filters/video/yadif.c
- create mode 100644 filters/video/yadif_filter_line.c
- create mode 100644 filters/video/yadif_filter_line.h
- Index: Makefile
- ===================================================================
- --- Makefile_orig
- +++ Makefile
- @@ -18,7 +18,8 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
- output/flv.c output/flv_bytestream.c filters/filters.c \
- filters/video/video.c filters/video/source.c filters/video/internal.c \
- filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
- - filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c
- + filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c \
- + filters/video/yadif.c filters/video/yadif_filter_line.c
- SRCSO =
- Index: configure
- ===================================================================
- --- configure_orig
- +++ configure
- @@ -770,7 +770,7 @@ Libs: $pclibs
- Cflags: -I$includedir
- EOF
- -filters="crop select_every hqdn3d"
- +filters="crop select_every hqdn3d yadif"
- [ $swscale = yes ] && filters="resize $filters"
- cat > conftest.log <<EOF
- Index: filters/video/avs_vf_yadif_template.h
- ===================================================================
- --- /dev/null
- +++ filters/video/avs_vf_yadif_template.h
- @@ -0,0 +1,245 @@
- +/*
- + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
- + *
- + * SSE2/SSSE3 version (custom optimization) by h.yamagata
- + *
- + * Small fix by Alexander Balakhnin (fizick@avisynth.org.ru)
- + *
- + * MPlayer is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * MPlayer is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License along
- + * with MPlayer; if not, write to the Free Software Foundation, Inc.,
- + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- + */
- +
- +#define LOAD8(mem,dst) \
- + "movq "mem", "#dst" \n\t"\
- + "punpcklbw %%xmm7, "#dst" \n\t"
- +
- +#define CHECK(pj,mj) \
- + "movdqu "#pj"(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1+j] */\
- + "movdqu "#mj"(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1-j] */\
- + "movdqa %%xmm2, %%xmm4 \n\t"\
- + "movdqa %%xmm2, %%xmm5 \n\t"\
- + "pxor %%xmm3, %%xmm4 \n\t"\
- + "pavgb %%xmm3, %%xmm5 \n\t"\
- + "pand %[pb1], %%xmm4 \n\t"\
- + "psubusb %%xmm4, %%xmm5 \n\t"\
- + "psrldq $1, %%xmm5 \n\t"\
- + "punpcklbw %%xmm7, %%xmm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
- + "movdqa %%xmm2, %%xmm4 \n\t"\
- + "psubusb %%xmm3, %%xmm2 \n\t"\
- + "psubusb %%xmm4, %%xmm3 \n\t"\
- + "pmaxub %%xmm3, %%xmm2 \n\t"\
- + "movdqa %%xmm2, %%xmm3 \n\t"\
- + "movdqa %%xmm2, %%xmm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
- + "psrldq $1, %%xmm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
- + "psrldq $2, %%xmm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
- + "punpcklbw %%xmm7, %%xmm2 \n\t"\
- + "punpcklbw %%xmm7, %%xmm3 \n\t"\
- + "punpcklbw %%xmm7, %%xmm4 \n\t"\
- + "paddw %%xmm3, %%xmm2 \n\t"\
- + "paddw %%xmm4, %%xmm2 \n\t" /* score */
- +
- +#define CHECK1 \
- + "movdqa %%xmm0, %%xmm3 \n\t"\
- + "pcmpgtw %%xmm2, %%xmm3 \n\t" /* if(score < spatial_score) */\
- + "pminsw %%xmm2, %%xmm0 \n\t" /* spatial_score= score; */\
- + "movdqa %%xmm3, %%xmm6 \n\t"\
- + "pand %%xmm3, %%xmm5 \n\t"\
- + "pandn %%xmm1, %%xmm3 \n\t"\
- + "por %%xmm5, %%xmm3 \n\t"\
- + "movdqa %%xmm3, %%xmm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
- +
- +#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
- + hurts both quality and speed, but matches the C version. */\
- + "paddw %[pw1], %%xmm6 \n\t"\
- + "psllw $14, %%xmm6 \n\t"\
- + "paddsw %%xmm6, %%xmm2 \n\t"\
- + "movdqa %%xmm0, %%xmm3 \n\t"\
- + "pcmpgtw %%xmm2, %%xmm3 \n\t"\
- + "pminsw %%xmm2, %%xmm0 \n\t"\
- + "pand %%xmm3, %%xmm5 \n\t"\
- + "pandn %%xmm1, %%xmm3 \n\t"\
- + "por %%xmm5, %%xmm3 \n\t"\
- + "movdqa %%xmm3, %%xmm1 \n\t"
- +
- +/* mode argument mod - Fizick */
- +
- +/* static attribute_align_arg void FILTER_LINE_FUNC_NAME(YadifContext *yadctx, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
- + const int mode = yadctx->mode; */
- +static attribute_align_arg void FILTER_LINE_FUNC_NAME(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
- + DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
- + DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
- + DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
- + DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
- + int x;
- + static DECLARE_ALIGNED(16, const unsigned short, pw_1[]) =
- + {
- + 0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001
- + };
- +
- + static DECLARE_ALIGNED(16, const unsigned short, pb_1[]) =
- + {
- + 0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101
- + };
- +
- +
- +#define FILTER\
- + for(x=0; x<w; x+=8){\
- + __asm__ volatile(\
- + "pxor %%xmm7, %%xmm7 \n\t"\
- + LOAD8("(%[cur],%[mrefs])", %%xmm0) /* c = cur[x-refs] */\
- + LOAD8("(%[cur],%[prefs])", %%xmm1) /* e = cur[x+refs] */\
- + LOAD8("(%["prev2"])", %%xmm2) /* prev2[x] */\
- + LOAD8("(%["next2"])", %%xmm3) /* next2[x] */\
- + "movdqa %%xmm3, %%xmm4 \n\t"\
- + "paddw %%xmm2, %%xmm3 \n\t"\
- + "psraw $1, %%xmm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
- + "movdqa %%xmm0, %[tmp0] \n\t" /* c */\
- + "movdqa %%xmm3, %[tmp1] \n\t" /* d */\
- + "movdqa %%xmm1, %[tmp2] \n\t" /* e */\
- + "psubw %%xmm4, %%xmm2 \n\t"\
- + PABS( %%xmm4, %%xmm2) /* temporal_diff0 */\
- + LOAD8("(%[prev],%[mrefs])", %%xmm3) /* prev[x-refs] */\
- + LOAD8("(%[prev],%[prefs])", %%xmm4) /* prev[x+refs] */\
- + "psubw %%xmm0, %%xmm3 \n\t"\
- + "psubw %%xmm1, %%xmm4 \n\t"\
- + PABS( %%xmm5, %%xmm3)\
- + PABS( %%xmm5, %%xmm4)\
- + "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff1 */\
- + "psrlw $1, %%xmm2 \n\t"\
- + "psrlw $1, %%xmm3 \n\t"\
- + "pmaxsw %%xmm3, %%xmm2 \n\t"\
- + LOAD8("(%[next],%[mrefs])", %%xmm3) /* next[x-refs] */\
- + LOAD8("(%[next],%[prefs])", %%xmm4) /* next[x+refs] */\
- + "psubw %%xmm0, %%xmm3 \n\t"\
- + "psubw %%xmm1, %%xmm4 \n\t"\
- + PABS( %%xmm5, %%xmm3)\
- + PABS( %%xmm5, %%xmm4)\
- + "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff2 */\
- + "psrlw $1, %%xmm3 \n\t"\
- + "pmaxsw %%xmm3, %%xmm2 \n\t"\
- + "movdqa %%xmm2, %[tmp3] \n\t" /* diff */\
- +\
- + "paddw %%xmm0, %%xmm1 \n\t"\
- + "paddw %%xmm0, %%xmm0 \n\t"\
- + "psubw %%xmm1, %%xmm0 \n\t"\
- + "psrlw $1, %%xmm1 \n\t" /* spatial_pred */\
- + PABS( %%xmm2, %%xmm0) /* ABS(c-e) */\
- +\
- + "movdqu -1(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1] */\
- + "movdqu -1(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1] */\
- + "movdqa %%xmm2, %%xmm4 \n\t"\
- + "psubusb %%xmm3, %%xmm2 \n\t"\
- + "psubusb %%xmm4, %%xmm3 \n\t"\
- + "pmaxub %%xmm3, %%xmm2 \n\t"\
- + /*"pshuflw $9,%%xmm2, %%xmm3 \n\t"*/\
- + /*"pshufhw $9,%%xmm2, %%xmm3 \n\t"*/\
- + "movdqa %%xmm2, %%xmm3 \n\t" /* correct replacement (here) */\
- + "psrldq $2, %%xmm3 \n\t"/* for "pshufw $9,%%mm2, %%mm3" - fix by Fizick */\
- + "punpcklbw %%xmm7, %%xmm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
- + "punpcklbw %%xmm7, %%xmm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
- + "paddw %%xmm2, %%xmm0 \n\t"\
- + "paddw %%xmm3, %%xmm0 \n\t"\
- + "psubw %[pw1], %%xmm0 \n\t" /* spatial_score */\
- +\
- + CHECK(-2,0)\
- + CHECK1\
- + CHECK(-3,1)\
- + CHECK2\
- + CHECK(0,-2)\
- + CHECK1\
- + CHECK(1,-3)\
- + CHECK2\
- +\
- + /* if(yadctx->mode<2) ... */\
- + "movdqa %[tmp3], %%xmm6 \n\t" /* diff */\
- + "cmp $2, %[mode] \n\t"\
- + "jge 1f \n\t"\
- + LOAD8("(%["prev2"],%[mrefs],2)", %%xmm2) /* prev2[x-2*refs] */\
- + LOAD8("(%["next2"],%[mrefs],2)", %%xmm4) /* next2[x-2*refs] */\
- + LOAD8("(%["prev2"],%[prefs],2)", %%xmm3) /* prev2[x+2*refs] */\
- + LOAD8("(%["next2"],%[prefs],2)", %%xmm5) /* next2[x+2*refs] */\
- + "paddw %%xmm4, %%xmm2 \n\t"\
- + "paddw %%xmm5, %%xmm3 \n\t"\
- + "psrlw $1, %%xmm2 \n\t" /* b */\
- + "psrlw $1, %%xmm3 \n\t" /* f */\
- + "movdqa %[tmp0], %%xmm4 \n\t" /* c */\
- + "movdqa %[tmp1], %%xmm5 \n\t" /* d */\
- + "movdqa %[tmp2], %%xmm7 \n\t" /* e */\
- + "psubw %%xmm4, %%xmm2 \n\t" /* b-c */\
- + "psubw %%xmm7, %%xmm3 \n\t" /* f-e */\
- + "movdqa %%xmm5, %%xmm0 \n\t"\
- + "psubw %%xmm4, %%xmm5 \n\t" /* d-c */\
- + "psubw %%xmm7, %%xmm0 \n\t" /* d-e */\
- + "movdqa %%xmm2, %%xmm4 \n\t"\
- + "pminsw %%xmm3, %%xmm2 \n\t"\
- + "pmaxsw %%xmm4, %%xmm3 \n\t"\
- + "pmaxsw %%xmm5, %%xmm2 \n\t"\
- + "pminsw %%xmm5, %%xmm3 \n\t"\
- + "pmaxsw %%xmm0, %%xmm2 \n\t" /* max */\
- + "pminsw %%xmm0, %%xmm3 \n\t" /* min */\
- + "pxor %%xmm4, %%xmm4 \n\t"\
- + "pmaxsw %%xmm3, %%xmm6 \n\t"\
- + "psubw %%xmm2, %%xmm4 \n\t" /* -max */\
- + "pmaxsw %%xmm4, %%xmm6 \n\t" /* diff= MAX3(diff, min, -max); */\
- + "1: \n\t"\
- +\
- + "movdqa %[tmp1], %%xmm2 \n\t" /* d */\
- + "movdqa %%xmm2, %%xmm3 \n\t"\
- + "psubw %%xmm6, %%xmm2 \n\t" /* d-diff */\
- + "paddw %%xmm6, %%xmm3 \n\t" /* d+diff */\
- + "pmaxsw %%xmm2, %%xmm1 \n\t"\
- + "pminsw %%xmm3, %%xmm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
- + "packuswb %%xmm1, %%xmm1 \n\t"\
- +\
- + :[tmp0]"=m"(tmp0),\
- + [tmp1]"=m"(tmp1),\
- + [tmp2]"=m"(tmp2),\
- + [tmp3]"=m"(tmp3)\
- + :[prev] "r"(prev),\
- + [cur] "r"(cur),\
- + [next] "r"(next),\
- + [prefs]"r"((long)refs),\
- + [mrefs]"r"((long)-refs),\
- + [pw1] "m"(*pw_1),\
- + [pb1] "m"(*pb_1),\
- + [mode] "g"(mode)\
- + );\
- + __asm__ volatile("movq %%xmm1, %0" :"=m"(*dst));\
- + dst += 8;\
- + prev+= 8;\
- + cur += 8;\
- + next+= 8;\
- + }
- +
- + if(parity){
- +#define prev2 "prev"
- +#define next2 "cur"
- + FILTER
- +#undef prev2
- +#undef next2
- + }else{
- +#define prev2 "cur"
- +#define next2 "next"
- + FILTER
- +#undef prev2
- +#undef next2
- + }
- +}
- +#undef LOAD8
- +#undef PABS
- +#undef CHECK
- +#undef CHECK1
- +#undef CHECK2
- +#undef FILTER
- +#undef FILTER_LINE_FUNC_NAME
- Index: filters/video/video.c
- ===================================================================
- --- filters/video/video_orig.c
- +++ filters/video/video.c
- @@ -47,6 +47,7 @@ void x264_register_vid_filters()
- REGISTER_VFILTER( resize );
- REGISTER_VFILTER( select_every );
- REGISTER_VFILTER( hqdn3d );
- + REGISTER_VFILTER( yadif );
- }
- int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
- Index: filters/video/yadif.c
- ===================================================================
- --- /dev/null
- +++ filters/video/yadif.c
- @@ -0,0 +1,230 @@
- +/*****************************************************************************
- + * yadif.c: x264 yadif filter
- + *****************************************************************************
- + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
- + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
- + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#include <string.h>
- +#include "video.h"
- +#include "yadif_filter_line.h"
- +#define NAME "yadif"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
- +
- +cli_vid_filter_t yadif_filter;
- +filter_line_func filter_line;
- +
- +typedef struct {
- + hnd_t prev_handle;
- + cli_vid_filter_t prev_filter;
- + int mode;
- + int tff;
- + cli_pic_t buffer;
- +} yadif_handle_t;
- +
- +/***********************
- +* Help *
- +***********************/
- +
- +/* Print the usage line; with longhelp set, also describe each option. */
- +static void help( int longhelp )
- +{
- +    printf( " "NAME":[mode][,order]\n" );
- +    if( !longhelp )
- +        return;
- +    printf( " Deinterlaces the picture using mplayer's YADIF\n"
- +" mode: sets the deinterlacing mode\n"
- +" 0 - single-rate deinterlacing (default)\n"
- +" 1 - double-rate deinterlacing (bob)\n"
- +" 2 - single-rate deinterlacing without spatial interlacing check\n"
- +" 3 - double-rate deinterlacing without spatial interlacing check\n"
- +" order: forces the field order\n"
- +" tff - top-field first\n"
- +" bff - bottom-field first\n" );
- +}
- +
- +/***********************
- +* Init *
- +***********************/
- +
- +/* Create the yadif filter: validate input, parse options, add an upstream "cache"
- + * filter and update *info. Every failure path frees what it allocated. */
- +static int yadif_init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
- +{
- +    FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
- +                  || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
- +                   "Only planar YUV images supported\n" )
- +
- +    yadif_handle_t *h = calloc( 1, sizeof(*h) );
- +    if( !h )
- +        return -1;
- +    if( x264_cli_pic_alloc( &h->buffer, info->csp, info->width, info->height ) ) {
- +        free( h );
- +        return -1;
- +    }
- +
- +    char *mode, *order, *opt;
- +    static const char *optlist[] = { "mode", "order", NULL };
- +    char **opts = x264_split_options( opt_string, optlist );
- +
- +    opt = x264_get_option( "mode", opts );
- +    mode = (opt) ? opt : "";
- +    h->mode = x264_otoi(mode,0);
- +    if( h->mode < 0 || h->mode > 3 ) {
- +        x264_cli_log( NAME, X264_LOG_WARNING, "Invalid mode (%s), ignoring\n", mode);
- +        h->mode = 0; /* fall back to the default mode */
- +    }
- +
- +    opt = x264_get_option( "order", opts );
- +    order = (opt) ? opt : "";
- +    if( !strcmp(order, "top") || !strcmp(order, "tff") )
- +        h->tff = 1;
- +    else if( !strcmp(order, "bottom") || !strcmp(order, "bff") )
- +        h->tff = 0;
- +    else {
- +        if( opt )
- +            x264_cli_log( NAME, X264_LOG_WARNING, "Unknown order (%s), ignoring\n", order);
- +        h->tff = info->tff; /* no usable order given: use the input's field order */
- +    }
- +    x264_free_string_array(opts);
- +
- +    if( x264_init_vid_filter( "cache", handle, filter, info, param, (void*)3 ) ) {
- +        x264_cli_pic_clean( &h->buffer );
- +        free( h );
- +        return -1;
- +    }
- +    if( h->mode&1 ) { /* odd modes are double-rate: twice the frames and frame rate */
- +        info->num_frames *=2;
- +        info->fps_num *=2;
- +        info->timebase_den *=2;
- +    }
- +    info->interlaced = 0;
- +    h->prev_filter = *filter;
- +    h->prev_handle = *handle;
- +    *handle = h;
- +    *filter = yadif_filter;
- +    filter_line = get_filter_func(param->cpu);
- +    x264_cli_log( NAME, X264_LOG_INFO, "%s-rate deinterlacing "
- +                  "%s spatial interlacing check, %s-field first\n",
- +                  (h->mode&1) ? "double" : "single",
- +                  (h->mode&2) ? "without" : "with",
- +                  (h->tff) ? "top" : "bottom" );
- +    return 0;
- +}
- +
- +/***********************
- +* Process Frames *
- +***********************/
- +static void interpolate(uint8_t *dst, const uint8_t *cur0, const uint8_t *cur2, int w)
- +{
- + int x;
- + for (x=0; x<w; x++)
- + dst[x] = (cur0[x] + cur2[x] + 1)>>1; // simple average
- +}
- +
- +/* Produce deinterlaced output frame frame_out into h->buffer.
- + * Lines of one field are copied from the current frame; lines of the other are
- + * rebuilt by filter_line from the previous, current and next frames, except the
- + * two lines at each plane edge, which are duplicated or averaged instead.
- + * NOTE(review): output rows use cur's stride; assumes h->buffer matches it. */
- +static int get_frame( hnd_t handle, cli_pic_t *output, int frame_out )
- +{
- +    yadif_handle_t *h = handle;
- +    cli_pic_t prev, cur, next;
- +    int tff = h->tff, ret = 0;
- +    int parity = (h->mode & 1) ? (frame_out & 1) ^ (1^tff) : (tff ^ 1);
- +    int frame_in = (h->mode&1) ? frame_out/2 : frame_out;
- +
- +    *output = h->buffer;
- +
- +    if( frame_in == 0 )
- +    {
- +        /* No earlier frame exists: the following frame stands in for it. */
- +        ret |= h->prev_filter.get_frame( h->prev_handle, &prev, frame_in+1 );
- +        ret |= h->prev_filter.get_frame( h->prev_handle, &cur, frame_in );
- +        ret |= h->prev_filter.get_frame( h->prev_handle, &next, frame_in+1 );
- +    }
- +    else
- +    {
- +        ret |= h->prev_filter.get_frame( h->prev_handle, &prev, frame_in-1 );
- +        ret |= h->prev_filter.get_frame( h->prev_handle, &cur, frame_in );
- +        /* If frame_in+1 cannot be fetched (presumably end of input), reuse cur. */
- +        if( h->prev_filter.get_frame( h->prev_handle, &next, frame_in+1 ) )
- +            ret |= h->prev_filter.get_frame( h->prev_handle, &next, frame_in );
- +    }
- +    if( ret )
- +        return ret;
- +
- +    for( int i = 0; i < 3; i++ )
- +    {
- +        int width = cur.img.width * x264_cli_csps[cur.img.csp].width[i];
- +        int height = cur.img.height * x264_cli_csps[cur.img.csp].height[i];
- +        int stride = cur.img.stride[i];
- +        uint8_t *dst = output->img.plane[i];
- +        const uint8_t *src = cur.img.plane[i];
- +        int y;
- +
- +        /* Line 0: duplicate line 1 if line 0 belongs to the missing field. */
- +        memcpy( dst, (parity&1) ? src+stride : src, width );
- +        /* Line 1: average of lines 0 and 2 if missing, otherwise copied. */
- +        if( (1^parity)&1 )
- +            interpolate( dst+stride, src, src+2*stride, width );
- +        else
- +            memcpy( dst+stride, src+stride, width );
- +        for( y = 2; y < height-2; y++ )
- +        {
- +            if( (y ^ parity) & 1 )
- +                filter_line( h->mode, dst+y*stride, prev.img.plane[i]+y*stride, src+y*stride,
- +                             next.img.plane[i]+y*stride, width, stride, parity^tff );
- +            else
- +                memcpy( dst+y*stride, src+y*stride, width );
- +        }
- +        /* Line height-2: average of its two neighbours if missing, otherwise copied. */
- +        y = height-2;
- +        if( (y^parity)&1 )
- +            interpolate( dst+y*stride, src+(y-1)*stride, src+(y+1)*stride, width );
- +        else
- +            memcpy( dst+y*stride, src+y*stride, width );
- +        /* Last line: duplicate the line above if missing, otherwise copy the line itself. */
- +        y = height-1;
- +        memcpy( dst+y*stride, ((y^parity)&1) ? src+(y-1)*stride : src+y*stride, width );
- +    }
- +
- +    if( frame_out < 3 || !(frame_out&1) )
- +        return 0;
- +    return h->prev_filter.release_frame( h->prev_handle, &prev, frame_in-1 );
- +}
- +
- +/***********************
- +* Free *
- +***********************/
- +
- +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
- +{
- + return 0;
- +}
- +
- +static void free_filter( hnd_t handle )
- +{
- + yadif_handle_t *h = handle;
- + h->prev_filter.free( h->prev_handle );
- + x264_cli_pic_clean( &h->buffer );
- + free( h );
- +}
- +
- +cli_vid_filter_t yadif_filter = { NAME, help, yadif_init, get_frame, release_frame, free_filter, NULL };
- Index: filters/video/yadif_filter_line.c
- ===================================================================
- --- /dev/null
- +++ filters/video/yadif_filter_line.c
- @@ -0,0 +1,358 @@
- +/*****************************************************************************
- + * yadif_filter_line.c: x264 yadif filter
- + *****************************************************************************
- + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
- + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
- + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************
- + * Copied nearly verbatim from the Avisynth filter's yadif.c so that x264's
- + * yadif.c is cleaner
- + *****************************************************************************/
- +
- +#include "config.h"
- +#include "yadif_filter_line.h"
- +#include "x264.h"
- +
- +#if defined __GNUC__ && defined HAVE_MMX
- +/* uint64_t comes from <inttypes.h> (pulled in by yadif_filter_line.h). It is
- + * deliberately not redefined as "unsigned __int64": __int64 is not a type GCC
- + * provides outside MinGW/Cygwin, so that macro broke this code path elsewhere. */
- +/* Load 4 bytes from `mem` into `dst` and interleave them with the bytes of mm7
- + * (zeroed by the pxor at the top of FILTER), i.e. widen four pixels to 16-bit words. */
- +#define LOAD4(mem,dst) \
- + "movd "mem", "#dst" \n\t"\
- + "punpcklbw %%mm7, "#dst" \n\t"
- +
- +/* Per-word absolute value of `dst`, using `tmp` as scratch:
- + * tmp = 0 - dst; dst = max(dst, tmp) (signed 16-bit lanes). */
- +#define PABS(tmp,dst) \
- + "pxor "#tmp", "#tmp" \n\t"\
- + "psubw "#dst", "#tmp" \n\t"\
- + "pmaxsw "#tmp", "#dst" \n\t"
- +
- +/* Evaluate one candidate edge direction. pj/mj are the byte displacements
- + * applied to the line above (mrefs) and the line below (prefs).
- + * Results: mm5 = averaged pixel pair as words (the candidate prediction),
- + *          mm2 = score (sum of the three absolute differences).
- + * mm3 and mm4 are used as scratch; mm7 must be zero and %[pb1] must hold 0x01 bytes. */
- +#define CHECK(pj,mj) \
- + "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
- + "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
- + "movq %%mm2, %%mm4 \n\t"\
- + "movq %%mm2, %%mm5 \n\t"\
- + "pxor %%mm3, %%mm4 \n\t"\
- + "pavgb %%mm3, %%mm5 \n\t"\
- + "pand %[pb1], %%mm4 \n\t"\
- + "psubusb %%mm4, %%mm5 \n\t"\
- + "psrlq $8, %%mm5 \n\t"\
- + "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
- + "movq %%mm2, %%mm4 \n\t"\
- + "psubusb %%mm3, %%mm2 \n\t"\
- + "psubusb %%mm4, %%mm3 \n\t"\
- + "pmaxub %%mm3, %%mm2 \n\t"\
- + "movq %%mm2, %%mm3 \n\t"\
- + "movq %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
- + "psrlq $8, %%mm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
- + "psrlq $16, %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
- + "punpcklbw %%mm7, %%mm2 \n\t"\
- + "punpcklbw %%mm7, %%mm3 \n\t"\
- + "punpcklbw %%mm7, %%mm4 \n\t"\
- + "paddw %%mm3, %%mm2 \n\t"\
- + "paddw %%mm4, %%mm2 \n\t" /* score */
- +
- +/* First-step selection after CHECK(): in every lane where the new score (mm2)
- + * is below spatial_score (mm0), take the new score and replace spatial_pred
- + * (mm1) with the candidate in mm5. The comparison mask is saved in mm6 for CHECK2. */
- +#define CHECK1 \
- + "movq %%mm0, %%mm3 \n\t"\
- + "pcmpgtw %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
- + "pminsw %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
- + "movq %%mm3, %%mm6 \n\t"\
- + "pand %%mm3, %%mm5 \n\t"\
- + "pandn %%mm1, %%mm3 \n\t"\
- + "por %%mm5, %%mm3 \n\t"\
- + "movq %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
- +
- +/* Second-step selection. The mask left in mm6 by CHECK1 is turned into a
- + * penalty (0 where CHECK1 matched, 1<<14 where it did not) that is added with
- + * saturation to the score before the same compare/select as in CHECK1. */
- +#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
- + hurts both quality and speed, but matches the C version. */\
- + "paddw %[pw1], %%mm6 \n\t"\
- + "psllw $14, %%mm6 \n\t"\
- + "paddsw %%mm6, %%mm2 \n\t"\
- + "movq %%mm0, %%mm3 \n\t"\
- + "pcmpgtw %%mm2, %%mm3 \n\t"\
- + "pminsw %%mm2, %%mm0 \n\t"\
- + "pand %%mm3, %%mm5 \n\t"\
- + "pandn %%mm1, %%mm3 \n\t"\
- + "por %%mm5, %%mm3 \n\t"\
- + "movq %%mm3, %%mm1 \n\t"
- +
- +/*
- + * Inline-assembly (MMX register) version of the yadif line filter; it is the
- + * implementation get_filter_func() picks when X264_CPU_MMXEXT is set.
- + * Parameters are the same as for filter_line_c(). Four output pixels are
- + * produced per loop iteration (x, dst, prev, cur and next all advance by 4).
- + * The FILTER macro holds the loop body; it is expanded twice with prev2/next2
- + * defined as the operand names of the temporal pair: (prev, cur) when parity
- + * is set, (cur, next) otherwise. When mode >= 2 the "jge 1f" skips the extra
- + * check that uses the lines two rows above and below.
- + * NOTE(review): the asm statements declare no clobbers and no emms is issued
- + * in this function -- presumably handled by the caller; confirm.
- + */
- +static void filter_line_mmx2(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
- + static const uint64_t pw_1 = 0x0001000100010001ULL;
- + static const uint64_t pb_1 = 0x0101010101010101ULL;
- +// const int mode = p->mode;
- + uint64_t tmp0, tmp1, tmp2, tmp3;
- + int x;
- +
- +#define FILTER\
- + for(x=0; x<w; x+=4){\
- + asm volatile(\
- + "pxor %%mm7, %%mm7 \n\t"\
- + LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
- + LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
- + LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
- + LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
- + "movq %%mm3, %%mm4 \n\t"\
- + "paddw %%mm2, %%mm3 \n\t"\
- + "psraw $1, %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
- + "movq %%mm0, %[tmp0] \n\t" /* c */\
- + "movq %%mm3, %[tmp1] \n\t" /* d */\
- + "movq %%mm1, %[tmp2] \n\t" /* e */\
- + "psubw %%mm4, %%mm2 \n\t"\
- + PABS( %%mm4, %%mm2) /* temporal_diff0 */\
- + LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
- + LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
- + "psubw %%mm0, %%mm3 \n\t"\
- + "psubw %%mm1, %%mm4 \n\t"\
- + PABS( %%mm5, %%mm3)\
- + PABS( %%mm5, %%mm4)\
- + "paddw %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
- + "psrlw $1, %%mm2 \n\t"\
- + "psrlw $1, %%mm3 \n\t"\
- + "pmaxsw %%mm3, %%mm2 \n\t"\
- + LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
- + LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
- + "psubw %%mm0, %%mm3 \n\t"\
- + "psubw %%mm1, %%mm4 \n\t"\
- + PABS( %%mm5, %%mm3)\
- + PABS( %%mm5, %%mm4)\
- + "paddw %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
- + "psrlw $1, %%mm3 \n\t"\
- + "pmaxsw %%mm3, %%mm2 \n\t"\
- + "movq %%mm2, %[tmp3] \n\t" /* diff */\
- +\
- + "paddw %%mm0, %%mm1 \n\t"\
- + "paddw %%mm0, %%mm0 \n\t"\
- + "psubw %%mm1, %%mm0 \n\t"\
- + "psrlw $1, %%mm1 \n\t" /* spatial_pred */\
- + PABS( %%mm2, %%mm0) /* ABS(c-e) */\
- +\
- + "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
- + "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
- + "movq %%mm2, %%mm4 \n\t"\
- + "psubusb %%mm3, %%mm2 \n\t"\
- + "psubusb %%mm4, %%mm3 \n\t"\
- + "pmaxub %%mm3, %%mm2 \n\t"\
- + /*"pshufw $9,%%mm2, %%mm3 \n\t"*/\
- + "movq %%mm2, %%mm3 \n\t" /* replace for "pshufw $9,%%mm2, %%mm3" - Fizick */\
- + "psrlq $16, %%mm3 \n\t"/* replace for "pshufw $9,%%mm2, %%mm3" - Fizick*/\
- + "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
- + "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
- + "paddw %%mm2, %%mm0 \n\t"\
- + "paddw %%mm3, %%mm0 \n\t"\
- + "psubw %[pw1], %%mm0 \n\t" /* spatial_score */\
- +\
- + CHECK(-2,0)\
- + CHECK1\
- + CHECK(-3,1)\
- + CHECK2\
- + CHECK(0,-2)\
- + CHECK1\
- + CHECK(1,-3)\
- + CHECK2\
- +\
- + /* if(p->mode<2) ... */\
- + "movq %[tmp3], %%mm6 \n\t" /* diff */\
- + "cmp $2, %[mode] \n\t"\
- + "jge 1f \n\t"\
- + LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
- + LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
- + LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
- + LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
- + "paddw %%mm4, %%mm2 \n\t"\
- + "paddw %%mm5, %%mm3 \n\t"\
- + "psrlw $1, %%mm2 \n\t" /* b */\
- + "psrlw $1, %%mm3 \n\t" /* f */\
- + "movq %[tmp0], %%mm4 \n\t" /* c */\
- + "movq %[tmp1], %%mm5 \n\t" /* d */\
- + "movq %[tmp2], %%mm7 \n\t" /* e */\
- + "psubw %%mm4, %%mm2 \n\t" /* b-c */\
- + "psubw %%mm7, %%mm3 \n\t" /* f-e */\
- + "movq %%mm5, %%mm0 \n\t"\
- + "psubw %%mm4, %%mm5 \n\t" /* d-c */\
- + "psubw %%mm7, %%mm0 \n\t" /* d-e */\
- + "movq %%mm2, %%mm4 \n\t"\
- + "pminsw %%mm3, %%mm2 \n\t"\
- + "pmaxsw %%mm4, %%mm3 \n\t"\
- + "pmaxsw %%mm5, %%mm2 \n\t"\
- + "pminsw %%mm5, %%mm3 \n\t"\
- + "pmaxsw %%mm0, %%mm2 \n\t" /* max */\
- + "pminsw %%mm0, %%mm3 \n\t" /* min */\
- + "pxor %%mm4, %%mm4 \n\t"\
- + "pmaxsw %%mm3, %%mm6 \n\t"\
- + "psubw %%mm2, %%mm4 \n\t" /* -max */\
- + "pmaxsw %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
- + "1: \n\t"\
- +\
- + "movq %[tmp1], %%mm2 \n\t" /* d */\
- + "movq %%mm2, %%mm3 \n\t"\
- + "psubw %%mm6, %%mm2 \n\t" /* d-diff */\
- + "paddw %%mm6, %%mm3 \n\t" /* d+diff */\
- + "pmaxsw %%mm2, %%mm1 \n\t"\
- + "pminsw %%mm3, %%mm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
- + "packuswb %%mm1, %%mm1 \n\t"\
- +\
- + :[tmp0]"=m"(tmp0),\
- + [tmp1]"=m"(tmp1),\
- + [tmp2]"=m"(tmp2),\
- + [tmp3]"=m"(tmp3)\
- + :[prev] "r"(prev),\
- + [cur] "r"(cur),\
- + [next] "r"(next),\
- + [prefs]"r"((long)refs),\
- + [mrefs]"r"((long)-refs),\
- + [pw1] "m"(pw_1),\
- + [pb1] "m"(pb_1),\
- + [mode] "g"(mode)\
- + );\
- + asm volatile("movd %%mm1, %0" :"=m"(*dst));\
- + dst += 4;\
- + prev+= 4;\
- + cur += 4;\
- + next+= 4;\
- + }
- +
- + /* parity set: temporal pair is (prev, cur); otherwise (cur, next) */
- + if(parity){
- +#define prev2 "prev"
- +#define next2 "cur"
- + FILTER
- +#undef prev2
- +#undef next2
- + }else{
- +#define prev2 "cur"
- +#define next2 "next"
- + FILTER
- +#undef prev2
- +#undef next2
- + }
- +}
- +#undef LOAD4
- +#undef PABS
- +#undef CHECK
- +#undef CHECK1
- +#undef CHECK2
- +#undef FILTER
- +
- +#ifndef attribute_align_arg
- +#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
- +# define attribute_align_arg __attribute__((force_align_arg_pointer))
- +#else
- +# define attribute_align_arg
- +#endif
- +#endif
- +
- +// for proper alignment SSE2 we need in GCC 4.2 and above
- +#if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
- +
- +#ifndef DECLARE_ALIGNED
- +#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n)))
- +#endif
- +
- +// ================= SSE2 =================
- +#define PABS(tmp,dst) \
- + "pxor "#tmp", "#tmp" \n\t"\
- + "psubw "#dst", "#tmp" \n\t"\
- + "pmaxsw "#tmp", "#dst" \n\t"
- +
- +#define FILTER_LINE_FUNC_NAME filter_line_sse2
- +#include "avs_vf_yadif_template.h"
- +
- +// ================ SSSE3 =================
- +#define PABS(tmp,dst) \
- + "pabsw "#dst", "#dst" \n\t"
- +
- +#define FILTER_LINE_FUNC_NAME filter_line_ssse3
- +#include "avs_vf_yadif_template.h"
- +
- +#endif
- +
- +#endif
- +
- +/* Small arithmetic helpers; arguments may be evaluated more than once. */
- +#define MIN(a,b) ( (a)<(b) ? (a) : (b) )
- +#define MAX(a,b) ( (a)>(b) ? (a) : (b) )
- +#define MIN3(a,b,c) MIN((a),MIN((b),(c)))
- +#define MAX3(a,b,c) MAX((a),MAX((b),(c)))
- +#define ABS(a) ( (a) > 0 ? (a) : -(a) )
- +
- +/*
- + * Plain C yadif line filter: reconstructs w pixels of one line into dst.
- + *   mode    - values below 2 enable the additional check against the lines
- + *             two rows above and below
- + *   prev, cur, next - pointers to the same line position in three pictures
- + *   refs    - distance in bytes between vertically adjacent lines
- + *   parity  - selects the temporal pair: (prev, cur) when non-zero,
- + *             (cur, next) when zero
- + * Pixels up to 3 columns left/right of the processed span and up to 2 lines
- + * above/below (when mode < 2) are read.
- + */
- +static void filter_line_c(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
- +    const uint8_t *before = parity ? prev : cur;
- +    const uint8_t *after  = parity ? cur  : next;
- +
- +    for(int x = 0; x < w; x++){
- +        const uint8_t *pc = cur    + x;
- +        const uint8_t *pp = prev   + x;
- +        const uint8_t *pn = next   + x;
- +        const uint8_t *p2 = before + x;
- +        const uint8_t *n2 = after  + x;
- +
- +        const int above    = pc[-refs];
- +        const int below    = pc[+refs];
- +        const int temporal = (p2[0] + n2[0]) >> 1;
- +
- +        const int tdiff0 = ABS(p2[0] - n2[0]);
- +        const int tdiff1 = ( ABS(pp[-refs] - above) + ABS(pp[+refs] - below) ) >> 1;
- +        const int tdiff2 = ( ABS(pn[-refs] - above) + ABS(pn[+refs] - below) ) >> 1;
- +        int diff = MAX3(tdiff0 >> 1, tdiff1, tdiff2);
- +
- +        int pred = (above + below) >> 1;
- +        int best = ABS(pc[-refs-1] - pc[+refs-1]) + ABS(above - below)
- +                 + ABS(pc[-refs+1] - pc[+refs+1]) - 1;
- +
- +        /* Edge-direction search: try |j| = 1, and only if that improved the
- +         * score try |j| = 2, first towards the left (-) then the right (+). */
- +        for(int dir = -1; dir <= 1; dir += 2){
- +            for(int j = dir; j != 3*dir; j += dir){
- +                int score = ABS(pc[-refs-1+j] - pc[+refs-1-j])
- +                          + ABS(pc[-refs  +j] - pc[+refs  -j])
- +                          + ABS(pc[-refs+1+j] - pc[+refs+1-j]);
- +                if(score >= best)
- +                    break;
- +                best = score;
- +                pred = (pc[-refs+j] + pc[+refs-j]) >> 1;
- +            }
- +        }
- +
- +        if(mode < 2){
- +            int b = (p2[-2*refs] + n2[-2*refs]) >> 1;
- +            int f = (p2[+2*refs] + n2[+2*refs]) >> 1;
- +            int hi = MAX3(temporal - below, temporal - above, MIN(b - above, f - below));
- +            int lo = MIN3(temporal - below, temporal - above, MAX(b - above, f - below));
- +
- +            diff = MAX3(diff, lo, -hi);
- +        }
- +
- +        /* keep the spatial prediction within temporal +/- diff */
- +        if(pred > temporal + diff)
- +            pred = temporal + diff;
- +        else if(pred < temporal - diff)
- +            pred = temporal - diff;
- +
- +        dst[x] = pred;
- +    }
- +}
- +
- +/* Pick the line filter for the given x264 CPU flags. Preference order is
- + * SSSE3, SSE2, MMXEXT, then the plain C version; the SIMD variants are only
- + * compiled in under the same preprocessor conditions that define them above. */
- +filter_line_func get_filter_func(unsigned int cpu) {
- +#if defined __GNUC__ && defined HAVE_MMX
- +#if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
- +    if (cpu & X264_CPU_SSSE3)
- +        return filter_line_ssse3;
- +    if (cpu & (X264_CPU_SSE2|X264_CPU_SSE2_IS_SLOW|X264_CPU_SSE2_IS_FAST))
- +        return filter_line_sse2;
- +#endif
- +    if (cpu & X264_CPU_MMXEXT)
- +        return filter_line_mmx2;
- +#endif
- +    return filter_line_c;
- +}
- Index: filters/video/yadif_filter_line.h
- ===================================================================
- --- /dev/null
- +++ filters/video/yadif_filter_line.h
- @@ -0,0 +1,27 @@
- +/*****************************************************************************
- + * yadif_filter_line.h: x264 yadif filter
- + *****************************************************************************
- + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
- + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
- + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#ifndef X264_FILTER_YADIF_FILTER_LINE_H
- +#define X264_FILTER_YADIF_FILTER_LINE_H
- +
- +#include <inttypes.h>
- +
- +/*
- + * Signature shared by all yadif line filters: writes w reconstructed pixels to
- + * dst from the same line position in the prev, cur and next pictures.
- + * refs is the byte distance between adjacent lines, parity selects which two
- + * of the three pictures form the temporal pair, and mode values below 2 enable
- + * the additional check against the lines two rows above and below.
- + */
- +typedef void (*filter_line_func)(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity);
- +
- +/* Returns the line filter implementation matching the given x264 CPU flags. */
- +filter_line_func get_filter_func(unsigned int cpu);
- +
- +#endif
- From: James Darnley <james.darnley@gmail.com>
- Subject: [PATCH 3/3] Add pad filter
- Makefile | 2 +-
- configure | 2 +-
- filters/video/pad.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++
- filters/video/video.c | 1 +
- 4 files changed, 193 insertions(+), 2 deletions(-)
- create mode 100644 filters/video/pad.c
- Index: Makefile
- ===================================================================
- --- Makefile_orig
- +++ Makefile
- @@ -19,7 +19,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
- filters/video/video.c filters/video/source.c filters/video/internal.c \
- filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
- filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c \
- - filters/video/yadif.c filters/video/yadif_filter_line.c
- + filters/video/yadif.c filters/video/yadif_filter_line.c filters/video/pad.c
- SRCSO =
- Index: configure
- ===================================================================
- --- configure_orig
- +++ configure
- @@ -770,7 +770,7 @@ Libs: $pclibs
- Cflags: -I$includedir
- EOF
- -filters="crop select_every hqdn3d yadif"
- +filters="crop select_every hqdn3d yadif pad"
- [ $swscale = yes ] && filters="resize $filters"
- cat > conftest.log <<EOF
- Index: filters/video/pad.c
- ===================================================================
- --- /dev/null
- +++ filters/video/pad.c
- @@ -0,0 +1,190 @@
- +/*****************************************************************************
- + * pad.c: x264 pad filter
- + *****************************************************************************
- + * Copyright (C) 2010 James Darnley <james.darnley@gmail.com>
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License as published by
- + * the Free Software Foundation; either version 2 of the License, or
- + * (at your option) any later version.
- + *
- + * This program is distributed in the hope that it will be useful,
- + * but WITHOUT ANY WARRANTY; without even the implied warranty of
- + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- + * GNU General Public License for more details.
- + *
- + * You should have received a copy of the GNU General Public License
- + * along with this program; if not, write to the Free Software
- + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- + *****************************************************************************/
- +
- +#include "internal.h"
- +#include "video.h"
- +#define NAME "pad"
- +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
- +
- +cli_vid_filter_t pad_filter;
- +
- +/* Per-instance state of the pad filter. */
- +typedef struct {
- + hnd_t prev_handle; /* handle of the filter feeding this one */
- + cli_vid_filter_t prev_filter; /* callbacks of the filter feeding this one */
- + int width; /* output (padded) frame width */
- + int height; /* output (padded) frame height */
- + int cols; /* horizontal position of the source picture inside the output */
- + int rows; /* vertical position of the source picture inside the output */
- + char colour[4]; /* fill byte per plane; init() sets the first three */
- + cli_pic_t buffer; /* output picture, pre-filled with the border colour in init() */
- + const x264_cli_csp_t *csp; /* colourspace description used for per-plane scaling */
- +} pad_handle_t;
- +
- +/* Print the option synopsis; the descriptive lines follow only when
- + * longhelp is non-zero. */
- +static void help( int longhelp )
- +{
- +    printf( " "NAME":[left][,top][,right][,bottom][,width][,height][,colour]\n" );
- +    if( longhelp )
- +    {
- +        printf( " adds pixels to the frame edge\n" );
- +        printf( " colour values are in YUV not RGB\n" );
- +        printf( " default colour is black\n" );
- +    }
- +}
- +
- +/*
- + * Set up the pad filter and insert it into the filter chain.
- + *
- + * Options (all optional): left, top, right, bottom border sizes, the final
- + * width and height, and the border colour as a packed YUV value
- + * ("colour" or "color"; default Y=0, U=V=0x80).
- + * When no border is given for an axis but a final size is, the picture is
- + * centred on that axis.
- + *
- + * On success info->width/height are updated to the padded size and
- + * *handle / *filter are replaced by this filter; the previous ones are kept
- + * in the new handle. If the requested size equals the input size nothing is
- + * inserted and 0 is returned.
- + *
- + * Returns 0 on success, -1 on error. No memory is left allocated on any
- + * error path: option strings are released before returning and the handle
- + * is only allocated once all arguments have been validated.
- + */
- +static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
- +{
- +    static const char *optlist[] = { "left", "top", "right", "bottom", "width",
- +                                     "height", "colour", "color", NULL };
- +    int arg[7];
- +    char *opt;
- +
- +    /* Checked before anything is allocated so that failing needs no cleanup. */
- +    FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
- +                  || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
- +                   "Only planar YUV images currently supported, patches welcome\n" )
- +
- +    const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
- +    char **opts = x264_split_options( opt_string, optlist );
- +
- +    for( int i = 0; i < 6; i++ )
- +    {
- +        int mod = i&1 ? (csp->mod_height << info->interlaced) : csp->mod_width;
- +        opt = x264_get_option( optlist[i], opts );
- +        arg[i] = x264_otoi( opt, 0 );
- +        /* FAIL_IF_ERROR is not used here: opts has to be released before
- +         * returning, and the message still needs the option string. */
- +        if( arg[i] < 0 || arg[i] % mod )
- +        {
- +            if( arg[i] < 0 )
- +                /* a negative border would place the picture outside the output buffer */
- +                x264_cli_log( NAME, X264_LOG_ERROR, "%s pad value '%s' must not be "
- +                              "negative\n", optlist[i], opt );
- +            else
- +                x264_cli_log( NAME, X264_LOG_ERROR, "%s pad value '%s' is not a "
- +                              "multiple of %d\n", optlist[i], opt, mod );
- +            x264_free_string_array( opts );
- +            return -1;
- +        }
- +    }
- +    opt = x264_get_option( optlist[6], opts );
- +    if( !opt )
- +        opt = x264_get_option( optlist[7], opts );
- +    arg[6] = x264_otoi( opt, -1 );
- +    x264_free_string_array( opts );
- +
- +    const int left       = arg[0];
- +    const int top        = arg[1];
- +    const int right      = arg[2];
- +    const int bottom     = arg[3];
- +    const int req_width  = arg[4];
- +    const int req_height = arg[5];
- +
- +    FAIL_IF_ERROR( req_width && req_width < info->width + left + right,
- +                   "requested width (%d) is less than requested padding (%d + %d + %d)\n",
- +                   req_width, info->width, left, right )
- +
- +    FAIL_IF_ERROR( req_height && req_height < info->height + top + bottom,
- +                   "requested height (%d) is less than requested padding (%d + %d + %d)\n",
- +                   req_height, info->height, top, bottom )
- +
- +    const int out_width  = req_width  ? req_width  : info->width  + left + right;
- +    const int out_height = req_height ? req_height : info->height + top  + bottom;
- +
- +    /* Nothing to pad: leave the filter chain untouched. */
- +    if( out_width == info->width && out_height == info->height )
- +        return 0;
- +
- +    /* Picture origin inside the output, rounded to the nearest multiple of the
- +     * colourspace modulus. Adding mod/2 (instead of a fixed 1) keeps the value
- +     * unchanged when the modulus is 1 and equals the old behaviour for 2. */
- +    int cols = left  ? left
- +             : right ? out_width - right - info->width
- +             :         (out_width - info->width)/2;
- +    cols = ((cols + csp->mod_width/2) / csp->mod_width) * csp->mod_width;
- +
- +    int rows = top    ? top
- +             : bottom ? out_height - bottom - info->height
- +             :          (out_height - info->height)/2;
- +    rows = ((rows + csp->mod_height/2) / csp->mod_height) * csp->mod_height;
- +
- +    pad_handle_t *h = calloc( 1, sizeof(*h) );
- +    if( !h )
- +        return -1;
- +
- +    if( x264_cli_pic_alloc( &h->buffer, info->csp, out_width, out_height ) )
- +    {
- +        free( h );
- +        return -1;
- +    }
- +
- +    h->width  = out_width;
- +    h->height = out_height;
- +    h->cols   = cols;
- +    h->rows   = rows;
- +
- +    if( arg[6] > -1 )
- +    {
- +        /* packed 0xYYUUVV */
- +        h->colour[0] = (arg[6]&0xFF0000) >> 16;
- +        h->colour[1] = (arg[6]&0xFF00) >> 8;
- +        h->colour[2] = arg[6]&0xFF;
- +    }
- +    else
- +    {
- +        h->colour[0] = 0;
- +        h->colour[1] = 0x80;
- +        h->colour[2] = 0x80;
- +    }
- +
- +    /* Fill every plane with the border colour once; get_frame() only
- +     * overwrites the picture area afterwards. */
- +    for( int i = 0; i < h->buffer.img.planes; i++ )
- +        memset( h->buffer.img.plane[i], h->colour[i],
- +                h->height * csp->height[i] * h->buffer.img.stride[i] );
- +
- +    x264_cli_log( NAME, X264_LOG_INFO,
- +                  "expanding frame to %dx%d, picture starting at (%d,%d)\n",
- +                  h->width, h->height, h->cols, h->rows );
- +
- +    info->width  = h->width;
- +    info->height = h->height;
- +    h->prev_filter = *filter;
- +    h->prev_handle = *handle;
- +    h->csp = csp;
- +    *handle = h;
- +    *filter = pad_filter;
- +
- +    return 0;
- +}
- +
- +/*
- + * Fetch frame `frame` from the previous filter, copy it into the pre-filled
- + * border buffer at (cols, rows) and hand that buffer out through `out`.
- + * The input frame is released back to the previous filter before returning.
- + * Returns -1 if the previous filter fails to deliver the frame, otherwise the
- + * result of the previous filter's release_frame.
- + */
- +static int get_frame( hnd_t handle, cli_pic_t *out, int frame )
- +{
- +    pad_handle_t *h = handle;
- +    cli_pic_t in;
- +
- +    if( h->prev_filter.get_frame( h->prev_handle, &in, frame ) )
- +        return -1;
- +
- +    *out = h->buffer;
- +    /* The buffer carries no timing of its own: keep the source frame's
- +     * timestamp and duration instead of the buffer's. */
- +    out->pts      = in.pts;
- +    out->duration = in.duration;
- +
- +    for( int i = 0; i < in.img.planes; i++ )
- +    {
- +        /* per-plane dimensions and origin, scaled by the colourspace's
- +         * plane-size factors */
- +        int src_w = in.img.width  * h->csp->width[i];
- +        int src_h = in.img.height * h->csp->height[i];
- +        int x0    = h->cols * h->csp->width[i];
- +        int y0    = h->rows * h->csp->height[i];
- +        int dst_stride = out->img.stride[i];
- +
- +        /* offset computed in integers so it always lands on a whole row */
- +        x264_cli_plane_copy( out->img.plane[i] + y0*dst_stride + x0, dst_stride,
- +                             in.img.plane[i], in.img.stride[i], src_w, src_h );
- +    }
- +
- +    return h->prev_filter.release_frame( h->prev_handle, &in, frame );
- +}
- +
- +
- +/* No-op release callback: the output picture is the filter's own persistent
- + * buffer and get_frame() has already released the input frame, so this
- + * always returns 0. */
- +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
- +{
- + return 0;
- +}
- +
- +/* Tear down the pad filter: free the upstream filter, then the border
- + * buffer owned by this handle, then the handle itself. */
- +static void free_filter( hnd_t handle )
- +{
- +    pad_handle_t *ctx = handle;
- +
- +    ctx->prev_filter.free( ctx->prev_handle );
- +    x264_cli_pic_clean( &ctx->buffer );
- +    free( ctx );
- +}
- +
- +/* Filter descriptor for pad: name, help text and the init/get/release/free
- + * callbacks defined in this file; the last member is left NULL. */
- +cli_vid_filter_t pad_filter = { NAME, help, init, get_frame, release_frame, free_filter, NULL };
- Index: filters/video/video.c
- ===================================================================
- --- filters/video/video_orig.c
- +++ filters/video/video.c
- @@ -48,6 +48,7 @@ void x264_register_vid_filters()
- REGISTER_VFILTER( select_every );
- REGISTER_VFILTER( hqdn3d );
- REGISTER_VFILTER( yadif );
- + REGISTER_VFILTER( pad );
- }
- int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement