Advertisement
Guest User

Untitled

a guest
Jul 29th, 2010
236
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 59.18 KB | None | 0 0
  1. From: James Darnley <james.darnley@gmail.com>
  2. Subject: [PATCH 1/3] Add hqdn3d filter
  3.  
  4. Makefile | 2 +-
  5. configure | 2 +-
  6. filters/video/hqdn3d.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++
  7. filters/video/video.c | 1 +
  8. 4 files changed, 292 insertions(+), 2 deletions(-)
  9. create mode 100644 filters/video/hqdn3d.c
  10.  
  11. Index: Makefile
  12. ===================================================================
  13. --- Makefile_orig
  14. +++ Makefile
  15. @@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
  16. output/flv.c output/flv_bytestream.c filters/filters.c \
  17. filters/video/video.c filters/video/source.c filters/video/internal.c \
  18. filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
  19. - filters/video/select_every.c filters/video/crop.c
  20. + filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c
  21.  
  22. SRCSO =
  23.  
  24. Index: configure
  25. ===================================================================
  26. --- configure_orig
  27. +++ configure
  28. @@ -770,7 +770,7 @@ Libs: $pclibs
  29. Cflags: -I$includedir
  30. EOF
  31.  
  32. -filters="crop select_every"
  33. +filters="crop select_every hqdn3d"
  34. [ $swscale = yes ] && filters="resize $filters"
  35.  
  36. cat > conftest.log <<EOF
  37. Index: filters/video/hqdn3d.c
  38. ===================================================================
  39. --- /dev/null
  40. +++ filters/video/hqdn3d.c
  41. @@ -0,0 +1,289 @@
  42. +/*****************************************************************************
  43. + * hqdn3d.c: x264 hqdn3d filter
  44. + *****************************************************************************
  45. + * Copyright (C) 2003 Daniel Moreno <comac@comac.darktech.org>
  46. + * Avisynth port (C) 2005 Loren Merritt <lorenm@u.washington.edu>
  47. + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
  48. + *
  49. + * This program is free software; you can redistribute it and/or modify
  50. + * it under the terms of the GNU General Public License as published by
  51. + * the Free Software Foundation; either version 2 of the License, or
  52. + * (at your option) any later version.
  53. + *
  54. + * This program is distributed in the hope that it will be useful,
  55. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  56. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  57. + * GNU General Public License for more details.
  58. + *
  59. + * You should have received a copy of the GNU General Public License
  60. + * along with this program; if not, write to the Free Software
  61. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  62. + *****************************************************************************/
  63. +
  64. +#include <math.h>
  65. +#include "video.h"
  66. +#define NAME "hqdn3d"
  67. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
  68. +
  69. +#define PARAM1_DEFAULT 4.0
  70. +#define PARAM2_DEFAULT 3.0
  71. +#define PARAM3_DEFAULT 6.0
  72. +
  73. +cli_vid_filter_t hqdn3d_filter;
  74. +
  75. +typedef struct
  76. +{
  77. + hnd_t prev_hnd;
  78. + cli_vid_filter_t prev_filter;
  79. + int coefs[4][512*16];
  80. + unsigned int *line;
  81. + unsigned short *frame[3];
  82. + int w, h, cw, ch, first_frame;
  83. +} hqdn3d_hnd_t;
  84. +
  85. +static void help( int longhelp )
  86. +{
  87. + printf( " "NAME":ls,cs,lt,ct\n" );
  88. + if(!longhelp)
  89. + return;
  90. + printf( " Denoises the image using mplayer's hqdn3d filter\n"
  91. + " The four arguments are floats and are optional\n"
  92. + " If any options are omitted, they will assume a\n"
  93. + " value based on previous options that you did specify\n"
  94. + " - ls = luma spatial filter strength [%.1lf]\n"
  95. + " - cs = chroma spatial filter strength [%.1lf]\n"
  96. + " - lt = luma temporal filter strength [%.1lf]\n"
  97. + " - ct = chroma temporal filter strength [%.1lf]\n",
  98. + PARAM1_DEFAULT, PARAM2_DEFAULT, PARAM3_DEFAULT,
  99. + PARAM3_DEFAULT * PARAM2_DEFAULT / PARAM1_DEFAULT);
  100. +}
  101. +
  102. +#define ABS(A) ( (A) > 0 ? (A) : -(A) )
  103. +
  104. +static void precalc_coefs(int *ct, double dist25)
  105. +{
  106. + //int i;
  107. + double gamma_d, simil, c;
  108. +
  109. + gamma_d = log(0.25) / log(1.0 - dist25/255.0 - 0.00001);
  110. +
  111. + for (int i = -255*16; i < 256*16; i++)
  112. + {
  113. + simil = 1.0 - ABS(i) / (16*255.0);
  114. + c = pow(simil, gamma_d) * 65536.0 * (double)i / 16.0;
  115. + ct[16*256+i] = (int)((c<0) ? (c-0.5) : (c+0.5));
  116. + }
  117. +}
  118. +
  119. +static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
  120. + x264_param_t *param, char *opt_string )
  121. +{
  122. + double lum_spac, lum_tmp, chrom_spac, chrom_tmp;
  123. + double param1, param2, param3, param4;
  124. +
  125. + hqdn3d_hnd_t *h = calloc( 1, sizeof(hqdn3d_hnd_t) );
  126. + FAIL_IF_ERROR( !h, "Memory allocation error (hqdn3d.c:%d)\n", __LINE__ )
  127. +
  128. + h->w = info->width*x264_cli_csps[info->csp].width[0];
  129. + h->h = info->height*x264_cli_csps[info->csp].height[0];
  130. + h->cw = info->width*x264_cli_csps[info->csp].width[1];
  131. + h->ch = info->height*x264_cli_csps[info->csp].height[1];
  132. +
  133. + h->line = calloc( 1, info->width*sizeof(int) );
  134. + h->frame[0] = malloc( h->w * h->h * sizeof(short) );
  135. + h->frame[1] = malloc( h->cw * h->ch * sizeof(short) );
  136. + h->frame[2] = malloc( h->cw * h->ch * sizeof(short) );
  137. + FAIL_IF_ERROR( !h->line || !h->frame[0] || !h->frame[1] || !h->frame[2],
  138. + "Memory allocation error(hqdn3d.c:%d)\n", __LINE__ )
  139. +
  140. + FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
  141. + || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
  142. + "Only planar YUV images supported\n" )
  143. + if(opt_string)
  144. + {
  145. + switch(sscanf( opt_string, "%lf,%lf,%lf,%lf",
  146. + &param1, &param2, &param3, &param4 ))
  147. + {
  148. + case 1:
  149. + lum_spac = param1;
  150. + lum_tmp = PARAM3_DEFAULT * param1 / PARAM1_DEFAULT;
  151. + chrom_spac = PARAM2_DEFAULT * param1 / PARAM1_DEFAULT;
  152. + chrom_tmp = lum_tmp * chrom_spac / lum_spac;
  153. + break;
  154. + case 2:
  155. + lum_spac = param1;
  156. + lum_tmp = PARAM3_DEFAULT * param1 / PARAM1_DEFAULT;
  157. + chrom_spac = param2;
  158. + chrom_tmp = lum_tmp * chrom_spac / lum_spac;
  159. + break;
  160. + case 3:
  161. + lum_spac = param1;
  162. + lum_tmp = param3;
  163. + chrom_spac = param2;
  164. + chrom_tmp = lum_tmp * chrom_spac / lum_spac;
  165. + break;
  166. + case 4:
  167. + lum_spac = param1;
  168. + lum_tmp = param3;
  169. + chrom_spac = param2;
  170. + chrom_tmp = param4;
  171. + break;
  172. + default:
  173. + lum_spac = PARAM1_DEFAULT;
  174. + lum_tmp = PARAM3_DEFAULT;
  175. + chrom_spac = PARAM2_DEFAULT;
  176. + chrom_tmp = lum_tmp * chrom_spac / lum_spac;
  177. + }
  178. + }
  179. + else
  180. + {
  181. + lum_spac = PARAM1_DEFAULT;
  182. + lum_tmp = PARAM3_DEFAULT;
  183. + chrom_spac = PARAM2_DEFAULT;
  184. + chrom_tmp = lum_tmp * chrom_spac / lum_spac;
  185. + }
  186. +
  187. + precalc_coefs(h->coefs[0], lum_spac);
  188. + precalc_coefs(h->coefs[1], lum_tmp);
  189. + precalc_coefs(h->coefs[2], chrom_spac);
  190. + precalc_coefs(h->coefs[3], chrom_tmp);
  191. +
  192. + x264_cli_log( NAME, X264_LOG_INFO,
  193. + "using strengths %.1lf,%.1lf,%.1lf,%.1lf\n",
  194. + lum_spac, chrom_spac, lum_tmp, chrom_tmp );
  195. +
  196. + h->first_frame = 1;
  197. + h->prev_filter = *filter;
  198. + h->prev_hnd = *handle;
  199. + *handle = h;
  200. + *filter = hqdn3d_filter;
  201. + return 0;
  202. +}
  203. +
  /*
   * Move curr_mul towards prev_mul by a table-driven amount.
   *
   * The difference prev_mul - curr_mul is biased by 0x10007FF and divided
   * by 65536/16 to form the index into coef; the selected entry is added
   * to curr_mul and the sum is returned.
   */
  static inline unsigned int low_pass_mul(unsigned int prev_mul, unsigned int curr_mul, int* coef)
  {
      const int step = 65536/16;
      int delta = prev_mul-curr_mul;
      int slot = (delta+0x10007FF)/step;

      return curr_mul + coef[slot];
  }
  211. +
  /*
   * Temporal stage shared by every pixel: blend the spatially filtered
   * value with the stored history sample, then write back both the new
   * history value and the output byte.
   */
  static inline void hqdn3d_temporal_store(unsigned short *hist, unsigned char *out,
                                           unsigned int spatial, int *temporal)
  {
      int blended = low_pass_mul(*hist<<8, spatial, temporal);

      *hist = ((blended+0x1000007F)/256);
      *out  = ((blended+0x10007FFF)/65536);
  }

  /*
   * Filter one plane.
   *
   * frame / stride_src       - source plane and its row stride
   * frame_dest / stride_dest - destination plane and its row stride
   *                            (get_frame() passes the same buffer for both)
   * line_ant                 - scratch row, at least w entries
   * frame_ant                - history plane stored with a row pitch of w
   * w, h                     - plane dimensions
   * horizontal, vertical, temporal - coefficient tables for low_pass_mul()
   *
   * Each source pixel is read before the matching destination pixel is
   * written, and pixels are visited left to right, top to bottom.
   */
  static void denoise(const unsigned char *frame,
                      unsigned char *frame_dest,
                      unsigned int *line_ant,
                      unsigned short *frame_ant,
                      int w, int h, int stride_src, int stride_dest,
                      int *horizontal, int *vertical, int *temporal)
  {
      const unsigned char *src_row = frame;
      unsigned char *dst_row = frame_dest;
      unsigned short *hist_row = frame_ant;
      unsigned int left;

      /* The first pixel has neither a left nor a top neighbour: only the
       * previous frame contributes. */
      left = src_row[0]<<16;
      line_ant[0] = left;
      hqdn3d_temporal_store(&hist_row[0], &dst_row[0], left, temporal);

      /* The first row has no top neighbour: filter against the left pixel
       * and the previous frame. */
      for (int x = 1; x < w; x++)
      {
          left = low_pass_mul(left, src_row[x]<<16, horizontal);
          line_ant[x] = left;
          hqdn3d_temporal_store(&hist_row[x], &dst_row[x], left, temporal);
      }

      for (int y = 1; y < h; y++)
      {
          src_row  += stride_src;
          dst_row  += stride_dest;
          hist_row += w;

          /* The first pixel of a row has no left neighbour. */
          left = src_row[0]<<16;
          line_ant[0] = low_pass_mul(line_ant[0], left, vertical);
          hqdn3d_temporal_store(&hist_row[0], &dst_row[0], line_ant[0], temporal);

          /* Everything else gets the horizontal, vertical and temporal pass. */
          for (int x = 1; x < w; x++)
          {
              left = low_pass_mul(left, src_row[x]<<16, horizontal);
              line_ant[x] = low_pass_mul(line_ant[x], left, vertical);
              hqdn3d_temporal_store(&hist_row[x], &dst_row[x], line_ant[x], temporal);
          }
      }
  }
  261. +
  262. +static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
  263. +{
  264. + hqdn3d_hnd_t *h = handle;
  265. +
  266. + if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
  267. + return -1;
  268. +
  269. + if( h->first_frame )
  270. + {
  271. + int width = h->w;
  272. + int height = h->h;
  273. + int stride = output->img.stride[0];
  274. + for(int y = 0; y<height; y++)
  275. + for(int x = 0; x<width; x++)
  276. + h->frame[0][y*width+x] = output->img.plane[0][y*stride+x] << 8;
  277. +
  278. + width = h->cw;
  279. + height = h->ch;
  280. + stride = output->img.stride[1];
  281. + for(int y = 0; y<height; y++)
  282. + for(int x = 0; x<width; x++)
  283. + h->frame[1][y*width+x] = output->img.plane[1][y*stride+x] << 8;
  284. +
  285. + stride = output->img.stride[2];
  286. + for(int y = 0; y<height; y++)
  287. + for(int x = 0; x<width; x++)
  288. + h->frame[2][y*width+x] = output->img.plane[2][y*stride+x] << 8;
  289. +
  290. + h->first_frame = 0;
  291. + }
  292. +
  293. + denoise(output->img.plane[0],
  294. + output->img.plane[0],
  295. + h->line, h->frame[0],
  296. + h->w, h->h,
  297. + output->img.stride[0], output->img.stride[0],
  298. + h->coefs[0], h->coefs[0], h->coefs[1]);
  299. + denoise(output->img.plane[1],
  300. + output->img.plane[1],
  301. + h->line, h->frame[1],
  302. + h->cw, h->ch,
  303. + output->img.stride[1], output->img.stride[1],
  304. + h->coefs[2], h->coefs[2], h->coefs[3]);
  305. + denoise(output->img.plane[2],
  306. + output->img.plane[2],
  307. + h->line, h->frame[2],
  308. + h->cw, h->ch,
  309. + output->img.stride[2], output->img.stride[2],
  310. + h->coefs[2], h->coefs[2], h->coefs[3]);
  311. + return 0;
  312. +}
  313. +
  314. +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
  315. +{
  316. + hqdn3d_hnd_t *h = handle;
  317. + return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
  318. +}
  319. +
  320. +static void free_filter( hnd_t handle )
  321. +{
  322. + hqdn3d_hnd_t *h = handle;
  323. + h->prev_filter.free( h->prev_hnd );
  324. + free( h->line );
  325. + for(int i = 0; i<3; i++)
  326. + free( h->frame[i] );
  327. + free( h );
  328. +}
  329. +
  330. +cli_vid_filter_t hqdn3d_filter = { NAME, help, init, get_frame, release_frame, free_filter, NULL };
  331. Index: filters/video/video.c
  332. ===================================================================
  333. --- /dev/null
  334. +++ filters/video/video.c
  335. @@ -46,6 +46,7 @@ void x264_register_vid_filters()
  336. REGISTER_VFILTER( fix_vfr_pts );
  337. REGISTER_VFILTER( resize );
  338. REGISTER_VFILTER( select_every );
  339. + REGISTER_VFILTER( hqdn3d );
  340. }
  341.  
  342. int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
  343.  
  344. From: James Darnley <james.darnley@gmail.com>
  345. Subject: [PATCH 2/3] Add yadif filter
  346.  
  347. Makefile | 3 +-
  348. configure | 2 +-
  349. filters/video/avs_vf_yadif_template.h | 245 ++++++++++++++++++++++
  350. filters/video/video.c | 1 +
  351. filters/video/yadif.c | 230 +++++++++++++++++++++
  352. filters/video/yadif_filter_line.c | 358 +++++++++++++++++++++++++++++++++
  353. filters/video/yadif_filter_line.h | 27 +++
  354. 7 files changed, 864 insertions(+), 2 deletions(-)
  355. create mode 100644 filters/video/avs_vf_yadif_template.h
  356. create mode 100644 filters/video/yadif.c
  357. create mode 100644 filters/video/yadif_filter_line.c
  358. create mode 100644 filters/video/yadif_filter_line.h
  359.  
  360. Index: Makefile
  361. ===================================================================
  362. --- Makefile_orig
  363. +++ Makefile
  364. @@ -18,7 +18,8 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
  365. output/flv.c output/flv_bytestream.c filters/filters.c \
  366. filters/video/video.c filters/video/source.c filters/video/internal.c \
  367. filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
  368. - filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c
  369. + filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c \
  370. + filters/video/yadif.c filters/video/yadif_filter_line.c
  371.  
  372. SRCSO =
  373.  
  374. Index: configure
  375. ===================================================================
  376. --- configure_orig
  377. +++ configure
  378. @@ -770,7 +770,7 @@ Libs: $pclibs
  379. Cflags: -I$includedir
  380. EOF
  381.  
  382. -filters="crop select_every hqdn3d"
  383. +filters="crop select_every hqdn3d yadif"
  384. [ $swscale = yes ] && filters="resize $filters"
  385.  
  386. cat > conftest.log <<EOF
  387. Index: filters/video/avs_vf_yadif_template.h
  388. ===================================================================
  389. --- /dev/null
  390. +++ filters/video/avs_vf_yadif_template.h
  391. @@ -0,0 +1,245 @@
  392. +/*
  393. + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
  394. + *
  395. + * SSE2/SSSE3 version (custom optimization) by h.yamagata
  396. + *
  397. + * Small fix by Alexander Balakhnin (fizick@avisynth.org.ru)
  398. + *
  399. + * MPlayer is free software; you can redistribute it and/or modify
  400. + * it under the terms of the GNU General Public License as published by
  401. + * the Free Software Foundation; either version 2 of the License, or
  402. + * (at your option) any later version.
  403. + *
  404. + * MPlayer is distributed in the hope that it will be useful,
  405. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  406. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  407. + * GNU General Public License for more details.
  408. + *
  409. + * You should have received a copy of the GNU General Public License along
  410. + * with MPlayer; if not, write to the Free Software Foundation, Inc.,
  411. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  412. + */
  413. +
  /*
   * Assembly-string building blocks for the FILTER macro.  They are pasted
   * into one asm statement and communicate through fixed registers, so the
   * instruction order must not be changed.
   */

  /* Load 8 bytes from `mem` into `dst` and interleave them with xmm7
   * (FILTER clears xmm7 with pxor before the first LOAD8). */
  #define LOAD8(mem,dst) \
      "movq "mem", "#dst" \n\t"\
      "punpcklbw %%xmm7, "#dst" \n\t"

  /* Evaluate one candidate direction: pj and mj are the byte displacements
   * applied to the rows at cur+mrefs and cur+prefs.  Leaves the averaged
   * prediction in xmm5 and the score in xmm2; uses xmm3 and xmm4 as scratch. */
  #define CHECK(pj,mj) \
      "movdqu "#pj"(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1+j] */\
      "movdqu "#mj"(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1-j] */\
      "movdqa %%xmm2, %%xmm4 \n\t"\
      "movdqa %%xmm2, %%xmm5 \n\t"\
      "pxor %%xmm3, %%xmm4 \n\t"\
      "pavgb %%xmm3, %%xmm5 \n\t"\
      "pand %[pb1], %%xmm4 \n\t"\
      "psubusb %%xmm4, %%xmm5 \n\t"\
      "psrldq $1, %%xmm5 \n\t"\
      "punpcklbw %%xmm7, %%xmm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
      "movdqa %%xmm2, %%xmm4 \n\t"\
      "psubusb %%xmm3, %%xmm2 \n\t"\
      "psubusb %%xmm4, %%xmm3 \n\t"\
      "pmaxub %%xmm3, %%xmm2 \n\t"\
      "movdqa %%xmm2, %%xmm3 \n\t"\
      "movdqa %%xmm2, %%xmm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
      "psrldq $1, %%xmm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
      "psrldq $2, %%xmm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
      "punpcklbw %%xmm7, %%xmm2 \n\t"\
      "punpcklbw %%xmm7, %%xmm3 \n\t"\
      "punpcklbw %%xmm7, %%xmm4 \n\t"\
      "paddw %%xmm3, %%xmm2 \n\t"\
      "paddw %%xmm4, %%xmm2 \n\t" /* score */

  /* Accept the candidate from CHECK where its score (xmm2) beats the best
   * score so far (xmm0): updates xmm0 and the prediction in xmm1, and keeps
   * the comparison mask in xmm6 for a following CHECK2. */
  #define CHECK1 \
      "movdqa %%xmm0, %%xmm3 \n\t"\
      "pcmpgtw %%xmm2, %%xmm3 \n\t" /* if(score < spatial_score) */\
      "pminsw %%xmm2, %%xmm0 \n\t" /* spatial_score= score; */\
      "movdqa %%xmm3, %%xmm6 \n\t"\
      "pand %%xmm3, %%xmm5 \n\t"\
      "pandn %%xmm1, %%xmm3 \n\t"\
      "por %%xmm5, %%xmm3 \n\t"\
      "movdqa %%xmm3, %%xmm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

  /* Same selection as CHECK1, but the score is first penalised using the
   * mask CHECK1 left in xmm6; must directly follow a CHECK1/CHECK pair. */
  #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
      hurts both quality and speed, but matches the C version. */\
      "paddw %[pw1], %%xmm6 \n\t"\
      "psllw $14, %%xmm6 \n\t"\
      "paddsw %%xmm6, %%xmm2 \n\t"\
      "movdqa %%xmm0, %%xmm3 \n\t"\
      "pcmpgtw %%xmm2, %%xmm3 \n\t"\
      "pminsw %%xmm2, %%xmm0 \n\t"\
      "pand %%xmm3, %%xmm5 \n\t"\
      "pandn %%xmm1, %%xmm3 \n\t"\
      "por %%xmm5, %%xmm3 \n\t"\
      "movdqa %%xmm3, %%xmm1 \n\t"
  465. +
  466. +/* mode argument mod - Fizick */
  467. +
  468. +/* static attribute_align_arg void FILTER_LINE_FUNC_NAME(YadifContext *yadctx, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
  469. + const int mode = yadctx->mode; */
  /*
   * Filter one row of a plane, 8 pixels per loop iteration.
   *
   * mode   - compared against 2 inside the asm: values >= 2 jump over the
   *          block that widens diff using the rows at +/-2*refs
   * dst    - output row; every iteration stores 8 bytes, so the last store
   *          goes past dst[w-1] when w is not a multiple of 8
   * prev, cur, next - the same row position in three input pictures
   * w      - number of pixels to produce
   * refs   - offset added to / subtracted from a row pointer to reach the
   *          rows addressed as x+refs and x-refs in the comments below
   * parity - selects which two pictures play the prev2/next2 roles:
   *          non-zero uses prev and cur, zero uses cur and next
   *
   * PABS, FILTER_LINE_FUNC_NAME, attribute_align_arg and DECLARE_ALIGNED
   * are not defined in this file; presumably the including file provides
   * them - TODO confirm.
   *
   * NOTE(review): neither asm statement declares the xmm registers it
   * overwrites, and the result is carried in xmm1 from the first asm
   * statement to the second; confirm this is safe with the compilers and
   * flags in use.
   * NOTE(review): the loads read 16 bytes starting one byte before the
   * current position of the neighbouring rows, so callers presumably need
   * padded rows - verify against the caller.
   */
  static attribute_align_arg void FILTER_LINE_FUNC_NAME(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
      /* spill slots for c, d, e and diff (see the comments in FILTER) */
      DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
      DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
      DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
      DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
      int x;
      /* eight 16-bit ones */
      static DECLARE_ALIGNED(16, const unsigned short, pw_1[]) =
      {
          0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001
      };

      /* sixteen 8-bit ones, written as eight 16-bit words */
      static DECLARE_ALIGNED(16, const unsigned short, pb_1[]) =
      {
          0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101
      };


  /* The loop body.  prev2 and next2 are string macros naming the asm
   * operands to use; they are defined just before each expansion below. */
  #define FILTER\
      for(x=0; x<w; x+=8){\
          __asm__ volatile(\
              "pxor %%xmm7, %%xmm7 \n\t"\
              LOAD8("(%[cur],%[mrefs])", %%xmm0) /* c = cur[x-refs] */\
              LOAD8("(%[cur],%[prefs])", %%xmm1) /* e = cur[x+refs] */\
              LOAD8("(%["prev2"])", %%xmm2) /* prev2[x] */\
              LOAD8("(%["next2"])", %%xmm3) /* next2[x] */\
              "movdqa %%xmm3, %%xmm4 \n\t"\
              "paddw %%xmm2, %%xmm3 \n\t"\
              "psraw $1, %%xmm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
              "movdqa %%xmm0, %[tmp0] \n\t" /* c */\
              "movdqa %%xmm3, %[tmp1] \n\t" /* d */\
              "movdqa %%xmm1, %[tmp2] \n\t" /* e */\
              "psubw %%xmm4, %%xmm2 \n\t"\
              PABS( %%xmm4, %%xmm2) /* temporal_diff0 */\
              LOAD8("(%[prev],%[mrefs])", %%xmm3) /* prev[x-refs] */\
              LOAD8("(%[prev],%[prefs])", %%xmm4) /* prev[x+refs] */\
              "psubw %%xmm0, %%xmm3 \n\t"\
              "psubw %%xmm1, %%xmm4 \n\t"\
              PABS( %%xmm5, %%xmm3)\
              PABS( %%xmm5, %%xmm4)\
              "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff1 */\
              "psrlw $1, %%xmm2 \n\t"\
              "psrlw $1, %%xmm3 \n\t"\
              "pmaxsw %%xmm3, %%xmm2 \n\t"\
              LOAD8("(%[next],%[mrefs])", %%xmm3) /* next[x-refs] */\
              LOAD8("(%[next],%[prefs])", %%xmm4) /* next[x+refs] */\
              "psubw %%xmm0, %%xmm3 \n\t"\
              "psubw %%xmm1, %%xmm4 \n\t"\
              PABS( %%xmm5, %%xmm3)\
              PABS( %%xmm5, %%xmm4)\
              "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff2 */\
              "psrlw $1, %%xmm3 \n\t"\
              "pmaxsw %%xmm3, %%xmm2 \n\t"\
              "movdqa %%xmm2, %[tmp3] \n\t" /* diff */\
  \
              "paddw %%xmm0, %%xmm1 \n\t"\
              "paddw %%xmm0, %%xmm0 \n\t"\
              "psubw %%xmm1, %%xmm0 \n\t"\
              "psrlw $1, %%xmm1 \n\t" /* spatial_pred */\
              PABS( %%xmm2, %%xmm0) /* ABS(c-e) */\
  \
              "movdqu -1(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1] */\
              "movdqu -1(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1] */\
              "movdqa %%xmm2, %%xmm4 \n\t"\
              "psubusb %%xmm3, %%xmm2 \n\t"\
              "psubusb %%xmm4, %%xmm3 \n\t"\
              "pmaxub %%xmm3, %%xmm2 \n\t"\
              /*"pshuflw $9,%%xmm2, %%xmm3 \n\t"*/\
              /*"pshufhw $9,%%xmm2, %%xmm3 \n\t"*/\
              "movdqa %%xmm2, %%xmm3 \n\t" /* correct replacement (here) */\
              "psrldq $2, %%xmm3 \n\t"/* for "pshufw $9,%%mm2, %%mm3" - fix by Fizick */\
              "punpcklbw %%xmm7, %%xmm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
              "punpcklbw %%xmm7, %%xmm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
              "paddw %%xmm2, %%xmm0 \n\t"\
              "paddw %%xmm3, %%xmm0 \n\t"\
              "psubw %[pw1], %%xmm0 \n\t" /* spatial_score */\
  \
              CHECK(-2,0)\
              CHECK1\
              CHECK(-3,1)\
              CHECK2\
              CHECK(0,-2)\
              CHECK1\
              CHECK(1,-3)\
              CHECK2\
  \
              /* if(yadctx->mode<2) ... */\
              "movdqa %[tmp3], %%xmm6 \n\t" /* diff */\
              "cmp $2, %[mode] \n\t"\
              "jge 1f \n\t"\
              LOAD8("(%["prev2"],%[mrefs],2)", %%xmm2) /* prev2[x-2*refs] */\
              LOAD8("(%["next2"],%[mrefs],2)", %%xmm4) /* next2[x-2*refs] */\
              LOAD8("(%["prev2"],%[prefs],2)", %%xmm3) /* prev2[x+2*refs] */\
              LOAD8("(%["next2"],%[prefs],2)", %%xmm5) /* next2[x+2*refs] */\
              "paddw %%xmm4, %%xmm2 \n\t"\
              "paddw %%xmm5, %%xmm3 \n\t"\
              "psrlw $1, %%xmm2 \n\t" /* b */\
              "psrlw $1, %%xmm3 \n\t" /* f */\
              "movdqa %[tmp0], %%xmm4 \n\t" /* c */\
              "movdqa %[tmp1], %%xmm5 \n\t" /* d */\
              "movdqa %[tmp2], %%xmm7 \n\t" /* e */\
              "psubw %%xmm4, %%xmm2 \n\t" /* b-c */\
              "psubw %%xmm7, %%xmm3 \n\t" /* f-e */\
              "movdqa %%xmm5, %%xmm0 \n\t"\
              "psubw %%xmm4, %%xmm5 \n\t" /* d-c */\
              "psubw %%xmm7, %%xmm0 \n\t" /* d-e */\
              "movdqa %%xmm2, %%xmm4 \n\t"\
              "pminsw %%xmm3, %%xmm2 \n\t"\
              "pmaxsw %%xmm4, %%xmm3 \n\t"\
              "pmaxsw %%xmm5, %%xmm2 \n\t"\
              "pminsw %%xmm5, %%xmm3 \n\t"\
              "pmaxsw %%xmm0, %%xmm2 \n\t" /* max */\
              "pminsw %%xmm0, %%xmm3 \n\t" /* min */\
              "pxor %%xmm4, %%xmm4 \n\t"\
              "pmaxsw %%xmm3, %%xmm6 \n\t"\
              "psubw %%xmm2, %%xmm4 \n\t" /* -max */\
              "pmaxsw %%xmm4, %%xmm6 \n\t" /* diff= MAX3(diff, min, -max); */\
              "1: \n\t"\
  \
              "movdqa %[tmp1], %%xmm2 \n\t" /* d */\
              "movdqa %%xmm2, %%xmm3 \n\t"\
              "psubw %%xmm6, %%xmm2 \n\t" /* d-diff */\
              "paddw %%xmm6, %%xmm3 \n\t" /* d+diff */\
              "pmaxsw %%xmm2, %%xmm1 \n\t"\
              "pminsw %%xmm3, %%xmm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
              "packuswb %%xmm1, %%xmm1 \n\t"\
  \
              :[tmp0]"=m"(tmp0),\
               [tmp1]"=m"(tmp1),\
               [tmp2]"=m"(tmp2),\
               [tmp3]"=m"(tmp3)\
              :[prev] "r"(prev),\
               [cur] "r"(cur),\
               [next] "r"(next),\
               [prefs]"r"((long)refs),\
               [mrefs]"r"((long)-refs),\
               [pw1] "m"(*pw_1),\
               [pb1] "m"(*pb_1),\
               [mode] "g"(mode)\
          );\
          __asm__ volatile("movq %%xmm1, %0" :"=m"(*dst));\
          dst += 8;\
          prev+= 8;\
          cur += 8;\
          next+= 8;\
      }

      /* Expand the loop twice, once per choice of prev2/next2 operands. */
      if(parity){
  #define prev2 "prev"
  #define next2 "cur"
          FILTER
  #undef prev2
  #undef next2
      }else{
  #define prev2 "cur"
  #define next2 "next"
          FILTER
  #undef prev2
  #undef next2
      }
  }
  630. +#undef LOAD8
  631. +#undef PABS
  632. +#undef CHECK
  633. +#undef CHECK1
  634. +#undef CHECK2
  635. +#undef FILTER
  636. +#undef FILTER_LINE_FUNC_NAME
  637. Index: filters/video/video.c
  638. ===================================================================
  639. --- filters/video/video_orig.c
  640. +++ filters/video/video.c
  641. @@ -47,6 +47,7 @@ void x264_register_vid_filters()
  642. REGISTER_VFILTER( resize );
  643. REGISTER_VFILTER( select_every );
  644. REGISTER_VFILTER( hqdn3d );
  645. + REGISTER_VFILTER( yadif );
  646. }
  647.  
  648. int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
  649. Index: filters/video/yadif.c
  650. ===================================================================
  651. --- /dev/null
  652. +++ filters/video/yadif.c
  653. @@ -0,0 +1,230 @@
  654. +/*****************************************************************************
  655. + * yadif.c: x264 yadif filter
  656. + *****************************************************************************
  657. + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
  658. + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
  659. + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
  660. + *
  661. + * This program is free software; you can redistribute it and/or modify
  662. + * it under the terms of the GNU General Public License as published by
  663. + * the Free Software Foundation; either version 2 of the License, or
  664. + * (at your option) any later version.
  665. + *
  666. + * This program is distributed in the hope that it will be useful,
  667. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  668. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  669. + * GNU General Public License for more details.
  670. + *
  671. + * You should have received a copy of the GNU General Public License
  672. + * along with this program; if not, write to the Free Software
  673. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  674. + *****************************************************************************/
  675. +
  676. +#include <string.h>
  677. +#include "video.h"
  678. +#include "yadif_filter_line.h"
  679. +#define NAME "yadif"
  680. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
  681. +
  682. +cli_vid_filter_t yadif_filter;
  683. +filter_line_func filter_line;
  684. +
  685. +typedef struct {
  686. + hnd_t prev_handle;
  687. + cli_vid_filter_t prev_filter;
  688. + int mode;
  689. + int tff;
  690. + cli_pic_t buffer;
  691. +} yadif_handle_t;
  692. +
  693. +/***********************
  694. +* Help *
  695. +***********************/
  696. +
  697. +static void help( int longhelp )
  698. +{
  699. + printf( " "NAME":[mode][,order]\n" );
  700. + if(!longhelp)
  701. + return;
  702. + printf(
  703. +" Deinterlaces the picture using mplayer's YADIF\n"
  704. +" mode: sets the deinterlacing mode\n"
  705. +" 0 - single-rate deinterlacing (default)\n"
  706. +" 1 - double-rate deinterlacing (bob)\n"
  707. +" 2 - single-rate deinterlacing without spacial interlacing check\n"
  708. +" 3 - double-rate deinterlacing withput spacial interlacing check\n"
  709. +" order: forces the field order\n"
  710. +" tff - top-field first\n"
  711. +" bff - bottom-field first\n" );
  712. +}
  713. +
  714. +/***********************
  715. +* Init *
  716. +***********************/
  717. +
  718. +static int yadif_init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
  719. +{
  720. + yadif_handle_t *h = calloc( 1, sizeof(yadif_handle_t) );
  721. + if(!h)
  722. + return -1;
  723. +
  724. + FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
  725. + || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
  726. + "Only planar YUV images supported\n" )
  727. +
  728. + if(x264_cli_pic_alloc( &h->buffer, info->csp, info->width, info->height ))
  729. + return -1;
  730. +
  731. + char *mode, *order, *opt;
  732. + static const char *optlist[] = { "mode", "order", NULL };
  733. + char **opts = x264_split_options( opt_string, optlist );
  734. +
  735. + opt = x264_get_option( "mode", opts );
  736. + mode = (opt) ? opt : "";
  737. + h->mode = x264_otoi(mode,0);
  738. + if(h->mode < 0 || h->mode > 3) {
  739. + x264_cli_log( NAME, X264_LOG_WARNING, "Invalid mode (%s), ignoring\n", mode);
  740. + mode = 0;
  741. + }
  742. +
  743. + opt = x264_get_option( "order", opts );
  744. + order = (opt) ? opt : "";
  745. + if (!strcmp(order, "top") || !strcmp(order, "tff"))
  746. + h->tff = 1;
  747. + else if (!strcmp(order, "bottom") || !strcmp(order, "bff"))
  748. + h->tff = 0;
  749. + else {
  750. + if(opt)
  751. + x264_cli_log( NAME, X264_LOG_WARNING, "Unknown order (%s), ignoring\n", order);
  752. + h->tff = info->tff;
  753. + }
  754. +
  755. + x264_free_string_array(opts);
  756. +
  757. + if (x264_init_vid_filter( "cache", handle, filter, info, param, (void*)3 ))
  758. + return -1;
  759. +
  760. + if(h->mode&1) {
  761. + info->num_frames *=2;
  762. + info->fps_num *=2;
  763. + info->timebase_den *=2;
  764. + }
  765. +
  766. + info->interlaced = 0;
  767. + h->prev_filter = *filter;
  768. + h->prev_handle = *handle;
  769. + *handle = h;
  770. + *filter = yadif_filter;
  771. +
  772. + filter_line = get_filter_func(param->cpu);
  773. +
  774. + x264_cli_log( NAME, X264_LOG_INFO, "%s-rate deinterlacing "
  775. + "%s spatial interlacing check, %s-field first\n",
  776. + (h->mode&1) ? "double" : "single",
  777. + (h->mode&2) ? "without" : "with",
  778. + (h->tff) ? "top" : "bottom" );
  779. +
  780. + return 0;
  781. +}
  782. +
  783. +/***********************
  784. +* Process Frames *
  785. +***********************/
  /*
   * Write to dst the rounded-up average of the two rows cur0 and cur2,
   * one byte at a time for w bytes.  Nothing is written when w <= 0.
   */
  static void interpolate(uint8_t *dst, const uint8_t *cur0, const uint8_t *cur2, int w)
  {
      for (int remaining = w; remaining > 0; remaining--)
      {
          int sum = *cur0++ + *cur2++;
          *dst++ = (sum + 1) >> 1;
      }
  }
  792. +
  793. +static int get_frame( hnd_t handle, cli_pic_t *output, int frame_out )
  794. +{
  795. + yadif_handle_t *h = handle;
  796. + cli_pic_t prev, cur, next;
  797. + int tff = h->tff, ret = 0;
  798. + int parity = (h->mode & 1) ? (frame_out & 1) ^ (1^tff) : (tff ^ 1);
  799. + int frame_in = (h->mode&1) ? frame_out/2 : frame_out;
  800. +
  801. + *output = h->buffer;
  802. +
  803. + if (frame_in==0)
  804. + {
  805. + ret |= h->prev_filter.get_frame( h->prev_handle, &prev, frame_in+1 );
  806. + ret |= h->prev_filter.get_frame( h->prev_handle, &cur, frame_in );
  807. + ret |= h->prev_filter.get_frame( h->prev_handle, &next, frame_in+1 );
  808. + }
  809. + else
  810. + {
  811. + ret |= h->prev_filter.get_frame( h->prev_handle, &prev, frame_in-1 );
  812. + ret |= h->prev_filter.get_frame( h->prev_handle, &cur, frame_in );
  813. + if (h->prev_filter.get_frame( h->prev_handle, &next, frame_in+1 ))
  814. + ret |= h->prev_filter.get_frame( h->prev_handle, &next, frame_in );
  815. + }
  816. + if(ret)
  817. + return ret;
  818. +
  819. + for (int i=0; i<3; i++)
  820. + {
  821. + int width = cur.img.width * x264_cli_csps[cur.img.csp].width[i];
  822. + int height = cur.img.height * x264_cli_csps[cur.img.csp].height[i];
  823. + int stride = cur.img.stride[i];
  824. +
  825. + int y=0;
  826. + if((y^parity)&1)
  827. + memcpy(output->img.plane[i], cur.img.plane[i]+stride, width);// duplicate 1
  828. + else
  829. + memcpy(output->img.plane[i], cur.img.plane[i], width);
  830. + y=1;
  831. + if((y^parity)&1)
  832. + interpolate(output->img.plane[i]+stride, cur.img.plane[i], cur.img.plane[i]+2*stride, width); // interpolate 0 and 2
  833. + else
  834. + memcpy(output->img.plane[i]+stride, cur.img.plane[i]+stride, width); // copy original
  835. + for (y=2; y<height-2; y++)
  836. + {
  837. + if ((y ^ parity) & 1)
  838. + filter_line( h->mode,
  839. + output->img.plane[i]+y*stride,
  840. + prev.img.plane[i]+y*stride,
  841. + cur.img.plane[i]+y*stride,
  842. + next.img.plane[i]+y*stride,
  843. + width, stride, parity^tff );
  844. + else
  845. + memcpy( output->img.plane[i]+y*stride,
  846. + cur.img.plane[i]+y*stride,
  847. + width );
  848. + }
  849. + y=height-2;
  850. + if((y^parity)&1)
  851. + interpolate(output->img.plane[i]+y*stride, cur.img.plane[i]+(y-1)*stride, cur.img.plane[i]+(y+1)*stride, width); // interpolate h-3 and h-1
  852. + else
  853. + memcpy(output->img.plane[i]+y*stride, cur.img.plane[i]+y*stride, width); // copy original
  854. + y=height-1;
  855. + if((y^parity)&1)
  856. + memcpy(output->img.plane[i]+y*stride, cur.img.plane[i]+(y-1)*stride, width); // duplicate h-2
  857. + else
  858. + memcpy(output->img.plane[i]+y*stride, cur.img.plane[i]+(y-1)*stride, width); // copy original
  859. + }
  860. +
  861. + if(frame_out < 3 || !(frame_out&1))
  862. + return 0;
  863. + return h->prev_filter.release_frame( h->prev_handle, &prev, frame_in-1 );
  864. +}
  865. +
  866. +/***********************
  867. +* Free *
  868. +***********************/
  869. +
  870. +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
  871. +{
  872. + return 0;
  873. +}
  874. +
  875. +static void free_filter( hnd_t handle )
  876. +{
  877. + yadif_handle_t *h = handle;
  878. + h->prev_filter.free( h->prev_handle );
  879. + x264_cli_pic_clean( &h->buffer );
  880. + free( h );
  881. +}
  882. +
  /* Filter descriptor for yadif: its name plus this file's help, yadif_init,
   * get_frame, release_frame and free_filter callbacks; the last slot is NULL. */
  cli_vid_filter_t yadif_filter = { NAME, help, yadif_init, get_frame, release_frame, free_filter, NULL };
  884. Index: filters/video/yadif_filter_line.c
  885. ===================================================================
  886. --- /dev/null
  887. +++ filters/video/yadif_filter_line.c
  888. @@ -0,0 +1,358 @@
  889. +/*****************************************************************************
  890. + * yadif_filter_line.c: x264 yadif filter
  891. + *****************************************************************************
  892. + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
  893. + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
  894. + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
  895. + *
  896. + * This program is free software; you can redistribute it and/or modify
  897. + * it under the terms of the GNU General Public License as published by
  898. + * the Free Software Foundation; either version 2 of the License, or
  899. + * (at your option) any later version.
  900. + *
  901. + * This program is distributed in the hope that it will be useful,
  902. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  903. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  904. + * GNU General Public License for more details.
  905. + *
  906. + * You should have received a copy of the GNU General Public License
  907. + * along with this program; if not, write to the Free Software
  908. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  909. + *****************************************************************************
  910. + * Copied nearly verbatim from the Avisynth filter's yadif.c so that x264's
  911. + * yadif.c is cleaner
  912. + *****************************************************************************/
  913. +
  914. +#include "config.h"
  915. +#include "yadif_filter_line.h"
  916. +#include "x264.h"
  917. +
  918. +#if defined __GNUC__ && defined HAVE_MMX
  /* NOTE(review): this turns uint64_t into a macro for the rest of the file,
   * overriding the typedef pulled in through yadif_filter_line.h, and relies on
   * the non-standard __int64 type -- confirm it is needed for GCC builds. */
  #define uint64_t unsigned __int64
  /* Load 4 bytes from `mem` into `dst` and unpack them against mm7
   * (mm7 is cleared at the top of each FILTER iteration). */
  #define LOAD4(mem,dst) \
      "movd "mem", "#dst" \n\t"\
      "punpcklbw %%mm7, "#dst" \n\t"

  /* Per-word absolute value of `dst`: tmp = 0 - dst; dst = max(dst, tmp).
   * `tmp` is overwritten. */
  #define PABS(tmp,dst) \
      "pxor "#tmp", "#tmp" \n\t"\
      "psubw "#dst", "#tmp" \n\t"\
      "pmaxsw "#tmp", "#dst" \n\t"

  /* Score one diagonal direction.  `pj`/`mj` are the byte displacements applied
   * to the row above (cur+mrefs) and the row below (cur+prefs).  Leaves the
   * candidate prediction in mm5 and the score in mm2. */
  #define CHECK(pj,mj) \
      "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
      "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
      "movq %%mm2, %%mm4 \n\t"\
      "movq %%mm2, %%mm5 \n\t"\
      "pxor %%mm3, %%mm4 \n\t"\
      "pavgb %%mm3, %%mm5 \n\t"\
      "pand %[pb1], %%mm4 \n\t"\
      "psubusb %%mm4, %%mm5 \n\t"\
      "psrlq $8, %%mm5 \n\t"\
      "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
      "movq %%mm2, %%mm4 \n\t"\
      "psubusb %%mm3, %%mm2 \n\t"\
      "psubusb %%mm4, %%mm3 \n\t"\
      "pmaxub %%mm3, %%mm2 \n\t"\
      "movq %%mm2, %%mm3 \n\t"\
      "movq %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
      "psrlq $8, %%mm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
      "psrlq $16, %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
      "punpcklbw %%mm7, %%mm2 \n\t"\
      "punpcklbw %%mm7, %%mm3 \n\t"\
      "punpcklbw %%mm7, %%mm4 \n\t"\
      "paddw %%mm3, %%mm2 \n\t"\
      "paddw %%mm4, %%mm2 \n\t" /* score */

  /* Accept the direction just scored by CHECK when its score (mm2) is lower
   * than the best so far (mm0): updates mm0 and the prediction in mm1, and
   * keeps the comparison mask in mm6 for the following CHECK2. */
  #define CHECK1 \
      "movq %%mm0, %%mm3 \n\t"\
      "pcmpgtw %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
      "pminsw %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
      "movq %%mm3, %%mm6 \n\t"\
      "pand %%mm3, %%mm5 \n\t"\
      "pandn %%mm1, %%mm3 \n\t"\
      "por %%mm5, %%mm3 \n\t"\
      "movq %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

  /* Same selection as CHECK1, but first penalises the score using the mask
   * left in mm6 by CHECK1, as explained in the comment below. */
  #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
      hurts both quality and speed, but matches the C version. */\
      "paddw %[pw1], %%mm6 \n\t"\
      "psllw $14, %%mm6 \n\t"\
      "paddsw %%mm6, %%mm2 \n\t"\
      "movq %%mm0, %%mm3 \n\t"\
      "pcmpgtw %%mm2, %%mm3 \n\t"\
      "pminsw %%mm2, %%mm0 \n\t"\
      "pand %%mm3, %%mm5 \n\t"\
      "pandn %%mm1, %%mm3 \n\t"\
      "por %%mm5, %%mm3 \n\t"\
      "movq %%mm3, %%mm1 \n\t"
  976. +
  /*
   * MMX/MMXEXT implementation of the yadif line filter; same signature as
   * filter_line_c.  Processes the row in groups of 4 pixels.
   *
   * mode    values >= 2 skip the extra check against rows +-2*refs
   * dst     output row
   * prev, cur, next  the same row in the previous, current and next frame
   * w       row width in pixels
   * refs    distance in bytes between vertically adjacent rows
   * parity  selects (prev,cur) or (cur,next) as the prev2/next2 pair
   *
   * NOTE(review): each iteration stores 4 bytes, so when w is not a multiple
   * of 4 up to 3 bytes past dst[w-1] are written -- confirm the buffers allow it.
   * NOTE(review): the asm statements list no clobbers although they modify
   * mm0-mm7, and the result is passed in mm1 from one asm statement to the
   * next; no emms is issued in this function -- confirm callers cope.
   */
  static void filter_line_mmx2(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
      static const uint64_t pw_1 = 0x0001000100010001ULL;
      static const uint64_t pb_1 = 0x0101010101010101ULL;
  // const int mode = p->mode;
      /* Spill slots used by the asm for c, d, e and diff. */
      uint64_t tmp0, tmp1, tmp2, tmp3;
      int x;

  /* Loop body shared by both parities; prev2/next2 are string macros naming
   * the asm operands to use and are defined right before each expansion. */
  #define FILTER\
      for(x=0; x<w; x+=4){\
          asm volatile(\
              "pxor %%mm7, %%mm7 \n\t"\
              LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
              LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
              LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
              LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
              "movq %%mm3, %%mm4 \n\t"\
              "paddw %%mm2, %%mm3 \n\t"\
              "psraw $1, %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
              "movq %%mm0, %[tmp0] \n\t" /* c */\
              "movq %%mm3, %[tmp1] \n\t" /* d */\
              "movq %%mm1, %[tmp2] \n\t" /* e */\
              "psubw %%mm4, %%mm2 \n\t"\
              PABS( %%mm4, %%mm2) /* temporal_diff0 */\
              LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
              LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
              "psubw %%mm0, %%mm3 \n\t"\
              "psubw %%mm1, %%mm4 \n\t"\
              PABS( %%mm5, %%mm3)\
              PABS( %%mm5, %%mm4)\
              "paddw %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
              "psrlw $1, %%mm2 \n\t"\
              "psrlw $1, %%mm3 \n\t"\
              "pmaxsw %%mm3, %%mm2 \n\t"\
              LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
              LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
              "psubw %%mm0, %%mm3 \n\t"\
              "psubw %%mm1, %%mm4 \n\t"\
              PABS( %%mm5, %%mm3)\
              PABS( %%mm5, %%mm4)\
              "paddw %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
              "psrlw $1, %%mm3 \n\t"\
              "pmaxsw %%mm3, %%mm2 \n\t"\
              "movq %%mm2, %[tmp3] \n\t" /* diff */\
  \
              "paddw %%mm0, %%mm1 \n\t"\
              "paddw %%mm0, %%mm0 \n\t"\
              "psubw %%mm1, %%mm0 \n\t"\
              "psrlw $1, %%mm1 \n\t" /* spatial_pred */\
              PABS( %%mm2, %%mm0) /* ABS(c-e) */\
  \
              "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
              "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
              "movq %%mm2, %%mm4 \n\t"\
              "psubusb %%mm3, %%mm2 \n\t"\
              "psubusb %%mm4, %%mm3 \n\t"\
              "pmaxub %%mm3, %%mm2 \n\t"\
              /*"pshufw $9,%%mm2, %%mm3 \n\t"*/\
              "movq %%mm2, %%mm3 \n\t" /* replace for "pshufw $9,%%mm2, %%mm3" - Fizick */\
              "psrlq $16, %%mm3 \n\t"/* replace for "pshufw $9,%%mm2, %%mm3" - Fizick*/\
              "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
              "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
              "paddw %%mm2, %%mm0 \n\t"\
              "paddw %%mm3, %%mm0 \n\t"\
              "psubw %[pw1], %%mm0 \n\t" /* spatial_score */\
  \
              CHECK(-2,0)\
              CHECK1\
              CHECK(-3,1)\
              CHECK2\
              CHECK(0,-2)\
              CHECK1\
              CHECK(1,-3)\
              CHECK2\
  \
              /* if(p->mode<2) ... */\
              "movq %[tmp3], %%mm6 \n\t" /* diff */\
              "cmp $2, %[mode] \n\t"\
              "jge 1f \n\t"\
              LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
              LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
              LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
              LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
              "paddw %%mm4, %%mm2 \n\t"\
              "paddw %%mm5, %%mm3 \n\t"\
              "psrlw $1, %%mm2 \n\t" /* b */\
              "psrlw $1, %%mm3 \n\t" /* f */\
              "movq %[tmp0], %%mm4 \n\t" /* c */\
              "movq %[tmp1], %%mm5 \n\t" /* d */\
              "movq %[tmp2], %%mm7 \n\t" /* e */\
              "psubw %%mm4, %%mm2 \n\t" /* b-c */\
              "psubw %%mm7, %%mm3 \n\t" /* f-e */\
              "movq %%mm5, %%mm0 \n\t"\
              "psubw %%mm4, %%mm5 \n\t" /* d-c */\
              "psubw %%mm7, %%mm0 \n\t" /* d-e */\
              "movq %%mm2, %%mm4 \n\t"\
              "pminsw %%mm3, %%mm2 \n\t"\
              "pmaxsw %%mm4, %%mm3 \n\t"\
              "pmaxsw %%mm5, %%mm2 \n\t"\
              "pminsw %%mm5, %%mm3 \n\t"\
              "pmaxsw %%mm0, %%mm2 \n\t" /* max */\
              "pminsw %%mm0, %%mm3 \n\t" /* min */\
              "pxor %%mm4, %%mm4 \n\t"\
              "pmaxsw %%mm3, %%mm6 \n\t"\
              "psubw %%mm2, %%mm4 \n\t" /* -max */\
              "pmaxsw %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
              "1: \n\t"\
  \
              "movq %[tmp1], %%mm2 \n\t" /* d */\
              "movq %%mm2, %%mm3 \n\t"\
              "psubw %%mm6, %%mm2 \n\t" /* d-diff */\
              "paddw %%mm6, %%mm3 \n\t" /* d+diff */\
              "pmaxsw %%mm2, %%mm1 \n\t"\
              "pminsw %%mm3, %%mm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
              "packuswb %%mm1, %%mm1 \n\t"\
  \
              :[tmp0]"=m"(tmp0),\
               [tmp1]"=m"(tmp1),\
               [tmp2]"=m"(tmp2),\
               [tmp3]"=m"(tmp3)\
              :[prev] "r"(prev),\
               [cur] "r"(cur),\
               [next] "r"(next),\
               [prefs]"r"((long)refs),\
               [mrefs]"r"((long)-refs),\
               [pw1] "m"(pw_1),\
               [pb1] "m"(pb_1),\
               [mode] "g"(mode)\
          );\
          asm volatile("movd %%mm1, %0" :"=m"(*dst));\
          dst += 4;\
          prev+= 4;\
          cur += 4;\
          next+= 4;\
      }

      /* Same choice of temporal neighbours as filter_line_c:
       * parity ? (prev, cur) : (cur, next). */
      if(parity){
  #define prev2 "prev"
  #define next2 "cur"
          FILTER
  #undef prev2
  #undef next2
      }else{
  #define prev2 "cur"
  #define next2 "next"
          FILTER
  #undef prev2
  #undef next2
      }
  }
  /* Drop the MMX helper macros so the sections below can define their own. */
  #undef LOAD4
  #undef PABS
  #undef CHECK
  #undef CHECK1
  #undef CHECK2
  #undef FILTER
  1132. +
  /* attribute_align_arg: expands to GCC's force_align_arg_pointer attribute
   * on GCC >= 4.2 and to nothing elsewhere. */
  #ifndef attribute_align_arg
  #if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
  # define attribute_align_arg __attribute__((force_align_arg_pointer))
  #else
  # define attribute_align_arg
  #endif
  #endif

  // The SSE2/SSSE3 versions are only built with GCC 4.2 and above
  // (needed for proper alignment).
  #if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)

  /* Declare variable v of type t aligned to n bytes. */
  #ifndef DECLARE_ALIGNED
  #define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n)))
  #endif

  /* The template header is included twice, once per instruction set, with
   * PABS and FILTER_LINE_FUNC_NAME selecting the variant to generate.
   * NOTE(review): PABS and FILTER_LINE_FUNC_NAME are redefined below without
   * an #undef here; presumably the template undefines them -- confirm. */
  // ================= SSE2 =================
  #define PABS(tmp,dst) \
      "pxor "#tmp", "#tmp" \n\t"\
      "psubw "#dst", "#tmp" \n\t"\
      "pmaxsw "#tmp", "#dst" \n\t"

  #define FILTER_LINE_FUNC_NAME filter_line_sse2
  #include "avs_vf_yadif_template.h"

  // ================ SSSE3 =================
  #define PABS(tmp,dst) \
      "pabsw "#dst", "#dst" \n\t"

  #define FILTER_LINE_FUNC_NAME filter_line_ssse3
  #include "avs_vf_yadif_template.h"

  #endif
  1165. +
  1166. +#endif
  1167. +
  /* Small arithmetic helpers; arguments may be evaluated more than once. */
  #define MIN(a,b) ( (a)<(b) ? (a) : (b) )
  #define MAX(a,b) ( (a)>(b) ? (a) : (b) )
  #define MIN3(a,b,c) MIN((a),MIN((b),(c)))
  #define MAX3(a,b,c) MAX((a),MAX((b),(c)))
  #define ABS(a) ( (a) > 0 ? (a) : -(a) )

  /*
   * Edge score for direction j: sum of absolute differences of three
   * horizontally adjacent pixel pairs, taken j pixels along the row above
   * and -j pixels along the row below.
   */
  static inline int yadif_edge_score(const uint8_t *above, const uint8_t *below, int j)
  {
      return ABS(above[j-1] - below[-j-1])
           + ABS(above[j]   - below[-j])
           + ABS(above[j+1] - below[-j+1]);
  }

  /*
   * Plain C yadif line filter.
   *
   * mode    values below 2 enable the additional check against rows +-2*refs
   * dst     output row, w pixels
   * prev, cur, next  the same row in the previous, current and next frame
   * w       number of pixels to produce
   * refs    distance in bytes between vertically adjacent rows
   * parity  non-zero: temporal pair is (prev, cur); zero: (cur, next)
   *
   * Pixels up to 3 columns left/right of the row and up to 2 rows above/below
   * are read.
   */
  static void filter_line_c(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
      const uint8_t *prev2 = parity ? prev : cur;
      const uint8_t *next2 = parity ? cur : next;

      for (int x = 0; x < w; x++) {
          const uint8_t *above = cur + x - refs;
          const uint8_t *below = cur + x + refs;
          int c = above[0];
          int e = below[0];
          int d = (prev2[x] + next2[x]) >> 1;

          /* Temporal activity: largest of three half-differences. */
          int td0 = ABS(prev2[x] - next2[x]) >> 1;
          int td1 = (ABS(prev[x - refs] - c) + ABS(prev[x + refs] - e)) >> 1;
          int td2 = (ABS(next[x - refs] - c) + ABS(next[x + refs] - e)) >> 1;
          int diff = MAX3(td0, td1, td2);

          /* Spatial prediction: start vertical, then try diagonals.  For each
           * side, distance 2 is only tried when distance 1 was an improvement. */
          int pred = (c + e) >> 1;
          int best = yadif_edge_score(above, below, 0) - 1;
          for (int side = -1; side <= 1; side += 2) {
              for (int step = 1; step <= 2; step++) {
                  int j = side * step;
                  int score = yadif_edge_score(above, below, j);
                  if (score >= best)
                      break;
                  best = score;
                  pred = (above[j] + below[-j]) >> 1;
              }
          }

          if (mode < 2) {
              int b = (prev2[x - 2*refs] + next2[x - 2*refs]) >> 1;
              int f = (prev2[x + 2*refs] + next2[x + 2*refs]) >> 1;
              int hi = MAX3(d - e, d - c, MIN(b - c, f - e));
              int lo = MIN3(d - e, d - c, MAX(b - c, f - e));

              diff = MAX3(diff, lo, -hi);
          }

          /* Keep the spatial prediction within d +- diff. */
          if (pred > d + diff)
              pred = d + diff;
          else if (pred < d - diff)
              pred = d - diff;

          dst[x] = pred;
      }
  }
  1232. +
  1233. +filter_line_func get_filter_func(unsigned int cpu) {
  1234. + filter_line_func ret = filter_line_c;
  1235. +#if defined __GNUC__ && defined HAVE_MMX
  1236. + if (cpu & X264_CPU_MMXEXT)
  1237. + ret = filter_line_mmx2;
  1238. +#if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
  1239. + if (cpu & (X264_CPU_SSE2|X264_CPU_SSE2_IS_SLOW|X264_CPU_SSE2_IS_FAST))
  1240. + ret = filter_line_sse2;
  1241. + if (cpu & X264_CPU_SSSE3)
  1242. + ret = filter_line_ssse3;
  1243. +#endif
  1244. +#endif
  1245. + return ret;
  1246. +}
  1247. Index: filters/video/yadif_filter_line.h
  1248. ===================================================================
  1249. --- /dev/null
  1250. +++ filters/video/yadif_filter_line.h
  1251. @@ -0,0 +1,27 @@
  1252. +/*****************************************************************************
  1253. + * yadif_filter_line.h: x264 yadif filter
  1254. + *****************************************************************************
  1255. + * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
  1256. + * Avisynth port (C) 2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
  1257. + * x264 port (C) 2010 James Darnley <james.darnley@gmail.com>
  1258. + *
  1259. + * This program is free software; you can redistribute it and/or modify
  1260. + * it under the terms of the GNU General Public License as published by
  1261. + * the Free Software Foundation; either version 2 of the License, or
  1262. + * (at your option) any later version.
  1263. + *
  1264. + * This program is distributed in the hope that it will be useful,
  1265. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  1266. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  1267. + * GNU General Public License for more details.
  1268. + *
  1269. + * You should have received a copy of the GNU General Public License
  1270. + * along with this program; if not, write to the Free Software
  1271. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  1272. + *****************************************************************************/
  1273. +
  /* Include guard: a second inclusion would otherwise repeat the typedef,
   * which compilers reject before C11. */
  #ifndef X264_YADIF_FILTER_LINE_H
  #define X264_YADIF_FILTER_LINE_H

  #include <inttypes.h>

  /*
   * Signature shared by all yadif line filters.
   *
   * mode    filter mode; values below 2 enable the extra check against rows +-2*refs
   * dst     output row
   * prev, cur, next  the same row in the previous, current and next frame
   * w       row width in pixels
   * refs    distance in bytes between vertically adjacent rows
   * parity  selects which pair of frames is used as temporal neighbours
   */
  typedef void (*filter_line_func)(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity);

  /* Return the line filter to use for the given x264 CPU flag mask. */
  filter_line_func get_filter_func(unsigned int cpu);

  #endif /* X264_YADIF_FILTER_LINE_H */
  1279.  
  1280. From: James Darnley <james.darnley@gmail.com>
  1281. Subject: [PATCH 3/3] Add pad filter
  1282.  
  1283. Makefile | 2 +-
  1284. configure | 2 +-
  1285. filters/video/pad.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++
  1286. filters/video/video.c | 1 +
  1287. 4 files changed, 193 insertions(+), 2 deletions(-)
  1288. create mode 100644 filters/video/pad.c
  1289.  
  1290. Index: Makefile
  1291. ===================================================================
  1292. --- Makefile_orig
  1293. +++ Makefile
  1294. @@ -19,7 +19,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
  1295. filters/video/video.c filters/video/source.c filters/video/internal.c \
  1296. filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
  1297. filters/video/select_every.c filters/video/crop.c filters/video/hqdn3d.c \
  1298. - filters/video/yadif.c filters/video/yadif_filter_line.c
  1299. + filters/video/yadif.c filters/video/yadif_filter_line.c filters/video/pad.c
  1300.  
  1301. SRCSO =
  1302.  
  1303. Index: configure
  1304. ===================================================================
  1305. --- configure_orig
  1306. +++ configure
  1307. @@ -770,7 +770,7 @@ Libs: $pclibs
  1308. Cflags: -I$includedir
  1309. EOF
  1310.  
  1311. -filters="crop select_every hqdn3d yadif"
  1312. +filters="crop select_every hqdn3d yadif pad"
  1313. [ $swscale = yes ] && filters="resize $filters"
  1314.  
  1315. cat > conftest.log <<EOF
  1316. Index: filters/video/pad.c
  1317. ===================================================================
  1318. --- /dev/null
  1319. +++ filters/video/pad.c
  1320. @@ -0,0 +1,190 @@
  1321. +/*****************************************************************************
  1322. + * pad.c: x264 pad filter
  1323. + *****************************************************************************
  1324. + * Copyright (C) 2010 James Darnley <james.darnley@gmail.com>
  1325. + *
  1326. + * This program is free software; you can redistribute it and/or modify
  1327. + * it under the terms of the GNU General Public License as published by
  1328. + * the Free Software Foundation; either version 2 of the License, or
  1329. + * (at your option) any later version.
  1330. + *
  1331. + * This program is distributed in the hope that it will be useful,
  1332. + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  1333. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  1334. + * GNU General Public License for more details.
  1335. + *
  1336. + * You should have received a copy of the GNU General Public License
  1337. + * along with this program; if not, write to the Free Software
  1338. + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  1339. + *****************************************************************************/
  1340. +
  1341. +#include "internal.h"
  1342. +#include "video.h"
  1343. +#define NAME "pad"
  1344. +#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
  1345. +
  1346. +cli_vid_filter_t pad_filter;
  1347. +
  1348. +typedef struct {
  1349. + hnd_t prev_handle;
  1350. + cli_vid_filter_t prev_filter;
  1351. + int width;
  1352. + int height;
  1353. + int cols;
  1354. + int rows;
  1355. + char colour[4];
  1356. + cli_pic_t buffer;
  1357. + const x264_cli_csp_t *csp;
  1358. +} pad_handle_t;
  1359. +
  1360. +static void help( int longhelp )
  1361. +{
  1362. + printf( " "NAME":[left][,top][,right][,bottom][,width][,height][,colour]\n" );
  1363. + if( !longhelp )
  1364. + return;
  1365. + printf( " adds pixels to the frame edge\n"
  1366. + " colour values are in YUV not RGB\n"
  1367. + " default colour is black\n" );
  1368. +}
  1369. +
  1370. +static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x264_param_t *param, char *opt_string )
  1371. +{
  1372. + int arg[7];
  1373. + char *opt;
  1374. + const x264_cli_csp_t *csp = x264_cli_get_csp(info->csp);
  1375. + static const char *optlist[] = { "left", "top", "right", "bottom", "width",
  1376. + "height", "colour", "color", NULL };
  1377. + char **opts = x264_split_options( opt_string, optlist );
  1378. +
  1379. + pad_handle_t *h = calloc( 1, sizeof(pad_handle_t) );
  1380. + if( !h )
  1381. + return -1;
  1382. +
  1383. + FAIL_IF_ERROR( !(info->csp == X264_CSP_I420 || info->csp == X264_CSP_I422
  1384. + || info->csp == X264_CSP_I444 || info->csp == X264_CSP_YV12 ),
  1385. + "Only planar YUV images currently supported, patches welcome\n" )
  1386. +
  1387. + for(int i=0; i<6; i++) {
  1388. + int mod = i&1 ? (csp->mod_height << info->interlaced) : csp->mod_width;
  1389. + opt = x264_get_option( optlist[i], opts );
  1390. + arg[i] = x264_otoi(opt, 0);
  1391. + FAIL_IF_ERROR( arg[i] % mod, "%s pad value '%s' is not a "
  1392. + "multiple of %d\n", optlist[i], opt, mod )
  1393. + }
  1394. + opt = x264_get_option( optlist[6], opts );
  1395. + if(!opt)
  1396. + opt = x264_get_option( optlist[7], opts );
  1397. + arg[6] = x264_otoi(opt, -1);
  1398. + if(arg[6] > -1) {
  1399. + h->colour[0] = (arg[6]&0xFF0000) >> 16;
  1400. + h->colour[1] = (arg[6]&0xFF00) >> 8;
  1401. + h->colour[2] = arg[6]&0xFF;
  1402. + } else {
  1403. + h->colour[0] = 0;
  1404. + h->colour[1] = 0x80;
  1405. + h->colour[2] = 0x80;
  1406. + }
  1407. + x264_free_string_array(opts);
  1408. +
  1409. +/* For sanity! */
  1410. +#define left arg[0]
  1411. +#define top arg[1]
  1412. +#define right arg[2]
  1413. +#define bottom arg[3]
  1414. +#define WIDTH arg[4]
  1415. +#define HEIGHT arg[5]
  1416. + FAIL_IF_ERROR( WIDTH && WIDTH < info->width + left + right,
  1417. + "requested width (%d) is less than requested padding (%d + %d + %d)\n",
  1418. + WIDTH, info->width, left, right )
  1419. +
  1420. + FAIL_IF_ERROR( HEIGHT && HEIGHT < info->height + top + bottom,
  1421. + "requested height (%d) is less than requested padding (%d + %d + %d)\n",
  1422. + HEIGHT, info->height, top, bottom )
  1423. +
  1424. + h->width = (WIDTH) ? WIDTH : info->width + left + right;
  1425. + h->height = (HEIGHT) ? HEIGHT : info->height + top + bottom;
  1426. +
  1427. + h->cols = (left) ? left
  1428. + : (right) ? h->width - right - info->width
  1429. + : (h->width - info->width)/2;
  1430. + h->cols = ((h->cols+1) / csp->mod_width) * csp->mod_width;
  1431. +
  1432. + h->rows = (top) ? top
  1433. + : (bottom) ? h->height - bottom - info->height
  1434. + : (h->height - info->height)/2;
  1435. + h->rows = ((h->rows+1) / csp->mod_height) * csp->mod_height;
  1436. +#undef left
  1437. +#undef top
  1438. +#undef right
  1439. +#undef bottom
  1440. +#undef WIDTH
  1441. +#undef HEIGHT
  1442. +
  1443. + if( h->width == info->width && h->height == info->height ) {
  1444. + free(h);
  1445. + return 0;
  1446. + }
  1447. +
  1448. + if(x264_cli_pic_alloc( &h->buffer, info->csp, h->width, h->height ))
  1449. + return -1;
  1450. + for(int i=0; i<h->buffer.img.planes; i++) {
  1451. + memset( h->buffer.img.plane[i], h->colour[i],
  1452. + h->height * csp->height[i] * h->buffer.img.stride[i] );
  1453. + }
  1454. +
  1455. + x264_cli_log( NAME, X264_LOG_INFO,
  1456. + "expanding frame to %dx%d, picture starting at (%d,%d)\n",
  1457. + h->width, h->height, h->cols, h->rows );
  1458. +
  1459. + info->width = h->width;
  1460. + info->height = h->height;
  1461. + h->prev_filter = *filter;
  1462. + h->prev_handle = *handle;
  1463. + h->csp = csp;
  1464. + *handle = h;
  1465. + *filter = pad_filter;
  1466. +
  1467. + return 0;
  1468. +}
  1469. +
  1470. +static int get_frame( hnd_t handle, cli_pic_t *out, int frame )
  1471. +{
  1472. + pad_handle_t *h = handle;
  1473. + cli_pic_t in;
  1474. +
  1475. + if( h->prev_filter.get_frame( h->prev_handle, &in, frame ) )
  1476. + return -1;
  1477. +
  1478. + *out = h->buffer;
  1479. +
  1480. + for(int i=0; i<in.img.planes; i++) {
  1481. + float scale[2] = { h->csp->width[i],
  1482. + h->csp->height[i] };
  1483. + int stride[2] = { in.img.stride[i],
  1484. + out->img.stride[i] };
  1485. + int in_dim[2] = { in.img.width * scale[0],
  1486. + in.img.height * scale[1] };
  1487. + int offset = h->cols*scale[0] + h->rows*scale[1]*stride[1];
  1488. +
  1489. + x264_cli_plane_copy( out->img.plane[i]+offset, stride[1],
  1490. + in.img.plane[i], stride[0], in_dim[0], in_dim[1] );
  1491. + }
  1492. +
  1493. + return h->prev_filter.release_frame( h->prev_handle, &in, frame );
  1494. +}
  1495. +
  1496. +
  1497. +static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
  1498. +{
  1499. + return 0;
  1500. +}
  1501. +
  1502. +static void free_filter( hnd_t handle )
  1503. +{
  1504. + pad_handle_t *h = handle;
  1505. + h->prev_filter.free( h->prev_handle );
  1506. + x264_cli_pic_clean( &h->buffer );
  1507. + free( h );
  1508. +}
  1509. +
  /* Filter descriptor for pad: its name plus this file's help, init,
   * get_frame, release_frame and free_filter callbacks; the last slot is NULL. */
  cli_vid_filter_t pad_filter = { NAME, help, init, get_frame, release_frame, free_filter, NULL };
  1511. Index: filters/video/video.c
  1512. ===================================================================
  1513. --- filters/video/video_orig.c
  1514. +++ filters/video/video.c
  1515. @@ -48,6 +48,7 @@ void x264_register_vid_filters()
  1516. REGISTER_VFILTER( select_every );
  1517. REGISTER_VFILTER( hqdn3d );
  1518. REGISTER_VFILTER( yadif );
  1519. + REGISTER_VFILTER( pad );
  1520. }
  1521.  
  1522. int x264_init_vid_filter( const char *name, hnd_t *handle, cli_vid_filter_t *filter,
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement