ddt-jinc shader

/* COMPATIBILITY
   - HLSL compilers
   - Cg   compilers
*/

/*
   Hyllian's ddt-jinc2-lobe with anti-ringing Shader

   Copyright (C) 2011-2014 Hyllian/Jararaca - sergiogdb@gmail.com

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/

      /*
         This is an approximation of Jinc(x)*Jinc(x*r1/r2) for x < 2.5,
         where r1 and r2 are the first two zeros of the jinc function.
         For the best 2-lobe jinc approximation, use A=0.5 and B=0.825.
      */

// Filter-shape parameters. They are only used to build wa and wb below.
// A=0.5, B=0.825 is the best jinc approximation for x<2.5. If B=1.0, it's a lanczos filter.
// Increase A to get more blur. Decrease it to get a sharper picture.
// B = 0.825 gets rid of dithering. Increasing B gives finer sharpness, though dithering returns.
// Note: the values selected here (A=0.4, B=0.9) are not the "best approximation" pair quoted above.
// Note: A and B are preprocessor macros, so no other identifier in this file may be named A or B.
#define A   0.4
#define B   0.9

    // Half of the pi value defined on the next line; not referenced by the code shown in this section.
    const static float     halfpi     = 1.5707963267948966192313216916398;
    const static float         pi     = 3.1415926535897932384626433832795;
    // Angular frequencies of the two sine factors evaluated in lanczos().
    const static float         wa     = A*pi;
    const static float         wb     = B*pi;


// Per-channel weights used by reduce() to fold an RGB color into one scalar.
// NOTE(review): the middle weight is 255 rather than 256 -- confirm this is intentional.
const static float3 dtt = float3(65536,255,1);

// Collapses an RGB color into a single scalar (the dot product with dtt).
// main_fragment only ever compares differences of these scalars.
float reduce(float3 color)
{
	float key = dot(color, dtt);
	return key;
}


// Euclidean distance between the points pt1 and pt2.
float d(float2 pt1, float2 pt2)
{
  float2 delta   = pt2 - pt1;
  float  sq_dist = dot(delta, delta);   // squared length of the difference vector
  return sqrt(sq_dist);
}

// Component-wise minimum of four colors.
float3 min4(float3 a, float3 b, float3 c, float3 d)
{
    float3 lowest = min(c, d);
    lowest = min(b, lowest);
    return min(a, lowest);
}
// Component-wise maximum of four colors.
float3 max4(float3 a, float3 b, float3 c, float3 d)
{
    float3 highest = max(c, d);
    highest = max(b, highest);
    return max(a, highest);
}


    // Uniform parameter block passed to both entry points as `IN`.
    // Of these fields, only texture_size is read by the code in this section;
    // the others are presumably filled in by the host application -- TODO confirm.
    struct input
    {
        float2 video_size;
        // main_fragment multiplies texture coordinates by this to get texel
        // units, and divides by it to go back.
        float2 texture_size;
        float2 output_size;
            float  frame_count;
            float  frame_direction;
        float frame_rotation;
    };


    // Output of main_vertex and input (`VAR`) of main_fragment.
    struct out_vertex {
        // Set by main_vertex to mul(modelViewProj, position).
        float4 position : POSITION;
        // Vertex color, copied through unchanged by main_vertex.
        float4 color    : COLOR;
        // Texture coordinate, copied through by main_vertex and read by main_fragment.
        float2 texCoord : TEXCOORD0;
    };

    /*    VERTEX_SHADER    */
    // Transforms the vertex position by modelViewProj and forwards the color
    // and texture coordinate untouched. IN is accepted but not read here.
    out_vertex main_vertex
    (
        float4 position    : POSITION,
        float4 color    : COLOR,
        float2 texCoord1   : TEXCOORD0,

           uniform float4x4 modelViewProj,
        uniform input IN
    )
    {
        // Copying the incoming coordinate into a local works around a bug on ATI cards.
        float2 tex = texCoord1;

        out_vertex OUT;
        OUT.position = mul(modelViewProj, position);
        OUT.color    = color;
        OUT.texCoord = tex;

        return OUT;
    }

    // Evaluates the filter kernel sin(wa*x)*sin(wb*x)/(x*x) for four distances at once.
    // Components where x is exactly 0 return wa*wb, the limit of the expression
    // as x -> 0, so the division by zero is never selected.
    float4 lanczos(float4 x)
    {
      float4 at_origin = float4(wa*wb);
      float4 kernel    = sin(x*wa)*sin(x*wb)/(x*x);

      return (x==float4(0.0, 0.0, 0.0, 0.0)) ?  at_origin  :  kernel;
    }

    /*    FRAGMENT_SHADER    */
    // Resamples s_p at VAR.texCoord using a 4x4 grid of taps weighted by lanczos()
    // of each tap's distance to the sample position.
    //
    // Before filtering, the 2x2 cell of texels around the sample position is
    // inspected: the diagonal whose two texels are more alike (per reduce()) is
    // kept, and one texel of the other diagonal is replaced by a value
    // extrapolated from the remaining three. Which one is replaced depends on
    // where the sample position lies inside the cell. If both diagonals differ
    // by the same amount, nothing is replaced.
    //
    // The filtered color is divided by the sum of all 16 weights and then clamped
    // to the min/max of the ORIGINAL 2x2 cell (anti-ringing). Alpha is always 1.
    float4 main_fragment(in out_vertex VAR, uniform sampler2D s_p : TEXUNIT0, uniform input IN) : COLOR
    {
      // Unit steps along each axis, in texel units.
      float2 dx = float2(1.0, 0.0);
      float2 dy = float2(0.0, 1.0);

      // Sample position in texel units.
      float2 pc = VAR.texCoord*IN.texture_size;

      // Center of the texel whose center lies at or before pc on both axes (texel "11").
      float2 tc = (floor(pc-float2(0.5,0.5))+float2(0.5,0.5));

      // Offset of pc from tc, each component in [0, 1).
      float2 pos = frac(pc-float2(0.5,0.5));

      // Tap columns at x offsets -1, 0, +1, +2 from tc (texel units).
      float2 px_l   = tc - dx;
      float2 px_c   = tc;
      float2 px_r   = tc + dx;
      float2 px_rr  = tc + 2.0*dx;
      float2 px_dy2 = 2.0*dy;

      // One kernel weight per tap; row i of `weights` holds the taps at y offset i-1.
      float4x4 weights;
      weights[0] = lanczos(float4(d(pc, px_l - dy    ), d(pc, px_c - dy    ), d(pc, px_r - dy    ), d(pc, px_rr - dy    )));
      weights[1] = lanczos(float4(d(pc, px_l         ), d(pc, px_c         ), d(pc, px_r         ), d(pc, px_rr         )));
      weights[2] = lanczos(float4(d(pc, px_l + dy    ), d(pc, px_c + dy    ), d(pc, px_r + dy    ), d(pc, px_rr + dy    )));
      weights[3] = lanczos(float4(d(pc, px_l + px_dy2), d(pc, px_c + px_dy2), d(pc, px_r + px_dy2), d(pc, px_rr + px_dy2)));

      // The same steps and anchor converted from texel units back to texture coordinates.
      float2 du = dx/IN.texture_size;
      float2 dv = dy/IN.texture_size;
      float2 uv = tc/IN.texture_size;

      float2 uv_l   = uv - du;
      float2 uv_c   = uv;
      float2 uv_r   = uv + du;
      float2 uv_rr  = uv + 2.0*du;
      float2 uv_dv2 = 2.0*dv;

      // Fetch the 4x4 neighbourhood. Name is t<column><row>; t11 is the texel at tc.
      float3 t00 = tex2D(s_p, uv_l  - dv    ).xyz;
      float3 t10 = tex2D(s_p, uv_c  - dv    ).xyz;
      float3 t20 = tex2D(s_p, uv_r  - dv    ).xyz;
      float3 t30 = tex2D(s_p, uv_rr - dv    ).xyz;

      float3 t01 = tex2D(s_p, uv_l          ).xyz;
      float3 t11 = tex2D(s_p, uv_c          ).xyz;
      float3 t21 = tex2D(s_p, uv_r          ).xyz;
      float3 t31 = tex2D(s_p, uv_rr         ).xyz;

      float3 t02 = tex2D(s_p, uv_l  + dv    ).xyz;
      float3 t12 = tex2D(s_p, uv_c  + dv    ).xyz;
      float3 t22 = tex2D(s_p, uv_r  + dv    ).xyz;
      float3 t32 = tex2D(s_p, uv_rr + dv    ).xyz;

      float3 t03 = tex2D(s_p, uv_l  + uv_dv2).xyz;
      float3 t13 = tex2D(s_p, uv_c  + uv_dv2).xyz;
      float3 t23 = tex2D(s_p, uv_r  + uv_dv2).xyz;
      float3 t33 = tex2D(s_p, uv_rr + uv_dv2).xyz;

      // Scalar keys for the 2x2 cell around the sample position:
      //     h11  h21
      //     h12  h22
      float h11 = reduce(t11);
      float h21 = reduce(t21);
      float h12 = reduce(t12);
      float h22 = reduce(t22);

      float fx = abs(pos.x);
      float fy = abs(pos.y);

      // Anti-ringing bounds; taken before any texel of the cell is replaced.
      float3 min_sample = min4(t11, t21, t12, t22);
      float3 max_sample = max4(t11, t21, t12, t22);

      float diag_main = abs(h11 - h22);   // t11 <-> t22
      float diag_anti = abs(h21 - h12);   // t21 <-> t12

      if (diag_main < diag_anti)
      {
        // t11/t22 are the closer pair: rewrite one texel of the other diagonal.
        if (fy <= fx)
        {
          t12 = t11 + t22 - t21;
        }
        else
        {
          t21 = t11 + t22 - t12;
        }
      }
      else if (diag_main > diag_anti)
      {
        // t21/t12 are the closer pair: rewrite one texel of the other diagonal.
        if ((fx + fy) < 1.0)
        {
          t22 = t21 + t12 - t11;
        }
        else
        {
          t11 = t21 + t12 - t22;
        }
      }

      // Weighted sum over the 16 taps, one row at a time.
      float3 color = mul(weights[0], float4x3(t00, t10, t20, t30));
      color += mul(weights[1], float4x3(t01, t11, t21, t31));
      color += mul(weights[2], float4x3(t02, t12, t22, t32));
      color += mul(weights[3], float4x3(t03, t13, t23, t33));

      // Weight normalization: divide by the sum of all 16 weights.
      float4 row_sums = mul(weights, float4(1));
      color = color/(dot(row_sums, 1));

      // Anti-ringing
      color = clamp(color, min_sample, max_sample);

      return float4(color, 1);

    }