Untitled

/**
  *
  * Float4 header file, contains vector math
  *
  * Created By Vilem Otte, 2011
  *
  * If you use this library for any project, please let me know at vilem.otte@post.cz, I might add
  * a link at your project at my site. As for the licence:
  *
  * Licence: WTFPL
  *
  *            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  *                    Version 2, December 2004
  *
  * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
  *
  * Everyone is permitted to copy and distribute verbatim or modified
  * copies of this license document, and changing it is allowed as long
  * as the name is changed.
  *
  *            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  *   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
  *
  *  0. You just DO WHAT THE FUCK YOU WANT TO.
  *
  **/

#ifndef __FLOAT4__H__
#define __FLOAT4__H__

#include "../../core/platform/platform.h"

#include <math.h>

#ifdef PLATFORM_SSE
    #include <xmmintrin.h>
#endif

#ifdef PLATFORM_SSE2
    #include <emmintrin.h>
#endif

#ifdef PLATFORM_SSE3
    #include <pmmintrin.h>
#endif

#ifdef PLATFORM_SSE4
    #include <smmintrin.h>
#endif

#include "../../core/memory/memory.h"

#define min(a, b) (a < b ? a : b)
#define max(a, b) (a > b ? a : b)

struct ALIGN(16) float4
{
#ifdef PLATFORM_SSE
    union
    {
        struct
        {
            float x, y, z, w;
        };
        __m128 xmm;
    };
#else
    float x, y, z, w;
#endif


    ///////////////////////////////////////////////////////////////////////////////////////////////////
    /** Constructors                                                                                **/

    /* Default constructor */
    inline float4()
    {
#ifdef PLATFORM_SSE
        xmm = _mm_setzero_ps();
#else
        x = y = z = w = 0.0f;
#endif
    }

    /* Float constructor */
    explicit inline float4(float f)
    {
#ifdef PLATFORM_SSE
        xmm = _mm_set1_ps(f);
#else
        x = y = z = w = f;
#endif
    }

    /* Scalar constructor */
    inline float4(float _x, float _y, float _z, float _w)
    {
#ifdef PLATFORM_SSE
        xmm = _mm_setr_ps(_x, _y, _z, _w);
#else
        x = _x;
        y = _y;
        z = _z;
        w = _w;
#endif
    }

    /* Array constructor (unaligned!!!) */
    inline float4(const float* fv)
    {
#ifdef PLATFORM_SSE
        xmm = _mm_loadu_ps(fv);
#else
        x = fv[0];
        y = fv[1];
        z = fv[2];
        w = fv[3];
#endif
    }

    /* Copy constructor */
    inline float4(const float4 &v)
    {
#ifdef PLATFORM_SSE
        xmm = v.xmm;
#else
        x = v.x;
        y = v.y;
        z = v.z;
        w = v.w;
#endif
    }

#ifdef PLATFORM_SSE
    /* SSE constructor */
    inline float4(const __m128 &_m)
    {
        xmm = _m;
    }
#endif

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    /** Accessing                                                                                   **/

    /* Direct access write */
    inline float& operator[](int index)
    {
        return reinterpret_cast<float *>(this)[index];
    }

    /* Direct access read */
    inline const float& operator[](int index) const
    {
        return reinterpret_cast<const float *>(this)[index];
    }

    /* Cast */
    inline operator float* ()
    {
        return reinterpret_cast<float *>(this);
    }

    /* Const cast */
    inline operator const float* () const
    {
        return reinterpret_cast<const float *>(this);
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    /** Unary arithmetics                                                                           **/

    /* Scalar addition */
    friend inline float4& operator += (float4& v, float f)
    {
#ifdef PLATFORM_SSE
        v.xmm = _mm_add_ps(v.xmm, _mm_set1_ps(f));
#else
        v.x += f;
        v.y += f;
        v.z += f;
        v.w += f;
#endif
        return v;
    }

    /* Vector addition */
    friend inline float4& operator += (float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        v0.xmm = _mm_add_ps(v0.xmm, v1.xmm);
#else
        v0.x += v1.x;
        v0.y += v1.y;
        v0.z += v1.z;
        v0.w += v1.w;
#endif
        return v0;
    }

    /* Scalar subtraction */
    friend inline float4& operator -= (float4& v, float f)
    {
#ifdef PLATFORM_SSE
        v.xmm = _mm_sub_ps(v.xmm, _mm_set1_ps(f));
#else
        v.x -= f;
        v.y -= f;
        v.z -= f;
        v.w -= f;
#endif
        return v;
    }

    /* Sign negation */
    friend inline const float4 operator - (const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_xor_ps(v.xmm, _mm_set1_ps(-0.f));
#else
        float4 ret;
        ret.x = -v.x;
        ret.y = -v.y;
        ret.z = -v.z;
        ret.w = -v.w;
        return ret;
#endif
    }

    /* Vector subtraction */
    friend inline float4& operator -= (float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        v0.xmm = _mm_sub_ps(v0.xmm, v1.xmm);
#else
        v0.x -= v1.x;
        v0.y -= v1.y;
        v0.z -= v1.z;
        v0.w -= v1.w;
#endif
        return v0;
    }

    /* Scalar multiplication */
    friend inline float4& operator *= (float4& v, float f)
    {
#ifdef PLATFORM_SSE
        v.xmm = _mm_mul_ps(v.xmm, _mm_set1_ps(f));
#else
        v.x *= f;
        v.y *= f;
        v.z *= f;
        v.w *= f;
#endif
        return v;
    }

    /* Vector multiplication */
    friend inline float4& operator *= (float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        v0.xmm = _mm_mul_ps(v0.xmm, v1.xmm);
#else
        v0.x *= v1.x;
        v0.y *= v1.y;
        v0.z *= v1.z;
        v0.w *= v1.w;
#endif
        return v0;
    }

    /* Scalar division */
    friend inline float4& operator /= (float4& v, float f)
    {
#ifdef PLATFORM_SSE
        v.xmm = _mm_div_ps(v.xmm, _mm_set1_ps(f));
#else
        float rf = 1.0f / f;
        v.x *= rf;
        v.y *= rf;
        v.z *= rf;
        v.w *= rf;
#endif
        return v;
    }

    /* Vector division */
    friend inline float4& operator /= (float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        v0.xmm = _mm_div_ps(v0.xmm, v1.xmm);
#else
        v0.x /= v1.x;
        v0.y /= v1.y;
        v0.z /= v1.z;
        v0.w /= v1.w;
#endif
        return v0;
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    /** Binary arithmetics                                                                          **/

    /* Addition */
    friend inline const float4 operator + (float f, const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_add_ps(_mm_set1_ps(f), v.xmm);
#else
        float4 ret;
        ret.x = f + v.x;
        ret.y = f + v.y;
        ret.z = f + v.z;
        ret.w = f + v.w;
        return ret;
#endif
    }

    friend inline const float4 operator + (const float4& v, float f)
    {
#ifdef PLATFORM_SSE
        return _mm_add_ps(v.xmm, _mm_set1_ps(f));
#else
        float4 ret;
        ret.x = v.x + f;
        ret.y = v.y + f;
        ret.z = v.z + f;
        ret.w = v.w + f;
        return ret;
#endif
    }

    friend inline const float4 operator + (const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        return _mm_add_ps(v0.xmm, v1.xmm);
#else
        float4 ret;
        ret.x = v0.x + v1.x;
        ret.y = v0.y + v1.y;
        ret.z = v0.z + v1.z;
        ret.w = v0.w + v1.w;
        return ret;
#endif
    }

    /* Subtraction */
    friend inline const float4 operator - (float f, const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_sub_ps(_mm_set1_ps(f), v.xmm);
#else
        float4 ret;
        ret.x = f - v.x;
        ret.y = f - v.y;
        ret.z = f - v.z;
        ret.w = f - v.w;
        return ret;
#endif
    }

    friend inline const float4 operator - (const float4& v, float f)
    {
#ifdef PLATFORM_SSE
        return _mm_sub_ps(v.xmm, _mm_set1_ps(f));
#else
        float4 ret;
        ret.x = v.x - f;
        ret.y = v.y - f;
        ret.z = v.z - f;
        ret.w = v.w - f;
        return ret;
#endif
    }

    friend inline const float4 operator - (const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        return _mm_sub_ps(v0.xmm, v1.xmm);
#else
        float4 ret;
        ret.x = v0.x - v1.x;
        ret.y = v0.y - v1.y;
        ret.z = v0.z - v1.z;
        ret.w = v0.w - v1.w;
        return ret;
#endif
    }

    /* Multiplication */
    friend inline const float4 operator * (float f, const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_mul_ps(_mm_set1_ps(f), v.xmm);
#else
        float4 ret;
        ret.x = f * v.x;
        ret.y = f * v.y;
        ret.z = f * v.z;
        ret.w = f * v.w;
        return ret;
#endif
    }

    friend inline const float4 operator * (const float4& v, float f)
    {
#ifdef PLATFORM_SSE
        return _mm_mul_ps(v.xmm, _mm_set1_ps(f));
#else
        float4 ret;
        ret.x = v.x * f;
        ret.y = v.y * f;
        ret.z = v.z * f;
        ret.w = v.w * f;
        return ret;
#endif
    }

    friend inline const float4 operator * (const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        return _mm_mul_ps(v0.xmm, v1.xmm);
#else
        float4 ret;
        ret.x = v0.x * v1.x;
        ret.y = v0.y * v1.y;
        ret.z = v0.z * v1.z;
        ret.w = v0.w * v1.w;
        return ret;
#endif
    }

    /* Division */
    friend inline const float4 operator / (float f, const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_div_ps(_mm_set1_ps(f), v.xmm);
#else
        float4 ret;
        ret.x = f / v.x;
        ret.y = f / v.y;
        ret.z = f / v.z;
        ret.w = f / v.w;
        return ret;
#endif
    }

    friend inline const float4 operator / (const float4& v, float f)
    {
#ifdef PLATFORM_SSE
        return _mm_div_ps(v.xmm, _mm_set1_ps(f));
#else
        float rf = 1.0f / f;
        float4 ret;
        ret.x = v.x * rf;
        ret.y = v.y * rf;
        ret.z = v.z * rf;
        ret.w = v.w * rf;
        return ret;
#endif
    }

    friend inline const float4 operator / (const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        return _mm_div_ps(v0.xmm, v1.xmm);
#else
        float4 ret;
        ret.x = v0.x / v1.x;
        ret.y = v0.y / v1.y;
        ret.z = v0.z / v1.z;
        ret.w = v0.w / v1.w;
        return ret;
#endif
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    /** Additional arithmetics                                                                      **/

    /* Horizontal add */
    inline friend float4 hadd(const float4& a)
    {
#ifdef PLATFORM_SSE3
        __m128 b = _mm_hadd_ps(a.xmm, a.xmm);
        return _mm_hadd_ps(b, b);
#elif defined PLATFORM_SSE
        return float4(_mm_add_ss(a.xmm, _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 1), _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 2), _mm_shuffle_ps(a.xmm, a.xmm, 3)))));
#else
        float4 ret;
        ret.x = a.x + a.y + a.z + a.w;
        ret.w = ret.z = ret.y = ret.x;
        return ret;
#endif
    }

    /* Absolute value */
    friend inline const float4 abs(const float4& v)
    {
#ifdef PLATFORM_SSE
        return _mm_andnot_ps(_mm_set1_ps(-0.f), v.xmm);
#else
        float4 ret;
        ret.x = fabsf(v.x);
        ret.y = fabsf(v.y);
        ret.z = fabsf(v.z);
        ret.w = fabsf(v.w);
        return ret;
#endif
    }

    /* Clamp between values */
    friend inline const float4 clamp(const float4& v0, float f1, float f2)
    {
#ifdef PLATFORM_SSE
        return _mm_max_ps(_mm_set1_ps(f1), _mm_min_ps(_mm_set1_ps(f2), v0.xmm));
#else
        float4 ret;
        ret.x = max(min(v0.x, f2), f1);
        ret.y = max(min(v0.y, f2), f1);
        ret.z = max(min(v0.z, f2), f1);
        ret.w = max(min(v0.w, f2), f1);
        return ret;
#endif
    }

    /* Clamp between vector values */
    friend inline const float4 clamp(const float4& v0, const float4& v1, const float4& v2)
    {
#ifdef PLATFORM_SSE
        return _mm_max_ps(v1.xmm, _mm_min_ps(v2.xmm, v0.xmm));
#else
        float4 ret;
        ret.x = max(min(v0.x, v2.x), v1.x);
        ret.y = max(min(v0.y, v2.y), v1.y);
        ret.z = max(min(v0.z, v2.z), v1.z);
        ret.w = max(min(v0.w, v2.w), v1.w);
        return ret;
#endif
    }

    /* Distance between two vectors */
    friend inline float distance(const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE4
        __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(l, l, 0xff)));
#elif defined PLATFORM_SSE3
        __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
        l = _mm_mul_ps(l, l);
        l = _mm_hadd_ps(l, l);
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
        __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
        l = _mm_mul_ps(l, l);
        l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
#else
        float4 v = v0 - v1;
        return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
#endif
    }

    /* Dot product */
    friend inline float dot(const float4 &v0, const float4 &v1)
    {
#ifdef PLATFORM_SSE4
        return _mm_cvtss_f32(_mm_dp_ps(v0.xmm, v1.xmm, 0xff));
#elif defined PLATFORM_SSE3
        __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
        l = _mm_mul_ps(l, l);
        l = _mm_hadd_ps(l, l);
        return _mm_cvtss_f32(_mm_hadd_ps(l, l));
#elif defined PLATFORM_SSE
        __m128 l = _mm_mul_ps(v0.xmm, v1.xmm);
        l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
        return _mm_cvtss_f32(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11)));
#else
        return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
#endif
    }

    /* Cross product */
    friend float4 cross(const float4& v0, const float4& v1)
    {
#ifdef PLATFORM_SSE
        const __m128 l  = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 1, 0, 2)));
        const __m128 r = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 0, 2, 1)));
        return _mm_sub_ps(l, r);
#else
        float4 ret;
        ret.x = v0.y * v1.z - v0.z * v1.y;
        ret.y = v0.z * v1.x - v0.x * v1.z;
        ret.z = v0.x * v1.y - v0.y * v1.x;
        ret.w = 0.0f;
        return ret;
#endif
    }

    /* Length of vector */
    friend inline float length(const float4& v)
    {
#ifdef PLATFORM_SSE4
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
#elif defined PLATFORM_SSE3
        __m128 l = _mm_mul_ps(v.xmm, v.xmm);
        l = _mm_hadd_ps(l, l);
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
        __m128 l = _mm_mul_ps(v.xmm, v.xmm);
        l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
        return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
#else
        return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
#endif
    }

    /* Normalization */
    friend inline const float4 normalize(const float4& v)
    {
#ifdef PLATFORM_SSE4
        return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
#elif defined PLATFORM_SSE3
        __m128 l = _mm_mul_ps(v.xmm, v.xmm);
        l = _mm_hadd_ps(l, l);
        return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
        __m128 l = _mm_mul_ps(v.xmm, v.xmm);
        l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
        return _mm_mul_ps(v.xmm, _mm_rsqrt_ps(_mm_add_ps(l, _mm_shuffle_ps(l, l, 0x11))));
#else
        float l = 1.0f / sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
        float4 ret;
        ret.x = v.x * l;
        ret.y = v.y * l;
        ret.z = v.z * l;
        ret.w = v.w * l;
        return ret;
#endif
    }

    ALIGNED_NEW_DELETE
};

#endif