/**
*
* Float4 header file, contains vector math
*
* Created By Vilem Otte, 2011
*
* If you use this library for any project, please let me know at vilem.otte@post.cz, I might add
* a link at your project at my site. As for the licence:
*
* Licence: WTFPL
*
* DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
* Version 2, December 2004
*
* Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
*
* Everyone is permitted to copy and distribute verbatim or modified
* copies of this license document, and changing it is allowed as long
* as the name is changed.
*
* DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
*
* 0. You just DO WHAT THE FUCK YOU WANT TO.
*
**/
#ifndef __FLOAT4__H__
#define __FLOAT4__H__
#include "../../core/platform/platform.h"
#include <math.h>
#ifdef PLATFORM_SSE
#include <xmmintrin.h>
#endif
#ifdef PLATFORM_SSE2
#include <emmintrin.h>
#endif
#ifdef PLATFORM_SSE3
#include <pmmintrin.h>
#endif
#ifdef PLATFORM_SSE4
#include <smmintrin.h>
#endif
#include "../../core/memory/memory.h"
#define min(a, b) (a < b ? a : b)
#define max(a, b) (a > b ? a : b)
struct ALIGN(16) float4
{
#ifdef PLATFORM_SSE
union
{
struct
{
float x, y, z, w;
};
__m128 xmm;
};
#else
float x, y, z, w;
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Constructors **/
/* Default constructor */
inline float4()
{
#ifdef PLATFORM_SSE
xmm = _mm_setzero_ps();
#else
x = y = z = w = 0.0f;
#endif
}
/* Float constructor */
explicit inline float4(float f)
{
#ifdef PLATFORM_SSE
xmm = _mm_set1_ps(f);
#else
x = y = z = w = f;
#endif
}
/* Scalar constructor */
inline float4(float _x, float _y, float _z, float _w)
{
#ifdef PLATFORM_SSE
xmm = _mm_setr_ps(_x, _y, _z, _w);
#else
x = _x;
y = _y;
z = _z;
w = _w;
#endif
}
/* Array constructor (unaligned!!!) */
inline float4(const float* fv)
{
#ifdef PLATFORM_SSE
xmm = _mm_loadu_ps(fv);
#else
x = fv[0];
y = fv[1];
z = fv[2];
w = fv[3];
#endif
}
/* Copy constructor */
inline float4(const float4 &v)
{
#ifdef PLATFORM_SSE
xmm = v.xmm;
#else
x = v.x;
y = v.y;
z = v.z;
w = v.w;
#endif
}
#ifdef PLATFORM_SSE
/* SSE constructor */
inline float4(const __m128 &_m)
{
xmm = _m;
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Accessing **/
/* Direct access write */
inline float& operator[](int index)
{
return reinterpret_cast<float *>(this)[index];
}
/* Direct access read */
inline const float& operator[](int index) const
{
return reinterpret_cast<const float *>(this)[index];
}
/* Cast */
inline operator float* ()
{
return reinterpret_cast<float *>(this);
}
/* Const cast */
inline operator const float* () const
{
return reinterpret_cast<const float *>(this);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Unary arithmetics **/
/* Scalar addition */
friend inline float4& operator += (float4& v, float f)
{
#ifdef PLATFORM_SSE
v.xmm = _mm_add_ps(v.xmm, _mm_set1_ps(f));
#else
v.x += f;
v.y += f;
v.z += f;
v.w += f;
#endif
return v;
}
/* Vector addition */
friend inline float4& operator += (float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
v0.xmm = _mm_add_ps(v0.xmm, v1.xmm);
#else
v0.x += v1.x;
v0.y += v1.y;
v0.z += v1.z;
v0.w += v1.w;
#endif
return v0;
}
/* Scalar subtraction */
friend inline float4& operator -= (float4& v, float f)
{
#ifdef PLATFORM_SSE
v.xmm = _mm_sub_ps(v.xmm, _mm_set1_ps(f));
#else
v.x -= f;
v.y -= f;
v.z -= f;
v.w -= f;
#endif
return v;
}
/* Sign negation */
friend inline const float4 operator - (const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_xor_ps(v.xmm, _mm_set1_ps(-0.f));
#else
float4 ret;
ret.x = -v.x;
ret.y = -v.y;
ret.z = -v.z;
ret.w = -v.w;
return ret;
#endif
}
/* Vector subtraction */
friend inline float4& operator -= (float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
v0.xmm = _mm_sub_ps(v0.xmm, v1.xmm);
#else
v0.x -= v1.x;
v0.y -= v1.y;
v0.z -= v1.z;
v0.w -= v1.w;
#endif
return v0;
}
/* Scalar multiplication */
friend inline float4& operator *= (float4& v, float f)
{
#ifdef PLATFORM_SSE
v.xmm = _mm_mul_ps(v.xmm, _mm_set1_ps(f));
#else
v.x *= f;
v.y *= f;
v.z *= f;
v.w *= f;
#endif
return v;
}
/* Vector multiplication */
friend inline float4& operator *= (float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
v0.xmm = _mm_mul_ps(v0.xmm, v1.xmm);
#else
v0.x *= v1.x;
v0.y *= v1.y;
v0.z *= v1.z;
v0.w *= v1.w;
#endif
return v0;
}
/* Scalar division */
friend inline float4& operator /= (float4& v, float f)
{
#ifdef PLATFORM_SSE
v.xmm = _mm_div_ps(v.xmm, _mm_set1_ps(f));
#else
float rf = 1.0f / f;
v.x *= rf;
v.y *= rf;
v.z *= rf;
v.w *= rf;
#endif
return v;
}
/* Vector division */
friend inline float4& operator /= (float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
v0.xmm = _mm_div_ps(v0.xmm, v1.xmm);
#else
v0.x /= v1.x;
v0.y /= v1.y;
v0.z /= v1.z;
v0.w /= v1.w;
#endif
return v0;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Binary arithmetics **/
/* Addition */
friend inline const float4 operator + (float f, const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_add_ps(_mm_set1_ps(f), v.xmm);
#else
float4 ret;
ret.x = f + v.x;
ret.y = f + v.y;
ret.z = f + v.z;
ret.w = f + v.w;
return ret;
#endif
}
friend inline const float4 operator + (const float4& v, float f)
{
#ifdef PLATFORM_SSE
return _mm_add_ps(v.xmm, _mm_set1_ps(f));
#else
float4 ret;
ret.x = v.x + f;
ret.y = v.y + f;
ret.z = v.z + f;
ret.w = v.w + f;
return ret;
#endif
}
friend inline const float4 operator + (const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
return _mm_add_ps(v0.xmm, v1.xmm);
#else
float4 ret;
ret.x = v0.x + v1.x;
ret.y = v0.y + v1.y;
ret.z = v0.z + v1.z;
ret.w = v0.w + v1.w;
return ret;
#endif
}
/* Subtraction */
friend inline const float4 operator - (float f, const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_sub_ps(_mm_set1_ps(f), v.xmm);
#else
float4 ret;
ret.x = f - v.x;
ret.y = f - v.y;
ret.z = f - v.z;
ret.w = f - v.w;
return ret;
#endif
}
friend inline const float4 operator - (const float4& v, float f)
{
#ifdef PLATFORM_SSE
return _mm_sub_ps(v.xmm, _mm_set1_ps(f));
#else
float4 ret;
ret.x = v.x - f;
ret.y = v.y - f;
ret.z = v.z - f;
ret.w = v.w - f;
return ret;
#endif
}
friend inline const float4 operator - (const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
return _mm_sub_ps(v0.xmm, v1.xmm);
#else
float4 ret;
ret.x = v0.x - v1.x;
ret.y = v0.y - v1.y;
ret.z = v0.z - v1.z;
ret.w = v0.w - v1.w;
return ret;
#endif
}
/* Multiplication */
friend inline const float4 operator * (float f, const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_mul_ps(_mm_set1_ps(f), v.xmm);
#else
float4 ret;
ret.x = f * v.x;
ret.y = f * v.y;
ret.z = f * v.z;
ret.w = f * v.w;
return ret;
#endif
}
friend inline const float4 operator * (const float4& v, float f)
{
#ifdef PLATFORM_SSE
return _mm_mul_ps(v.xmm, _mm_set1_ps(f));
#else
float4 ret;
ret.x = v.x * f;
ret.y = v.y * f;
ret.z = v.z * f;
ret.w = v.w * f;
return ret;
#endif
}
friend inline const float4 operator * (const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
return _mm_mul_ps(v0.xmm, v1.xmm);
#else
float4 ret;
ret.x = v0.x * v1.x;
ret.y = v0.y * v1.y;
ret.z = v0.z * v1.z;
ret.w = v0.w * v1.w;
return ret;
#endif
}
/* Division */
friend inline const float4 operator / (float f, const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_div_ps(_mm_set1_ps(f), v.xmm);
#else
float4 ret;
ret.x = f / v.x;
ret.y = f / v.y;
ret.z = f / v.z;
ret.w = f / v.w;
return ret;
#endif
}
friend inline const float4 operator / (const float4& v, float f)
{
#ifdef PLATFORM_SSE
return _mm_div_ps(v.xmm, _mm_set1_ps(f));
#else
float rf = 1.0f / f;
float4 ret;
ret.x = v.x * rf;
ret.y = v.y * rf;
ret.z = v.z * rf;
ret.w = v.w * rf;
return ret;
#endif
}
friend inline const float4 operator / (const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
return _mm_div_ps(v0.xmm, v1.xmm);
#else
float4 ret;
ret.x = v0.x / v1.x;
ret.y = v0.y / v1.y;
ret.z = v0.z / v1.z;
ret.w = v0.w / v1.w;
return ret;
#endif
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Additional arithmetics **/
/* Horizontal add */
inline friend float4 hadd(const float4& a)
{
#ifdef PLATFORM_SSE3
__m128 b = _mm_hadd_ps(a.xmm, a.xmm);
return _mm_hadd_ps(b, b);
#elif defined PLATFORM_SSE
return float4(_mm_add_ss(a.xmm, _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 1), _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 2), _mm_shuffle_ps(a.xmm, a.xmm, 3)))));
#else
float4 ret;
ret.x = a.x + a.y + a.z + a.w;
ret.w = ret.z = ret.y = ret.x;
return ret;
#endif
}
/* Absolute value */
friend inline const float4 abs(const float4& v)
{
#ifdef PLATFORM_SSE
return _mm_andnot_ps(_mm_set1_ps(-0.f), v.xmm);
#else
float4 ret;
ret.x = fabsf(v.x);
ret.y = fabsf(v.y);
ret.z = fabsf(v.z);
ret.w = fabsf(v.w);
return ret;
#endif
}
/* Clamp between values */
friend inline const float4 clamp(const float4& v0, float f1, float f2)
{
#ifdef PLATFORM_SSE
return _mm_max_ps(_mm_set1_ps(f1), _mm_min_ps(_mm_set1_ps(f2), v0.xmm));
#else
float4 ret;
ret.x = max(min(v0.x, f2), f1);
ret.y = max(min(v0.y, f2), f1);
ret.z = max(min(v0.z, f2), f1);
ret.w = max(min(v0.w, f2), f1);
return ret;
#endif
}
/* Clamp between vector values */
friend inline const float4 clamp(const float4& v0, const float4& v1, const float4& v2)
{
#ifdef PLATFORM_SSE
return _mm_max_ps(v1.xmm, _mm_min_ps(v2.xmm, v0.xmm));
#else
float4 ret;
ret.x = max(min(v0.x, v2.x), v1.x);
ret.y = max(min(v0.y, v2.y), v1.y);
ret.z = max(min(v0.z, v2.z), v1.z);
ret.w = max(min(v0.w, v2.w), v1.w);
return ret;
#endif
}
/* Distance between two vectors */
friend inline float distance(const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE4
__m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(l, l, 0xff)));
#elif defined PLATFORM_SSE3
__m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
l = _mm_mul_ps(l, l);
l = _mm_hadd_ps(l, l);
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
__m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
l = _mm_mul_ps(l, l);
l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
#else
float4 v = v0 - v1;
return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
#endif
}
/* Dot product */
friend inline float dot(const float4 &v0, const float4 &v1)
{
#ifdef PLATFORM_SSE4
return _mm_cvtss_f32(_mm_dp_ps(v0.xmm, v1.xmm, 0xff));
#elif defined PLATFORM_SSE3
__m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
l = _mm_mul_ps(l, l);
l = _mm_hadd_ps(l, l);
return _mm_cvtss_f32(_mm_hadd_ps(l, l));
#elif defined PLATFORM_SSE
__m128 l = _mm_mul_ps(v0.xmm, v1.xmm);
l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
return _mm_cvtss_f32(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11)));
#else
return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
#endif
}
/* Cross product */
friend float4 cross(const float4& v0, const float4& v1)
{
#ifdef PLATFORM_SSE
const __m128 l = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 1, 0, 2)));
const __m128 r = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 0, 2, 1)));
return _mm_sub_ps(l, r);
#else
float4 ret;
ret.x = v0.y * v1.z - v0.z * v1.y;
ret.y = v0.z * v1.x - v0.x * v1.z;
ret.z = v0.x * v1.y - v0.y * v1.x;
ret.w = 0.0f;
return ret;
#endif
}
/* Length of vector */
friend inline float length(const float4& v)
{
#ifdef PLATFORM_SSE4
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
#elif defined PLATFORM_SSE3
__m128 l = _mm_mul_ps(v.xmm, v.xmm);
l = _mm_hadd_ps(l, l);
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
__m128 l = _mm_mul_ps(v.xmm, v.xmm);
l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
#else
return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
#endif
}
/* Normalization */
friend inline const float4 normalize(const float4& v)
{
#ifdef PLATFORM_SSE4
return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
#elif defined PLATFORM_SSE3
__m128 l = _mm_mul_ps(v.xmm, v.xmm);
l = _mm_hadd_ps(l, l);
return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_hadd_ps(l, l)));
#elif defined PLATFORM_SSE
__m128 l = _mm_mul_ps(v.xmm, v.xmm);
l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
return _mm_mul_ps(v.xmm, _mm_rsqrt_ps(_mm_add_ps(l, _mm_shuffle_ps(l, l, 0x11))));
#else
float l = 1.0f / sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
float4 ret;
ret.x = v.x * l;
ret.y = v.y * l;
ret.z = v.z * l;
ret.w = v.w * l;
return ret;
#endif
}
ALIGNED_NEW_DELETE
};
#endif