Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- *
- * Float4 header file, contains vector math
- *
- * Created By Vilem Otte, 2011
- *
- * If you use this library for any project, please let me know at vilem.otte@post.cz; I might add
- * a link to your project on my site. As for the licence:
- *
- * Licence: WTFPL
- *
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
- * Version 2, December 2004
- *
- * Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
- *
- * Everyone is permitted to copy and distribute verbatim or modified
- * copies of this license document, and changing it is allowed as long
- * as the name is changed.
- *
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
- * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
- *
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
- *
- **/
- #ifndef __FLOAT4__H__
- #define __FLOAT4__H__
- #include "../../core/platform/platform.h"
- #include <math.h>
- #ifdef PLATFORM_SSE
- #include <xmmintrin.h>
- #endif
- #ifdef PLATFORM_SSE2
- #include <emmintrin.h>
- #endif
- #ifdef PLATFORM_SSE3
- #include <pmmintrin.h>
- #endif
- #ifdef PLATFORM_SSE4
- #include <smmintrin.h>
- #endif
- #include "../../core/memory/memory.h"
- #define min(a, b) (a < b ? a : b)
- #define max(a, b) (a > b ? a : b)
- struct ALIGN(16) float4
- {
- #ifdef PLATFORM_SSE
- union
- {
- struct
- {
- float x, y, z, w;
- };
- __m128 xmm;
- };
- #else
- float x, y, z, w;
- #endif
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /** Constructors **/
- /* Default constructor */
- inline float4()
- {
- #ifdef PLATFORM_SSE
- xmm = _mm_setzero_ps();
- #else
- x = y = z = w = 0.0f;
- #endif
- }
- /* Float constructor */
- explicit inline float4(float f)
- {
- #ifdef PLATFORM_SSE
- xmm = _mm_set1_ps(f);
- #else
- x = y = z = w = f;
- #endif
- }
- /* Scalar constructor */
- inline float4(float _x, float _y, float _z, float _w)
- {
- #ifdef PLATFORM_SSE
- xmm = _mm_setr_ps(_x, _y, _z, _w);
- #else
- x = _x;
- y = _y;
- z = _z;
- w = _w;
- #endif
- }
- /* Array constructor (unaligned!!!) */
- inline float4(const float* fv)
- {
- #ifdef PLATFORM_SSE
- xmm = _mm_loadu_ps(fv);
- #else
- x = fv[0];
- y = fv[1];
- z = fv[2];
- w = fv[3];
- #endif
- }
- /* Copy constructor */
- inline float4(const float4 &v)
- {
- #ifdef PLATFORM_SSE
- xmm = v.xmm;
- #else
- x = v.x;
- y = v.y;
- z = v.z;
- w = v.w;
- #endif
- }
- #ifdef PLATFORM_SSE
- /* SSE constructor */
- inline float4(const __m128 &_m)
- {
- xmm = _m;
- }
- #endif
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /** Accessing **/
- /* Direct access write */
- inline float& operator[](int index)
- {
- return reinterpret_cast<float *>(this)[index];
- }
- /* Direct access read */
- inline const float& operator[](int index) const
- {
- return reinterpret_cast<const float *>(this)[index];
- }
- /* Cast */
- inline operator float* ()
- {
- return reinterpret_cast<float *>(this);
- }
- /* Const cast */
- inline operator const float* () const
- {
- return reinterpret_cast<const float *>(this);
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /** Unary arithmetics **/
- /* Scalar addition */
- friend inline float4& operator += (float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- v.xmm = _mm_add_ps(v.xmm, _mm_set1_ps(f));
- #else
- v.x += f;
- v.y += f;
- v.z += f;
- v.w += f;
- #endif
- return v;
- }
- /* Vector addition */
- friend inline float4& operator += (float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- v0.xmm = _mm_add_ps(v0.xmm, v1.xmm);
- #else
- v0.x += v1.x;
- v0.y += v1.y;
- v0.z += v1.z;
- v0.w += v1.w;
- #endif
- return v0;
- }
- /* Scalar subtraction */
- friend inline float4& operator -= (float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- v.xmm = _mm_sub_ps(v.xmm, _mm_set1_ps(f));
- #else
- v.x -= f;
- v.y -= f;
- v.z -= f;
- v.w -= f;
- #endif
- return v;
- }
- /* Sign negation */
- friend inline const float4 operator - (const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_xor_ps(v.xmm, _mm_set1_ps(-0.f));
- #else
- float4 ret;
- ret.x = -v.x;
- ret.y = -v.y;
- ret.z = -v.z;
- ret.w = -v.w;
- return ret;
- #endif
- }
- /* Vector subtraction */
- friend inline float4& operator -= (float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- v0.xmm = _mm_sub_ps(v0.xmm, v1.xmm);
- #else
- v0.x -= v1.x;
- v0.y -= v1.y;
- v0.z -= v1.z;
- v0.w -= v1.w;
- #endif
- return v0;
- }
- /* Scalar multiplication */
- friend inline float4& operator *= (float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- v.xmm = _mm_mul_ps(v.xmm, _mm_set1_ps(f));
- #else
- v.x *= f;
- v.y *= f;
- v.z *= f;
- v.w *= f;
- #endif
- return v;
- }
- /* Vector multiplication */
- friend inline float4& operator *= (float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- v0.xmm = _mm_mul_ps(v0.xmm, v1.xmm);
- #else
- v0.x *= v1.x;
- v0.y *= v1.y;
- v0.z *= v1.z;
- v0.w *= v1.w;
- #endif
- return v0;
- }
- /* Scalar division */
- friend inline float4& operator /= (float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- v.xmm = _mm_div_ps(v.xmm, _mm_set1_ps(f));
- #else
- float rf = 1.0f / f;
- v.x *= rf;
- v.y *= rf;
- v.z *= rf;
- v.w *= rf;
- #endif
- return v;
- }
- /* Vector division */
- friend inline float4& operator /= (float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- v0.xmm = _mm_div_ps(v0.xmm, v1.xmm);
- #else
- v0.x /= v1.x;
- v0.y /= v1.y;
- v0.z /= v1.z;
- v0.w /= v1.w;
- #endif
- return v0;
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /** Binary arithmetics **/
- /* Addition */
- friend inline const float4 operator + (float f, const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_add_ps(_mm_set1_ps(f), v.xmm);
- #else
- float4 ret;
- ret.x = f + v.x;
- ret.y = f + v.y;
- ret.z = f + v.z;
- ret.w = f + v.w;
- return ret;
- #endif
- }
- friend inline const float4 operator + (const float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- return _mm_add_ps(v.xmm, _mm_set1_ps(f));
- #else
- float4 ret;
- ret.x = v.x + f;
- ret.y = v.y + f;
- ret.z = v.z + f;
- ret.w = v.w + f;
- return ret;
- #endif
- }
- friend inline const float4 operator + (const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- return _mm_add_ps(v0.xmm, v1.xmm);
- #else
- float4 ret;
- ret.x = v0.x + v1.x;
- ret.y = v0.y + v1.y;
- ret.z = v0.z + v1.z;
- ret.w = v0.w + v1.w;
- return ret;
- #endif
- }
- /* Subtraction */
- friend inline const float4 operator - (float f, const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_sub_ps(_mm_set1_ps(f), v.xmm);
- #else
- float4 ret;
- ret.x = f - v.x;
- ret.y = f - v.y;
- ret.z = f - v.z;
- ret.w = f - v.w;
- return ret;
- #endif
- }
- friend inline const float4 operator - (const float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- return _mm_sub_ps(v.xmm, _mm_set1_ps(f));
- #else
- float4 ret;
- ret.x = v.x - f;
- ret.y = v.y - f;
- ret.z = v.z - f;
- ret.w = v.w - f;
- return ret;
- #endif
- }
- friend inline const float4 operator - (const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- return _mm_sub_ps(v0.xmm, v1.xmm);
- #else
- float4 ret;
- ret.x = v0.x - v1.x;
- ret.y = v0.y - v1.y;
- ret.z = v0.z - v1.z;
- ret.w = v0.w - v1.w;
- return ret;
- #endif
- }
- /* Multiplication */
- friend inline const float4 operator * (float f, const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_mul_ps(_mm_set1_ps(f), v.xmm);
- #else
- float4 ret;
- ret.x = f * v.x;
- ret.y = f * v.y;
- ret.z = f * v.z;
- ret.w = f * v.w;
- return ret;
- #endif
- }
- friend inline const float4 operator * (const float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- return _mm_mul_ps(v.xmm, _mm_set1_ps(f));
- #else
- float4 ret;
- ret.x = v.x * f;
- ret.y = v.y * f;
- ret.z = v.z * f;
- ret.w = v.w * f;
- return ret;
- #endif
- }
- friend inline const float4 operator * (const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- return _mm_mul_ps(v0.xmm, v1.xmm);
- #else
- float4 ret;
- ret.x = v0.x * v1.x;
- ret.y = v0.y * v1.y;
- ret.z = v0.z * v1.z;
- ret.w = v0.w * v1.w;
- return ret;
- #endif
- }
- /* Division */
- friend inline const float4 operator / (float f, const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_div_ps(_mm_set1_ps(f), v.xmm);
- #else
- float4 ret;
- ret.x = f / v.x;
- ret.y = f / v.y;
- ret.z = f / v.z;
- ret.w = f / v.w;
- return ret;
- #endif
- }
- friend inline const float4 operator / (const float4& v, float f)
- {
- #ifdef PLATFORM_SSE
- return _mm_div_ps(v.xmm, _mm_set1_ps(f));
- #else
- float rf = 1.0f / f;
- float4 ret;
- ret.x = v.x * rf;
- ret.y = v.y * rf;
- ret.z = v.z * rf;
- ret.w = v.w * rf;
- return ret;
- #endif
- }
- friend inline const float4 operator / (const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- return _mm_div_ps(v0.xmm, v1.xmm);
- #else
- float4 ret;
- ret.x = v0.x / v1.x;
- ret.y = v0.y / v1.y;
- ret.z = v0.z / v1.z;
- ret.w = v0.w / v1.w;
- return ret;
- #endif
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- /** Additional arithmetics **/
- /* Horizontal add */
- inline friend float4 hadd(const float4& a)
- {
- #ifdef PLATFORM_SSE3
- __m128 b = _mm_hadd_ps(a.xmm, a.xmm);
- return _mm_hadd_ps(b, b);
- #elif defined PLATFORM_SSE
- return float4(_mm_add_ss(a.xmm, _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 1), _mm_add_ss(_mm_shuffle_ps(a.xmm, a.xmm, 2), _mm_shuffle_ps(a.xmm, a.xmm, 3)))));
- #else
- float4 ret;
- ret.x = a.x + a.y + a.z + a.w;
- ret.w = ret.z = ret.y = ret.x;
- return ret;
- #endif
- }
- /* Absolute value */
- friend inline const float4 abs(const float4& v)
- {
- #ifdef PLATFORM_SSE
- return _mm_andnot_ps(_mm_set1_ps(-0.f), v.xmm);
- #else
- float4 ret;
- ret.x = fabsf(v.x);
- ret.y = fabsf(v.y);
- ret.z = fabsf(v.z);
- ret.w = fabsf(v.w);
- return ret;
- #endif
- }
- /* Clamp between values */
- friend inline const float4 clamp(const float4& v0, float f1, float f2)
- {
- #ifdef PLATFORM_SSE
- return _mm_max_ps(_mm_set1_ps(f1), _mm_min_ps(_mm_set1_ps(f2), v0.xmm));
- #else
- float4 ret;
- ret.x = max(min(v0.x, f2), f1);
- ret.y = max(min(v0.y, f2), f1);
- ret.z = max(min(v0.z, f2), f1);
- ret.w = max(min(v0.w, f2), f1);
- return ret;
- #endif
- }
- /* Clamp between vector values */
- friend inline const float4 clamp(const float4& v0, const float4& v1, const float4& v2)
- {
- #ifdef PLATFORM_SSE
- return _mm_max_ps(v1.xmm, _mm_min_ps(v2.xmm, v0.xmm));
- #else
- float4 ret;
- ret.x = max(min(v0.x, v2.x), v1.x);
- ret.y = max(min(v0.y, v2.y), v1.y);
- ret.z = max(min(v0.z, v2.z), v1.z);
- ret.w = max(min(v0.w, v2.w), v1.w);
- return ret;
- #endif
- }
- /* Distance between two vectors */
- friend inline float distance(const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE4
- __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(l, l, 0xff)));
- #elif defined PLATFORM_SSE3
- __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
- l = _mm_mul_ps(l, l);
- l = _mm_hadd_ps(l, l);
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
- #elif defined PLATFORM_SSE
- __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
- l = _mm_mul_ps(l, l);
- l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
- #else
- float4 v = v0 - v1;
- return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
- #endif
- }
- /* Dot product */
- friend inline float dot(const float4 &v0, const float4 &v1)
- {
- #ifdef PLATFORM_SSE4
- return _mm_cvtss_f32(_mm_dp_ps(v0.xmm, v1.xmm, 0xff));
- #elif defined PLATFORM_SSE3
- __m128 l = _mm_sub_ps(v0.xmm, v1.xmm);
- l = _mm_mul_ps(l, l);
- l = _mm_hadd_ps(l, l);
- return _mm_cvtss_f32(_mm_hadd_ps(l, l));
- #elif defined PLATFORM_SSE
- __m128 l = _mm_mul_ps(v0.xmm, v1.xmm);
- l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
- return _mm_cvtss_f32(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11)));
- #else
- return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w;
- #endif
- }
- /* Cross product */
- friend float4 cross(const float4& v0, const float4& v1)
- {
- #ifdef PLATFORM_SSE
- const __m128 l = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 1, 0, 2)));
- const __m128 r = _mm_mul_ps(_mm_shuffle_ps(v0.xmm, v0.xmm, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(v1.xmm, v1.xmm, _MM_SHUFFLE(3, 0, 2, 1)));
- return _mm_sub_ps(l, r);
- #else
- float4 ret;
- ret.x = v0.y * v1.z - v0.z * v1.y;
- ret.y = v0.z * v1.x - v0.x * v1.z;
- ret.z = v0.x * v1.y - v0.y * v1.x;
- ret.w = 0.0f;
- return ret;
- #endif
- }
- /* Length of vector */
- friend inline float length(const float4& v)
- {
- #ifdef PLATFORM_SSE4
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
- #elif defined PLATFORM_SSE3
- __m128 l = _mm_mul_ps(v.xmm, v.xmm);
- l = _mm_hadd_ps(l, l);
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_hadd_ps(l, l)));
- #elif defined PLATFORM_SSE
- __m128 l = _mm_mul_ps(v.xmm, v.xmm);
- l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
- return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(l, _mm_shuffle_ps(l, l, 0x11))));
- #else
- return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
- #endif
- }
- /* Normalization */
- friend inline const float4 normalize(const float4& v)
- {
- #ifdef PLATFORM_SSE4
- return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_dp_ps(v.xmm, v.xmm, 0xff)));
- #elif defined PLATFORM_SSE3
- __m128 l = _mm_mul_ps(v.xmm, v.xmm);
- l = _mm_hadd_ps(l, l);
- return _mm_div_ps(v.xmm, _mm_sqrt_ps(_mm_hadd_ps(l, l)));
- #elif defined PLATFORM_SSE
- __m128 l = _mm_mul_ps(v.xmm, v.xmm);
- l = _mm_add_ps(l, _mm_shuffle_ps(l, l, 0x4E));
- return _mm_mul_ps(v.xmm, _mm_rsqrt_ps(_mm_add_ps(l, _mm_shuffle_ps(l, l, 0x11))));
- #else
- float l = 1.0f / sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
- float4 ret;
- ret.x = v.x * l;
- ret.y = v.y * l;
- ret.z = v.z * l;
- ret.w = v.w * l;
- return ret;
- #endif
- }
- ALIGNED_NEW_DELETE
- };
- #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement