31 #ifndef _VECTORMATH_AOS_CPP_SSE_H
32 #define _VECTORMATH_AOS_CPP_SSE_H
35 #include <xmmintrin.h>
36 #include <emmintrin.h>
// Reference-type shorthands: each macro expands textually to an lvalue
// reference to the named type (Vector3& / Matrix3&). They are plain text
// substitutions, not typedefs, so they can appear before the types are declared.
39 #define Vector3Ref Vector3&
41 #define Matrix3Ref Matrix3&
43 #if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400)
44 #define USE_SSE3_LDDQU
46 #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) __declspec(align(16)) a
47 #define VM_ATTRIBUTE_ALIGN16 __declspec(align(16))
48 #define VECTORMATH_FORCE_INLINE __forceinline
50 #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) a __attribute__ ((aligned (16)))
51 #define VM_ATTRIBUTE_ALIGN16 __attribute__ ((aligned (16)))
52 #define VECTORMATH_FORCE_INLINE inline __attribute__ ((always_inline))
54 #define USE_SSE3_LDDQU
60 #include <pmmintrin.h>
61 #endif //USE_SSE3_LDDQU
// vec_splat(x, e): shuffles x with itself using the selector (e,e,e,e), i.e.
// every lane of the result is taken from lane e of x.
// x appears twice in the expansion, so an argument with side effects would be
// evaluated twice.
// NOTE(review): e presumably has to be a compile-time constant because it
// becomes the immediate operand of _mm_shuffle_ps — confirm against the
// intrinsic's documentation.
71 #define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
/*
 * _mm_ror_ps(vec, i): rotate the four float lanes of vec so that result lane k
 * holds input lane (k + i) mod 4. When i is a multiple of 4 the input vector
 * is returned unchanged.
 *
 * i must be an integer constant expression because it is folded into the
 * immediate operand of _mm_shuffle_ps. The (unsigned char) cast is applied
 * before the "% 4", exactly as before, so a negative i still wraps to a lane
 * index in 0..3 (256 is a multiple of 4).
 *
 * Every use of a parameter is parenthesized so that an argument such as
 * `a | b` or `c ? 1 : 2` is treated as one value; without the parentheses the
 * "+3", "+2", ... bound to only part of such an expression and selected the
 * wrong lanes.
 *
 * vec is expanded more than once; do not pass an expression with side effects.
 */
#define _mm_ror_ps(vec, i) \
    ((((i) % 4) != 0) \
        ? _mm_shuffle_ps((vec), (vec), \
              _MM_SHUFFLE((unsigned char)((i) + 3) % 4, \
                          (unsigned char)((i) + 2) % 4, \
                          (unsigned char)((i) + 1) % 4, \
                          (unsigned char)((i) + 0) % 4)) \
        : (vec))
/*
 * _mm_rol_ps(vec, i): rotate the four float lanes of vec in the opposite
 * direction to _mm_ror_ps: result lane k holds input lane (k - i) mod 4.
 * When i is a multiple of 4 the input vector is returned unchanged.
 *
 * i must be an integer constant expression because it is folded into the
 * immediate operand of _mm_shuffle_ps. The (unsigned char) cast is applied
 * before the "% 4", exactly as before, so a difference that goes negative
 * still wraps to a lane index in 0..3.
 *
 * Every use of a parameter is parenthesized: with an argument such as `a + b`
 * the old "(7-i)" expanded to "7-a+b" and selected the wrong lanes.
 *
 * vec is expanded more than once; do not pass an expression with side effects.
 */
#define _mm_rol_ps(vec, i) \
    ((((i) % 4) != 0) \
        ? _mm_shuffle_ps((vec), (vec), \
              _MM_SHUFFLE((unsigned char)(7 - (i)) % 4, \
                          (unsigned char)(6 - (i)) % 4, \
                          (unsigned char)(5 - (i)) % 4, \
                          (unsigned char)(4 - (i)) % 4)) \
        : (vec))
// vec_sld(vec, vec2, x): expands to _mm_ror_ps(vec, x/4), i.e. a lane rotation
// of vec by x/4 positions (x is divided by 4 with integer division).
// The vec2 argument is accepted but never used in the expansion, so it is not
// evaluated at all.
// NOTE(review): the name suggests a two-operand "shift left double"; this
// rotation can only match that when vec and vec2 are the same value — verify
// every caller passes the same vector twice.
78 #define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4))
/*
 * _mm_abs_ps(vec): per-lane absolute value, obtained by clearing the sign bit
 * of each lane (and-not against a mask that has only the sign bits set).
 *
 * The sign mask is built in place with _mm_set1_ps(-0.0f), whose bit pattern
 * is 0x80000000 in every lane, so the macro no longer relies on an external
 * _MASKSIGN_ symbol being defined before use. vec is parenthesized so any
 * expression can be passed safely.
 */
#define _mm_abs_ps(vec) _mm_andnot_ps(_mm_set1_ps(-0.0f), (vec))
/*
 * _mm_neg_ps(vec): per-lane negation, obtained by flipping the sign bit of
 * each lane (xor against a mask that has only the sign bits set).
 *
 * The sign mask is built in place with _mm_set1_ps(-0.0f), whose bit pattern
 * is 0x80000000 in every lane, so the macro no longer relies on an external
 * _MASKSIGN_ symbol being defined before use. vec is parenthesized so any
 * expression can be passed safely.
 */
#define _mm_neg_ps(vec) _mm_xor_ps(_mm_set1_ps(-0.0f), (vec))
// vec_madd(a, b, c): per-lane a*b + c, built from a separate _mm_mul_ps
// followed by _mm_add_ps (the product is computed first, then added to c).
// Each argument is used exactly once in the expansion.
83 #define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) )
100 return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
104 return vec_sel(a, b, _mm_load_ps((
float *)_mask));
108 return vec_sel(a, b, _mm_set1_ps(*(
float *)&_mask));
113 return _mm_set1_ps( *(
float *)&x );
118 return _mm_and_ps( x,
toM128( 0x7fffffff ) );
159 __m128i result = _mm_cvtps_epi32(x);
160 return (__m128 &)result;
167 return _mm_cvtepi32_ps((__m128i &)x);
// Thin aliases that map vec_* / *f4 names onto single SSE intrinsics.
// All of them operate per lane on __m128 values and use each argument once.

// vec_nmsub(a, b, c): c - a*b (multiply first, then subtract from c).
170 #define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) )
// Element-wise arithmetic and bitwise operations.
171 #define vec_sub(a,b) _mm_sub_ps( a, b )
172 #define vec_add(a,b) _mm_add_ps( a, b )
173 #define vec_mul(a,b) _mm_mul_ps( a, b )
174 #define vec_xor(a,b) _mm_xor_ps( a, b )
175 #define vec_and(a,b) _mm_and_ps( a, b )
// Comparisons: forwarded to _mm_cmpeq_ps / _mm_cmpgt_ps.
176 #define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b )
177 #define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b )
// Interleaves: "mergeh" maps to unpacklo and "mergel" maps to unpackhi.
179 #define vec_mergeh(a,b) _mm_unpacklo_ps( a, b )
180 #define vec_mergel(a,b) _mm_unpackhi_ps( a, b )
// vec_andc(a, b): note the swapped operands — _mm_andnot_ps(b, a) computes
// (~b) & a, i.e. "a and not b".
182 #define vec_andc(a,b) _mm_andnot_ps( b, a )
// Square root, reciprocal square root and reciprocal, forwarded to
// _mm_sqrt_ps, _mm_rsqrt_ps and _mm_rcp_ps respectively.
184 #define sqrtf4(x) _mm_sqrt_ps( x )
185 #define rsqrtf4(x) _mm_rsqrt_ps( x )
186 #define recipf4(x) _mm_rcp_ps( x )
// negatef4(x): computed as 0 - x (a subtraction from a zero vector rather
// than a sign-bit flip).
187 #define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
// Constant vectors with all four lanes equal to 0.5 and 3.0 respectively.
// Each use of the macro re-evaluates the _mm_setr_ps call.
191 #define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f)
192 #define _three _mm_setr_ps(3.f,3.f,3.f,3.f)
193 const __m128 approx = _mm_rsqrt_ps( v );
194 const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx);
195 return _mm_mul_ps(_mm_mul_ps(
_half4, approx), _mm_sub_ps(
_three, muls) );
201 __m128
select = _mm_cmplt_ps( x, _mm_setzero_ps() );
209 __m128 xabs2 = _mm_mul_ps(xabs, xabs);
210 __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
212 xabs, _mm_set1_ps(0.0066700901f)),
213 xabs, _mm_set1_ps(-0.0170881256f)),
214 xabs, _mm_set1_ps( 0.0308918810f));
216 xabs, _mm_set1_ps(0.0889789874f)),
217 xabs, _mm_set1_ps(-0.2145988016f)),
218 xabs, _mm_set1_ps( 1.5707963050f));
220 __m128 result =
vec_madd(hi, xabs4, lo);
225 vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),
// Single-precision numeric constants with a _SINCOS_ prefix; the code that
// consumes them is not part of this excerpt.
// NOTE(review): CC0..CC2 are close to -1/720, 1/24 and -1/2, and SC0..SC2 are
// close to -1/5040, 1/120 and -1/6, so they look like polynomial coefficients
// for cosine (CC*) and sine (SC*) approximations — confirm against the
// sine/cosine routines that use them.
235 #define _SINCOS_CC0 -0.0013602249f
236 #define _SINCOS_CC1 0.0416566950f
237 #define _SINCOS_CC2 -0.4999990225f
238 #define _SINCOS_SC0 -0.0001950727f
239 #define _SINCOS_SC1 0.0083320758f
240 #define _SINCOS_SC2 -0.1666665247f
// _SINCOS_KC1 + _SINCOS_KC2 adds up to approximately pi/2 (1.5707963268).
// NOTE(review): presumably a high/low split of pi/2 used for range
// reduction — confirm at the use site.
242 #define _SINCOS_KC1 1.57079625129f
243 #define _SINCOS_KC2 7.54978995489e-8f
249 xl =
vec_mul(x, _mm_set1_ps(0.63661977236f));
307 xl =
vec_mul(x, _mm_set1_ps(0.63661977236f));
319 __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin);
320 offsetCos = (__m128 &)temp;
367 #ifdef _VECTORMATH_DEBUG
370 namespace Vectormath {
// Declaration only (the definition is not part of this excerpt).
// Takes eight Vector3 inputs by const reference and a pointer, threeQuads, to
// vec_ushort8 storage; returns nothing.
// NOTE(review): the names suggest the x/y/z components of the eight vectors
// are converted to 16-bit half floats and written as three vec_ushort8
// values — confirm against the definition, including any alignment
// requirement on threeQuads.
711 VECTORMATH_FORCE_INLINE void storeHalfFloats(
const Vector3 &vec0,
const Vector3 &vec1,
const Vector3 &vec2,
const Vector3 &vec3,
const Vector3 &vec4,
const Vector3 &vec5,
const Vector3 &vec6,
const Vector3 &vec7,
vec_ushort8 * threeQuads );
713 #ifdef _VECTORMATH_DEBUG
1054 #ifdef _VECTORMATH_DEBUG
1309 #ifdef _VECTORMATH_DEBUG
// Declaration only (the definition is not part of this excerpt).
// Takes a float parameter t and four quaternions by const reference and
// returns a Quat by value.
// NOTE(review): the name and the unitQuat* parameter names suggest spherical
// quadrangle interpolation over unit-length quaternions — confirm the
// interpolation order of the four inputs and the unit-length precondition
// against the definition.
1630 VECTORMATH_FORCE_INLINE const Quat
squad(
float t,
const Quat &unitQuat0,
const Quat &unitQuat1,
const Quat &unitQuat2,
const Quat &unitQuat3 );
1650 #ifdef _VECTORMATH_DEBUG
1916 #ifdef _VECTORMATH_DEBUG
2267 #ifdef _VECTORMATH_DEBUG
2524 #ifdef _VECTORMATH_DEBUG
const Vector4 operator*(float scalar) const
const Quat normalize(const Quat &quat)
Matrix4 & setCol1(const Vector4 &col1)
float determinant(const Matrix3 &mat)
static const Quat rotationY(float radians)
float & operator[](int idx)
const Vector3 rowMul(const Vector3 &vec, const Matrix3 &mat)
static const Matrix4 scale(const Vector3 &scaleVec)
const Vector3 getXYZ() const
const Matrix3 crossMatrixMul(const Vector3 &vec, const Matrix3 &mat)
const Vector3 recipPerElem(const Vector3 &vec)
void loadXYZArray(Vector3 &vec0, Vector3 &vec1, Vector3 &vec2, Vector3 &vec3, const __m128 *threeQuads)
static const Vector4 yAxis()
float minElem(const Vector3 &vec)
Vector4 & setXYZ(const Vector3 &vec)
static const Matrix4 rotationX(float radians)
Vector4 & operator+=(const Vector4 &vec)
static const Matrix4 translation(const Vector3 &translateVec)
static const Vector3 xAxis()
static const Vector3 zAxis()
const Matrix4 operator+(const Matrix4 &mat) const
void storeHalfFloats(const Vector3 &vec, unsigned short *hfptr)
float dist(const Point3 &pnt0, const Point3 &pnt1)
const Vector3 minPerElem(const Vector3 &vec0, const Vector3 &vec1)
Quat & operator*=(const Quat &quat)
Quat & operator-=(const Quat &quat)
const Matrix3 operator*(float scalar) const
Matrix3 & setElem(int col, int row, float val)
Point3 & operator=(const Point3 &pnt)
const Quat operator+(const Quat &quat) const
Point3 & setElem(int idx, float value)
static const Vector4 xAxis()
#define vec_madd(a, b, c)
static __m128 sinf4(vec_float4 x)
const Matrix3 appendScale(const Matrix3 &mat, const Vector3 &scaleVec)
Vector3 & operator=(const Vector3 &vec)
Matrix3 & setCol0(const Vector3 &col0)
Matrix3 & setCol2(const Vector3 &col2)
Matrix3 & operator-=(const Matrix3 &mat)
Point3 & operator-=(const Vector3 &vec)
const Matrix3 inverse(const Matrix3 &mat)
Quat & operator=(const Quat &quat)
float distFromOrigin(const Point3 &pnt)
static const Matrix4 rotation(float radians, const Vector3 &unitVec)
static const Matrix4 rotationY(float radians)
const Vector4 operator-() const
Vector3 & operator/=(float scalar)
const Vector3 rotate(const Quat &quat, const Vector3 &vec)
float & operator[](int idx)
static __m128 vec_cts(__m128 x, int a)
const Matrix3 crossMatrix(const Vector3 &vec)
static const Quat identity()
float getElem(int col, int row) const
Matrix4 & operator-=(const Matrix4 &mat)
static const Matrix4 orthographic(float left, float right, float bottom, float top, float zNear, float zFar)
#define VECTORMATH_FORCE_INLINE
static __m128 acosf4(__m128 x)
static const Matrix3 rotationZ(float radians)
static const Vector3 yAxis()
static const Matrix3 rotationZYX(const Vector3 &radiansXYZ)
static const Quat rotationZ(float radians)
static const Quat rotation(const Vector3 &unitVec0, const Vector3 &unitVec1)
Vector3 & operator-=(const Vector3 &vec)
const Matrix4 operator*(float scalar) const
const Vector4 getCol3() const
static const Matrix4 identity()
static const Vector4 wAxis()
static const Matrix3 identity()
const Vector4 getCol1() const
Matrix4 & setTranslation(const Vector3 &translateVec)
const Vector3 maxPerElem(const Vector3 &vec0, const Vector3 &vec1)
const Vector4 getCol0() const
const Matrix4 affineInverse(const Matrix4 &mat)
static const Matrix3 scale(const Vector3 &scaleVec)
float lengthSqr(const Vector3 &vec)
float distSqrFromOrigin(const Point3 &pnt)
const Point3 operator+(const Vector3 &vec) const
float & operator[](int idx)
const Point3 scale(const Point3 &pnt, float scaleVal)
const Vector3 operator-(const Point3 &pnt) const
float projection(const Point3 &pnt, const Vector3 &unitVec)
Vector4 & operator[](int col)
static const Quat rotationX(float radians)
const Vector3 copySignPerElem(const Vector3 &vec0, const Vector3 &vec1)
const Vector3 getCol0() const
const Matrix3 getUpper3x3() const
Matrix4 & setElem(int col, int row, float val)
const Matrix3 outer(const Vector3 &tfrm0, const Vector3 &tfrm1)
const Vector3 operator-() const
float select(float arg0, float arg1, bool select)
const Matrix4 operator-() const
Matrix4 & operator*=(float scalar)
const Vector3 operator+(const Vector3 &vec) const
void set128(vec_float4 vec)
static __m128 toM128(unsigned int x)
const Quat operator*(const Quat &quat) const
static const Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar)
float norm(const Quat &quat)
Vector3 & operator[](int col)
const Quat operator/(float scalar) const
Vector3 & setElem(int idx, float value)
const Vector3 getTranslation() const
const Matrix3 prependScale(const Vector3 &scaleVec, const Matrix3 &mat)
const Quat operator-() const
const Vector4 getCol2() const
void storeXYZArray(const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 *threeQuads)
const Matrix3 operator+(const Matrix3 &mat) const
const Vector3 getXYZ() const
Matrix3 & operator+=(const Matrix3 &mat)
#define vec_nmsub(a, b, c)
const Vector3 operator/(float scalar) const
const Matrix3 operator-() const
void set128(vec_float4 vec)
Matrix3 & setCol1(const Vector3 &col1)
Point3 & operator+=(const Vector3 &vec)
static const Matrix3 rotationX(float radians)
const Matrix4 orthoInverse(const Matrix4 &mat)
const Quat conj(const Quat &quat)
const Vector4 operator+(const Vector4 &vec) const
float distSqr(const Point3 &pnt0, const Point3 &pnt1)
static __m128 newtonrapson_rsqrt4(const __m128 v)
float getElem(int idx) const
Matrix4 & setCol3(const Vector4 &col3)
static void sincosf4(vec_float4 x, vec_float4 *s, vec_float4 *c)
static const Matrix3 rotation(float radians, const Vector3 &unitVec)
const Vector4 getCol(int col) const
Matrix4 & setCol(int col, const Vector4 &vec)
Matrix4 & setUpper3x3(const Matrix3 &mat3)
Quat & operator/=(float scalar)
const Quat lerp(float t, const Quat &quat0, const Quat &quat1)
Matrix4 & setCol2(const Vector4 &col2)
static const Matrix4 lookAt(const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec)
const Matrix3 select(const Matrix3 &mat0, const Matrix3 &mat1, bool select1)
Matrix4 & operator+=(const Matrix4 &mat)
Quat & setElem(int idx, float value)
static const Matrix4 perspective(float fovyRadians, float aspect, float zNear, float zFar)
const Vector4 operator/(float scalar) const
Vector3 & operator*=(float scalar)
float getElem(int idx) const
static const Matrix3 rotationY(float radians)
const Matrix3 transpose(const Matrix3 &mat)
Vector3 & operator+=(const Vector3 &vec)
Matrix3 & operator*=(float scalar)
float getElem(int idx) const
const Vector3 getRow(int row) const
float getElem(int idx) const
Matrix4 & setCol0(const Vector4 &col0)
float maxElem(const Vector3 &vec)
void storeXYZ(const Vector3 &vec, float *fptr)
float & operator[](int idx)
Matrix4 & setRow(int row, const Vector4 &vec)
Matrix3 & setCol(int col, const Vector3 &vec)
const Quat squad(float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3)
float getElem(int col, int row) const
const Vector3 getCol(int col) const
static const Matrix4 rotationZYX(const Vector3 &radiansXYZ)
float sum(const Vector3 &vec)
static const Vector4 zAxis()
float dot(const Quat &quat0, const Quat &quat1)
static __m128 fabsf4(__m128 x)
static __m128 vec_ctf(__m128 x, int a)
const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
const Matrix3 mulPerElem(const Matrix3 &mat0, const Matrix3 &mat1)
Vector4 & operator/=(float scalar)
Quat & setXYZ(const Vector3 &vec)
static const Matrix4 rotationZ(float radians)
Matrix3 & operator=(const Matrix3 &mat)
const Vector3 operator*(float scalar) const
const Vector4 getRow(int row) const
float length(const Quat &quat)
Vector4 & operator*=(float scalar)
const Vector3 divPerElem(const Vector3 &vec0, const Vector3 &vec1)
const Vector3 getCol2() const
static __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
const Matrix3 absPerElem(const Matrix3 &mat)
Vector4 & setElem(int idx, float value)
const Matrix3 operator*(float scalar, const Matrix3 &mat)
Matrix3 & setRow(int row, const Vector3 &vec)
Vector4 & operator=(const Vector4 &vec)
Matrix4 & operator=(const Matrix4 &mat)
Quat & operator+=(const Quat &quat)
Vector4 & operator-=(const Vector4 &vec)
const Vector3 getCol1() const
const Quat slerp(float t, const Quat &unitQuat0, const Quat &unitQuat1)