31 #ifndef _VECTORMATH_QUAT_AOS_CPP_H
32 #define _VECTORMATH_QUAT_AOS_CPP_H
37 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
38 #define _VECTORMATH_INTERNAL_FUNCTIONS
42 namespace Vectormath {
72 mVec128 = _mm_setr_ps(_x, _y, _z, _w);
117 return ( quat0 + ( ( quat1 - quat0 ) * t ) );
128 vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
131 selectMask = (__m128)
vec_cmpgt( _mm_setzero_ps(), cosAngle );
135 angle =
acosf4( cosAngle );
137 oneMinusT =
vec_sub( _mm_set1_ps(1.0f), tttt );
138 angles =
vec_mergeh( _mm_set1_ps(1.0f), tttt );
140 angles =
vec_madd( angles, angle, _mm_setzero_ps() );
141 sines =
sinf4( angles );
142 scales = _mm_div_ps( sines,
vec_splat( sines, 0 ) );
148 VECTORMATH_FORCE_INLINE const Quat
squad(
float t,
const Quat &unitQuat0,
const Quat &unitQuat1,
const Quat &unitQuat2,
const Quat &unitQuat3 )
150 return squad(
floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
273 return floatInVec(
mVec128, idx );
278 return Quat( _mm_add_ps(
mVec128, quat.mVec128 ) );
284 return Quat( _mm_sub_ps(
mVec128, quat.mVec128 ) );
289 return *
this * floatInVec(scalar);
299 *
this = *
this + quat;
305 *
this = *
this - quat;
311 *
this = *
this * scalar;
317 *
this = *
this * scalar;
333 *
this = *
this / scalar;
339 *
this = *
this / scalar;
345 return Quat(_mm_sub_ps( _mm_setzero_ps(),
mVec128 ) );
355 return quat * scalar;
365 return floatInVec(
_vmathVfDot4( quat.get128(), quat.get128() ), 0 );
370 return floatInVec( _mm_sqrt_ps(
_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
383 __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
384 cosAngle =
_vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
385 cosAngleX2Plus2 =
vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) );
386 recipCosHalfAngleX2 = _mm_rsqrt_ps( cosAngleX2Plus2 );
387 cosHalfAngleX2 =
vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
388 crossVec =
cross( unitVec0, unitVec1 );
389 res =
vec_mul( crossVec.get128(), recipCosHalfAngleX2 );
391 res =
vec_sel( res,
vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw );
397 return rotation( floatInVec(radians), unitVec );
402 __m128 s, c, angle, res;
417 __m128 s, c, angle, res;
422 res =
vec_sel( _mm_setzero_ps(), s, xsw );
434 __m128 s, c, angle, res;
439 res =
vec_sel( _mm_setzero_ps(), s, ysw );
451 __m128 s, c, angle, res;
456 res =
vec_sel( _mm_setzero_ps(), s, zsw );
463 __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
464 __m128 product, l_wxyz, r_wxyz, xy, qw;
467 tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) );
468 tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) );
469 tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) );
470 tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) );
475 product =
vec_mul( ldata, rdata );
476 l_wxyz =
vec_sld( ldata, ldata, 12 );
477 r_wxyz =
vec_sld( rdata, rdata, 12 );
478 qw =
vec_nmsub( l_wxyz, r_wxyz, product );
479 xy =
vec_madd( l_wxyz, r_wxyz, product );
487 *
this = *
this * quat;
492 { __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
494 vdata = vec.get128();
495 tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) );
496 tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) );
497 tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) );
498 tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) );
503 product =
vec_mul( qdata, vdata );
506 tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) );
507 tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) );
512 return Vector3( res );
518 return Quat(
vec_xor( quat.get128(), _mm_load_ps((
float *)sw) ) );
523 return select( quat0, quat1, boolInVec(select1) );
533 #ifdef USE_SSE3_LDDQU
534 quat = Quat(
SSEFloat(_mm_lddqu_si128((
const __m128i*)((
float*)(fptr)))).m128 );
541 quat = Quat( fl.
m128);
549 fptr[0] = quat.getX();
550 fptr[1] = quat.getY();
551 fptr[2] = quat.getZ();
552 fptr[3] = quat.getW();
558 #ifdef _VECTORMATH_DEBUG
562 union { __m128 v;
float s[4]; } tmp;
563 tmp.v = quat.get128();
564 printf(
"( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
569 union { __m128 v;
float s[4]; } tmp;
570 tmp.v = quat.get128();
571 printf(
"%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
const Quat normalize(const Quat &quat)
static const Quat rotationY(float radians)
Quat & operator*=(const Quat &quat)
Quat & operator-=(const Quat &quat)
const Quat operator+(const Quat &quat) const
#define vec_madd(a, b, c)
static __m128 sinf4(vec_float4 x)
Quat & operator=(const Quat &quat)
#define _VECTORMATH_SLERP_TOL
const Vector3 rotate(const Quat &quat, const Vector3 &vec)
float & operator[](int idx)
static const Quat identity()
#define VECTORMATH_FORCE_INLINE
static __m128 acosf4(__m128 x)
static const Quat rotationZ(float radians)
static const Quat rotation(const Vector3 &unitVec0, const Vector3 &unitVec1)
#define _vmathVfSetElement(vec, scalar, slot)
static const Quat rotationX(float radians)
void set128(vec_float4 vec)
const Quat operator*(const Quat &quat) const
float norm(const Quat &quat)
#define _VECTORMATH_UNIT_0001
const Quat operator/(float scalar) const
static __m128 _vmathVfDot4(__m128 vec0, __m128 vec1)
const Quat operator-() const
#define VM_ATTRIBUTE_ALIGN16
const Vector3 getXYZ() const
#define vec_nmsub(a, b, c)
#define vec_sld(vec, vec2, x)
void loadXYZW(Quat &quat, const float *fptr)
const Quat conj(const Quat &quat)
static __m128 newtonrapson_rsqrt4(const __m128 v)
static __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
static void sincosf4(vec_float4 x, vec_float4 *s, vec_float4 *c)
Quat & operator/=(float scalar)
void storeXYZW(const Quat &quat, float *fptr)
const Quat lerp(float t, const Quat &quat0, const Quat &quat1)
const Matrix3 select(const Matrix3 &mat0, const Matrix3 &mat1, bool select1)
Quat & setElem(int idx, float value)
float getElem(int idx) const
const Quat squad(float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3)
float dot(const Quat &quat0, const Quat &quat1)
const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
Quat & setXYZ(const Vector3 &vec)
float length(const Quat &quat)
static __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
static __m128 _vmathVfDot3(__m128 vec0, __m128 vec1)
const Matrix3 operator*(float scalar, const Matrix3 &mat)
Quat & operator+=(const Quat &quat)
const Quat slerp(float t, const Quat &unitQuat0, const Quat &unitQuat1)