17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
31 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
35 #if defined(BT_USE_SSE)
37 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
38 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
40 #elif defined(BT_USE_NEON)
53 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
63 mVec128 = rhs.mVec128;
96 #ifndef BT_EULER_DEFAULT_ZYX
128 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
129 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
130 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
131 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
148 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
149 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
150 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
151 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
157 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
158 mVec128 = _mm_add_ps(mVec128, q.mVec128);
159 #elif defined(BT_USE_NEON)
160 mVec128 = vaddq_f32(mVec128, q.mVec128);
174 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
175 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
176 #elif defined(BT_USE_NEON)
177 mVec128 = vsubq_f32(mVec128, q.mVec128);
191 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
192 __m128 vs = _mm_load_ss(&s);
193 vs = bt_pshufd_ps(vs, 0);
194 mVec128 = _mm_mul_ps(mVec128, vs);
195 #elif defined(BT_USE_NEON)
196 mVec128 = vmulq_n_f32(mVec128, s);
211 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
212 __m128 vQ2 = q.get128();
214 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
215 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
219 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
220 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
224 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
225 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
229 mVec128 = bt_splat_ps(mVec128, 3);
230 mVec128 = mVec128 * vQ2;
233 mVec128 = mVec128 - B1;
234 A1 = _mm_xor_ps(A1, vPPPM);
235 mVec128 = mVec128+ A1;
237 #elif defined(BT_USE_NEON)
239 float32x4_t vQ1 = mVec128;
240 float32x4_t vQ2 = q.get128();
241 float32x4_t A0, A1, B1, A2, B2, A3, B3;
242 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
246 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
249 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
252 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
254 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
256 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
257 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
259 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
260 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
262 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
263 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
265 A3 = vcombine_f32(vQ1zx, vQ1yz);
266 B3 = vcombine_f32(vQ2yz, vQ2xz);
268 A1 = vmulq_f32(A1, B1);
269 A2 = vmulq_f32(A2, B2);
270 A3 = vmulq_f32(A3, B3);
271 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
273 A1 = vaddq_f32(A1, A2);
274 A0 = vsubq_f32(A0, A3);
277 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
278 A0 = vaddq_f32(A0, A1);
294 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
297 vd = _mm_mul_ps(mVec128, q.mVec128);
299 __m128 t = _mm_movehl_ps(vd, vd);
300 vd = _mm_add_ps(vd, t);
301 t = _mm_shuffle_ps(vd, vd, 0x55);
302 vd = _mm_add_ss(vd, t);
304 return _mm_cvtss_f32(vd);
305 #elif defined(BT_USE_NEON)
306 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
307 float32x2_t
x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
309 return vget_lane_f32(x, 0);
334 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
337 vd = _mm_mul_ps(mVec128, mVec128);
339 __m128 t = _mm_movehl_ps(vd, vd);
340 vd = _mm_add_ps(vd, t);
341 t = _mm_shuffle_ps(vd, vd, 0x55);
342 vd = _mm_add_ss(vd, t);
344 vd = _mm_sqrt_ss(vd);
345 vd = _mm_div_ss(vOnes, vd);
346 vd = bt_pshufd_ps(vd, 0);
347 mVec128 = _mm_mul_ps(mVec128, vd);
360 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
361 __m128 vs = _mm_load_ss(&s);
362 vs = bt_pshufd_ps(vs, 0x00);
365 #elif defined(BT_USE_NEON)
442 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
448 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
450 #elif defined(BT_USE_NEON)
451 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
462 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
464 #elif defined(BT_USE_NEON)
477 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
479 #elif defined(BT_USE_NEON)
491 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
493 #elif defined(BT_USE_NEON)
494 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
506 if( diff.
dot(diff) > sum.
dot(sum) )
517 if( diff.
dot(diff) < sum.
dot(sum) )
574 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
575 __m128 vQ1 = q1.get128();
576 __m128 vQ2 = q2.get128();
577 __m128 A0, A1, B1, A2, B2;
579 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
580 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
584 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
585 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
589 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
590 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
594 A0 = bt_splat_ps(vQ1, 3);
600 A1 = _mm_xor_ps(A1, vPPPM);
605 #elif defined(BT_USE_NEON)
607 float32x4_t vQ1 = q1.get128();
608 float32x4_t vQ2 = q2.get128();
609 float32x4_t A0, A1, B1, A2, B2, A3, B3;
610 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
614 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
617 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
620 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
622 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
624 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
625 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
627 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
628 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
630 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
631 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
633 A3 = vcombine_f32(vQ1zx, vQ1yz);
634 B3 = vcombine_f32(vQ2yz, vQ2xz);
636 A1 = vmulq_f32(A1, B1);
637 A2 = vmulq_f32(A2, B2);
638 A3 = vmulq_f32(A3, B3);
639 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
641 A1 = vaddq_f32(A1, A2);
642 A0 = vsubq_f32(A0, A3);
645 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
646 A0 = vaddq_f32(A0, A1);
652 q1.
w() * q2.
x() + q1.
x() * q2.
w() + q1.
y() * q2.
z() - q1.
z() * q2.
y(),
653 q1.
w() * q2.
y() + q1.
y() * q2.
w() + q1.
z() * q2.
x() - q1.
x() * q2.
z(),
654 q1.
w() * q2.
z() + q1.
z() * q2.
w() + q1.
x() * q2.
y() - q1.
y() * q2.
x(),
655 q1.
w() * q2.
w() - q1.
x() * q2.
x() - q1.
y() * q2.
y() - q1.
z() * q2.
z());
662 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
663 __m128 vQ1 = q.get128();
664 __m128 vQ2 = w.get128();
665 __m128 A1, B1, A2, B2, A3, B3;
667 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
668 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
672 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
673 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
677 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
678 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
683 A1 = _mm_xor_ps(A1, vPPPM);
688 #elif defined(BT_USE_NEON)
690 float32x4_t vQ1 = q.get128();
691 float32x4_t vQ2 = w.get128();
692 float32x4_t A1, B1, A2, B2, A3, B3;
693 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
695 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
699 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
702 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
706 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
708 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
709 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
711 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);
712 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);
714 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
715 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
717 A3 = vcombine_f32(vQ1zx, vQ1yz);
718 B3 = vcombine_f32(vQ2yz, vQ2xz);
720 A1 = vmulq_f32(A1, B1);
721 A2 = vmulq_f32(A2, B2);
722 A3 = vmulq_f32(A3, B3);
724 A1 = vaddq_f32(A1, A2);
727 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
729 A1 = vsubq_f32(A1, A3);
735 q.
w() * w.
x() + q.
y() * w.
z() - q.
z() * w.
y(),
736 q.
w() * w.
y() + q.
z() * w.
x() - q.
x() * w.
z(),
737 q.
w() * w.
z() + q.
x() * w.
y() - q.
y() * w.
x(),
738 -q.
x() * w.
x() - q.
y() * w.
y() - q.
z() * w.
z());
745 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
746 __m128 vQ1 = w.get128();
747 __m128 vQ2 = q.get128();
748 __m128 A1, B1, A2, B2, A3, B3;
750 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
751 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
755 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
756 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
760 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
761 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
766 A1 = _mm_xor_ps(A1, vPPPM);
771 #elif defined(BT_USE_NEON)
773 float32x4_t vQ1 = w.get128();
774 float32x4_t vQ2 = q.get128();
775 float32x4_t A1, B1, A2, B2, A3, B3;
776 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
781 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
784 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
787 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
789 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
791 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
792 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
794 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
795 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
797 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
798 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
800 A3 = vcombine_f32(vQ1zx, vQ1yz);
801 B3 = vcombine_f32(vQ2yz, vQ2xz);
803 A1 = vmulq_f32(A1, B1);
804 A2 = vmulq_f32(A2, B2);
805 A3 = vmulq_f32(A3, B3);
807 A1 = vaddq_f32(A1, A2);
810 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
812 A1 = vsubq_f32(A1, A3);
818 +w.
x() * q.
w() + w.
y() * q.
z() - w.
z() * q.
y(),
819 +w.
y() * q.
w() + w.
z() * q.
x() - w.
x() * q.
z(),
820 +w.
z() * q.
w() + w.
x() * q.
y() - w.
y() * q.
x(),
821 -w.
x() * q.
x() - w.
y() * q.
y() - w.
z() * q.
z());
862 return q1.
slerp(q2, t);
870 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
871 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
872 #elif defined(BT_USE_NEON)
873 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
906 #endif //BT_SIMD__QUATERNION_H_
static T sum(const btAlignedObjectArray< T > &items)
btScalar length(const btQuaternion &q)
Return the length of a quaternion.
btScalar getAngle() const
Return the angle of rotation represented by this quaternion.
btQuaternion & operator*=(const btQuaternion &q)
Multiply this quaternion by q on the right.
btQuaternion farthest(const btQuaternion &qd) const
btQuaternion(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Constructor from Euler angles.
void setEulerZYX(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using euler angles.
btQuaternion slerp(const btQuaternion &q1, const btQuaternion &q2, const btScalar &t)
Return the result of spherical linear interpolation between two quaternions.
btScalar btSin(btScalar x)
static const btQuaternion & getIdentity()
const btScalar & z() const
Return the z value.
void btPlaneSpace1(const T &n, T &p, T &q)
btScalar btSqrt(btScalar y)
btScalar dot(const btQuaternion &q) const
Return the dot product between this quaternion and another.
const btScalar & getW() const
btQuaternion operator*(const btQuaternion &q1, const btQuaternion &q2)
Return the product of two quaternions.
#define SIMD_FORCE_INLINE
btQuaternion & operator/=(const btScalar &s)
Inversely scale this quaternion.
btQuaternion operator-(const btQuaternion &q2) const
Return the difference between this quaternion and the other.
btQuaternion & operator-=(const btQuaternion &q)
Subtract out a quaternion.
const btScalar & y() const
Return the y value.
btVector3 getAxis() const
Return the axis of the rotation represented by this quaternion.
btQuaternion operator-() const
Return the negative of this quaternion. This simply negates each element.
const btScalar & getY() const
Return the y value.
const btScalar & getX() const
Return the x value.
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
const btScalar & w() const
Return the w value.
btScalar dot(const btVector3 &v) const
Return the dot product.
btQuaternion & operator+=(const btQuaternion &q)
Add two quaternions.
btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
btQuaternion nearest(const btQuaternion &qd) const
btScalar length() const
Return the length of the quaternion.
btQuaternion operator*(const btScalar &s) const
Return a scaled version of this quaternion.
const btScalar & x() const
Return the x value.
btVector3 quatRotate(const btQuaternion &rotation, const btVector3 &v)
const btScalar & getZ() const
Return the z value.
btQuaternion shortestArcQuat(const btVector3 &v0, const btVector3 &v1)
void setRotation(const btVector3 &axis, const btScalar &_angle)
Set the rotation using axis angle notation.
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
btQuaternion shortestArcQuatNormalize2(btVector3 &v0, btVector3 &v1)
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
btQuaternion()
No initialization constructor.
const btScalar & getY() const
Return the y value.
The btQuadWord class is the base class for btVector3 and btQuaternion.
const btScalar & getX() const
Return the x value.
btQuaternion inverse() const
Return the inverse of this quaternion.
btScalar length() const
Return the length of the vector.
btScalar length2() const
Return the length squared of the quaternion.
const btScalar & y() const
Return the y value.
btScalar angleShortestPath(const btQuaternion &q) const
Return the angle between this quaternion and the other along the shortest path.
btVector3 can be used to represent 3D points and vectors.
#define ATTRIBUTE_ALIGNED16(a)
btQuaternion(const btScalar &_x, const btScalar &_y, const btScalar &_z, const btScalar &_w)
Constructor from scalars.
btScalar btAcos(btScalar x)
btQuaternion normalized() const
Return a normalized version of this quaternion.
btQuaternion & operator*=(const btScalar &s)
Scale this quaternion.
btScalar angle(const btQuaternion &q) const
Return the ***half*** angle between this quaternion and the other.
btScalar getAngleShortestPath() const
Return the angle of rotation represented by this quaternion along the shortest path.
btQuaternion operator+(const btQuaternion &q2) const
Return the sum of this quaternion and the other.
const btScalar & x() const
Return the x value.
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
btQuaternion operator/(const btScalar &s) const
Return an inversely scaled version of this quaternion.
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform.
void setEuler(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
btScalar btAngle(const btQuaternion &q1, const btQuaternion &q2)
Return the angle between two quaternions.
btQuaternion(const btVector3 &_axis, const btScalar &_angle)
Axis angle Constructor.
const btScalar & getZ() const
Return the z value.
btQuaternion slerp(const btQuaternion &q, const btScalar &t) const
Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion.
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
btScalar btCos(btScalar x)
btScalar btFabs(btScalar x)
const btScalar & z() const
Return the z value.