Bullet Collision Detection & Physics Library
btQuaternion.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
19 
20 
21 #include "btVector3.h"
22 #include "btQuadWord.h"
23 
24 
25 
26 
27 
#if defined(BT_USE_SSE)

// (1, 1, 1, 1): numerator used when a reciprocal is formed with a SIMD divide.
#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))

// Sign-bit masks meant to be XOR-ed into a quaternion register.
// _mm_set_ps takes its arguments from the highest lane down to the lowest,
// so the LAST argument is the x lane and the FIRST is the w lane.
//   vQInv : sign bit set on x, y, z -> negates the vector part (see inverse()).
//   vPPPM : sign bit set on w only   -> negates the last element.
#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))

#elif defined(BT_USE_NEON)

// Same two masks for NEON; the initializer lists lanes in x, y, z, w order.
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};

#endif
46 
48 class btQuaternion : public btQuadWord {
49 public:
52 
53 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
54  // Set Vector
55  SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
56  {
57  mVec128 = vec;
58  }
59 
60  // Copy constructor
62  {
63  mVec128 = rhs.mVec128;
64  }
65 
66  // Assignment Operator
68  operator=(const btQuaternion& v)
69  {
70  mVec128 = v.mVec128;
71 
72  return *this;
73  }
74 
75 #endif
76 
77  // template <typename btScalar>
78  // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
80  btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
81  : btQuadWord(_x, _y, _z, _w)
82  {}
86  btQuaternion(const btVector3& _axis, const btScalar& _angle)
87  {
88  setRotation(_axis, _angle);
89  }
94  btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
95  {
96 #ifndef BT_EULER_DEFAULT_ZYX
97  setEuler(yaw, pitch, roll);
98 #else
99  setEulerZYX(yaw, pitch, roll);
100 #endif
101  }
105  void setRotation(const btVector3& axis, const btScalar& _angle)
106  {
107  btScalar d = axis.length();
108  btAssert(d != btScalar(0.0));
109  btScalar s = btSin(_angle * btScalar(0.5)) / d;
110  setValue(axis.x() * s, axis.y() * s, axis.z() * s,
111  btCos(_angle * btScalar(0.5)));
112  }
117  void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
118  {
119  btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
120  btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
121  btScalar halfRoll = btScalar(roll) * btScalar(0.5);
122  btScalar cosYaw = btCos(halfYaw);
123  btScalar sinYaw = btSin(halfYaw);
124  btScalar cosPitch = btCos(halfPitch);
125  btScalar sinPitch = btSin(halfPitch);
126  btScalar cosRoll = btCos(halfRoll);
127  btScalar sinRoll = btSin(halfRoll);
128  setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
129  cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
130  sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
131  cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
132  }
137  void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
138  {
139  btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
140  btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
141  btScalar halfRoll = btScalar(roll) * btScalar(0.5);
142  btScalar cosYaw = btCos(halfYaw);
143  btScalar sinYaw = btSin(halfYaw);
144  btScalar cosPitch = btCos(halfPitch);
145  btScalar sinPitch = btSin(halfPitch);
146  btScalar cosRoll = btCos(halfRoll);
147  btScalar sinRoll = btSin(halfRoll);
148  setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
149  cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
150  cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
151  cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
152  }
156  {
157 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
158  mVec128 = _mm_add_ps(mVec128, q.mVec128);
159 #elif defined(BT_USE_NEON)
160  mVec128 = vaddq_f32(mVec128, q.mVec128);
161 #else
162  m_floats[0] += q.x();
163  m_floats[1] += q.y();
164  m_floats[2] += q.z();
165  m_floats[3] += q.m_floats[3];
166 #endif
167  return *this;
168  }
169 
173  {
174 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
175  mVec128 = _mm_sub_ps(mVec128, q.mVec128);
176 #elif defined(BT_USE_NEON)
177  mVec128 = vsubq_f32(mVec128, q.mVec128);
178 #else
179  m_floats[0] -= q.x();
180  m_floats[1] -= q.y();
181  m_floats[2] -= q.z();
182  m_floats[3] -= q.m_floats[3];
183 #endif
184  return *this;
185  }
186 
190  {
191 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
192  __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
193  vs = bt_pshufd_ps(vs, 0); // (S S S S)
194  mVec128 = _mm_mul_ps(mVec128, vs);
195 #elif defined(BT_USE_NEON)
196  mVec128 = vmulq_n_f32(mVec128, s);
197 #else
198  m_floats[0] *= s;
199  m_floats[1] *= s;
200  m_floats[2] *= s;
201  m_floats[3] *= s;
202 #endif
203  return *this;
204  }
205 
210  {
211 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
212  __m128 vQ2 = q.get128();
213 
214  __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
215  __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
216 
217  A1 = A1 * B1;
218 
219  __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
220  __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
221 
222  A2 = A2 * B2;
223 
224  B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
225  B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
226 
227  B1 = B1 * B2; // A3 *= B3
228 
229  mVec128 = bt_splat_ps(mVec128, 3); // A0
230  mVec128 = mVec128 * vQ2; // A0 * B0
231 
232  A1 = A1 + A2; // AB12
233  mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
234  A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
235  mVec128 = mVec128+ A1; // AB03 + AB12
236 
237 #elif defined(BT_USE_NEON)
238 
239  float32x4_t vQ1 = mVec128;
240  float32x4_t vQ2 = q.get128();
241  float32x4_t A0, A1, B1, A2, B2, A3, B3;
242  float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
243 
244  {
245  float32x2x2_t tmp;
246  tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
247  vQ1zx = tmp.val[0];
248 
249  tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
250  vQ2zx = tmp.val[0];
251  }
252  vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
253 
254  vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
255 
256  vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
257  vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
258 
259  A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
260  B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
261 
262  A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
263  B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
264 
265  A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
266  B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
267 
268  A1 = vmulq_f32(A1, B1);
269  A2 = vmulq_f32(A2, B2);
270  A3 = vmulq_f32(A3, B3); // A3 *= B3
271  A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
272 
273  A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
274  A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
275 
276  // change the sign of the last element
277  A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
278  A0 = vaddq_f32(A0, A1); // AB03 + AB12
279 
280  mVec128 = A0;
281 #else
282  setValue(
283  m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
284  m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
285  m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
286  m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
287 #endif
288  return *this;
289  }
292  btScalar dot(const btQuaternion& q) const
293  {
294 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
295  __m128 vd;
296 
297  vd = _mm_mul_ps(mVec128, q.mVec128);
298 
299  __m128 t = _mm_movehl_ps(vd, vd);
300  vd = _mm_add_ps(vd, t);
301  t = _mm_shuffle_ps(vd, vd, 0x55);
302  vd = _mm_add_ss(vd, t);
303 
304  return _mm_cvtss_f32(vd);
305 #elif defined(BT_USE_NEON)
306  float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
307  float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
308  x = vpadd_f32(x, x);
309  return vget_lane_f32(x, 0);
310 #else
311  return m_floats[0] * q.x() +
312  m_floats[1] * q.y() +
313  m_floats[2] * q.z() +
314  m_floats[3] * q.m_floats[3];
315 #endif
316  }
317 
320  {
321  return dot(*this);
322  }
323 
325  btScalar length() const
326  {
327  return btSqrt(length2());
328  }
329 
333  {
334 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
335  __m128 vd;
336 
337  vd = _mm_mul_ps(mVec128, mVec128);
338 
339  __m128 t = _mm_movehl_ps(vd, vd);
340  vd = _mm_add_ps(vd, t);
341  t = _mm_shuffle_ps(vd, vd, 0x55);
342  vd = _mm_add_ss(vd, t);
343 
344  vd = _mm_sqrt_ss(vd);
345  vd = _mm_div_ss(vOnes, vd);
346  vd = bt_pshufd_ps(vd, 0); // splat
347  mVec128 = _mm_mul_ps(mVec128, vd);
348 
349  return *this;
350 #else
351  return *this /= length();
352 #endif
353  }
354 
358  operator*(const btScalar& s) const
359  {
360 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
361  __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
362  vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
363 
364  return btQuaternion(_mm_mul_ps(mVec128, vs));
365 #elif defined(BT_USE_NEON)
366  return btQuaternion(vmulq_n_f32(mVec128, s));
367 #else
368  return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
369 #endif
370  }
371 
375  {
376  btAssert(s != btScalar(0.0));
377  return *this * (btScalar(1.0) / s);
378  }
379 
383  {
384  btAssert(s != btScalar(0.0));
385  return *this *= btScalar(1.0) / s;
386  }
387 
390  {
391  return *this / length();
392  }
395  btScalar angle(const btQuaternion& q) const
396  {
397  btScalar s = btSqrt(length2() * q.length2());
398  btAssert(s != btScalar(0.0));
399  return btAcos(dot(q) / s);
400  }
401 
405  {
406  btScalar s = btSqrt(length2() * q.length2());
407  btAssert(s != btScalar(0.0));
408  if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
409  return btAcos(dot(-q) / s) * btScalar(2.0);
410  else
411  return btAcos(dot(q) / s) * btScalar(2.0);
412  }
413 
416  {
417  btScalar s = btScalar(2.) * btAcos(m_floats[3]);
418  return s;
419  }
420 
423  {
424  btScalar s;
425  if (dot(*this) < 0)
426  s = btScalar(2.) * btAcos(m_floats[3]);
427  else
428  s = btScalar(2.) * btAcos(-m_floats[3]);
429 
430  return s;
431  }
432 
433 
436  {
437  btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
438 
439  if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
440  return btVector3(1.0, 0.0, 0.0); // Arbitrary
441  btScalar s = 1.f/btSqrt(s_squared);
442  return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
443  }
444 
447  {
448 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
449  return btQuaternion(_mm_xor_ps(mVec128, vQInv));
450 #elif defined(BT_USE_NEON)
451  return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
452 #else
453  return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
454 #endif
455  }
456 
460  operator+(const btQuaternion& q2) const
461  {
462 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
463  return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
464 #elif defined(BT_USE_NEON)
465  return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
466 #else
467  const btQuaternion& q1 = *this;
468  return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
469 #endif
470  }
471 
475  operator-(const btQuaternion& q2) const
476  {
477 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
478  return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
479 #elif defined(BT_USE_NEON)
480  return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
481 #else
482  const btQuaternion& q1 = *this;
483  return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
484 #endif
485  }
486 
490  {
491 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
492  return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
493 #elif defined(BT_USE_NEON)
494  return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
495 #else
496  const btQuaternion& q2 = *this;
497  return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]);
498 #endif
499  }
502  {
503  btQuaternion diff,sum;
504  diff = *this - qd;
505  sum = *this + qd;
506  if( diff.dot(diff) > sum.dot(sum) )
507  return qd;
508  return (-qd);
509  }
510 
513  {
514  btQuaternion diff,sum;
515  diff = *this - qd;
516  sum = *this + qd;
517  if( diff.dot(diff) < sum.dot(sum) )
518  return qd;
519  return (-qd);
520  }
521 
522 
527  btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
528  {
529  btScalar magnitude = btSqrt(length2() * q.length2());
530  btAssert(magnitude > btScalar(0));
531 
532  btScalar product = dot(q) / magnitude;
533  if (btFabs(product) < btScalar(1))
534  {
535  // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
536  const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
537 
538  const btScalar theta = btAcos(sign * product);
539  const btScalar s1 = btSin(sign * t * theta);
540  const btScalar d = btScalar(1.0) / btSin(theta);
541  const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
542 
543  return btQuaternion(
544  (m_floats[0] * s0 + q.x() * s1) * d,
545  (m_floats[1] * s0 + q.y() * s1) * d,
546  (m_floats[2] * s0 + q.z() * s1) * d,
547  (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
548  }
549  else
550  {
551  return *this;
552  }
553  }
554 
555  static const btQuaternion& getIdentity()
556  {
557  static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
558  return identityQuat;
559  }
560 
561  SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
562 
563 
564 };
565 
566 
567 
568 
569 
572 operator*(const btQuaternion& q1, const btQuaternion& q2)
573 {
574 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
575  __m128 vQ1 = q1.get128();
576  __m128 vQ2 = q2.get128();
577  __m128 A0, A1, B1, A2, B2;
578 
579  A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
580  B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
581 
582  A1 = A1 * B1;
583 
584  A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
585  B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
586 
587  A2 = A2 * B2;
588 
589  B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
590  B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
591 
592  B1 = B1 * B2; // A3 *= B3
593 
594  A0 = bt_splat_ps(vQ1, 3); // A0
595  A0 = A0 * vQ2; // A0 * B0
596 
597  A1 = A1 + A2; // AB12
598  A0 = A0 - B1; // AB03 = AB0 - AB3
599 
600  A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
601  A0 = A0 + A1; // AB03 + AB12
602 
603  return btQuaternion(A0);
604 
605 #elif defined(BT_USE_NEON)
606 
607  float32x4_t vQ1 = q1.get128();
608  float32x4_t vQ2 = q2.get128();
609  float32x4_t A0, A1, B1, A2, B2, A3, B3;
610  float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
611 
612  {
613  float32x2x2_t tmp;
614  tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
615  vQ1zx = tmp.val[0];
616 
617  tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
618  vQ2zx = tmp.val[0];
619  }
620  vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
621 
622  vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
623 
624  vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
625  vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
626 
627  A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
628  B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
629 
630  A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
631  B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
632 
633  A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
634  B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
635 
636  A1 = vmulq_f32(A1, B1);
637  A2 = vmulq_f32(A2, B2);
638  A3 = vmulq_f32(A3, B3); // A3 *= B3
639  A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
640 
641  A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
642  A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
643 
644  // change the sign of the last element
645  A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
646  A0 = vaddq_f32(A0, A1); // AB03 + AB12
647 
648  return btQuaternion(A0);
649 
650 #else
651  return btQuaternion(
652  q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
653  q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
654  q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
655  q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
656 #endif
657 }
658 
660 operator*(const btQuaternion& q, const btVector3& w)
661 {
662 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
663  __m128 vQ1 = q.get128();
664  __m128 vQ2 = w.get128();
665  __m128 A1, B1, A2, B2, A3, B3;
666 
667  A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
668  B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
669 
670  A1 = A1 * B1;
671 
672  A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
673  B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
674 
675  A2 = A2 * B2;
676 
677  A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
678  B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
679 
680  A3 = A3 * B3; // A3 *= B3
681 
682  A1 = A1 + A2; // AB12
683  A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
684  A1 = A1 - A3; // AB123 = AB12 - AB3
685 
686  return btQuaternion(A1);
687 
688 #elif defined(BT_USE_NEON)
689 
690  float32x4_t vQ1 = q.get128();
691  float32x4_t vQ2 = w.get128();
692  float32x4_t A1, B1, A2, B2, A3, B3;
693  float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
694 
695  vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
696  {
697  float32x2x2_t tmp;
698 
699  tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
700  vQ2zx = tmp.val[0];
701 
702  tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
703  vQ1zx = tmp.val[0];
704  }
705 
706  vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
707 
708  vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
709  vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
710 
711  A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
712  B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
713 
714  A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
715  B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
716 
717  A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
718  B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
719 
720  A1 = vmulq_f32(A1, B1);
721  A2 = vmulq_f32(A2, B2);
722  A3 = vmulq_f32(A3, B3); // A3 *= B3
723 
724  A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
725 
726  // change the sign of the last element
727  A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
728 
729  A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
730 
731  return btQuaternion(A1);
732 
733 #else
734  return btQuaternion(
735  q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
736  q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
737  q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
738  -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
739 #endif
740 }
741 
743 operator*(const btVector3& w, const btQuaternion& q)
744 {
745 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
746  __m128 vQ1 = w.get128();
747  __m128 vQ2 = q.get128();
748  __m128 A1, B1, A2, B2, A3, B3;
749 
750  A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
751  B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
752 
753  A1 = A1 * B1;
754 
755  A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
756  B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
757 
758  A2 = A2 *B2;
759 
760  A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
761  B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
762 
763  A3 = A3 * B3; // A3 *= B3
764 
765  A1 = A1 + A2; // AB12
766  A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
767  A1 = A1 - A3; // AB123 = AB12 - AB3
768 
769  return btQuaternion(A1);
770 
771 #elif defined(BT_USE_NEON)
772 
773  float32x4_t vQ1 = w.get128();
774  float32x4_t vQ2 = q.get128();
775  float32x4_t A1, B1, A2, B2, A3, B3;
776  float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
777 
778  {
779  float32x2x2_t tmp;
780 
781  tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
782  vQ1zx = tmp.val[0];
783 
784  tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
785  vQ2zx = tmp.val[0];
786  }
787  vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
788 
789  vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
790 
791  vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
792  vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
793 
794  A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
795  B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
796 
797  A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
798  B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
799 
800  A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
801  B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
802 
803  A1 = vmulq_f32(A1, B1);
804  A2 = vmulq_f32(A2, B2);
805  A3 = vmulq_f32(A3, B3); // A3 *= B3
806 
807  A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
808 
809  // change the sign of the last element
810  A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
811 
812  A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
813 
814  return btQuaternion(A1);
815 
816 #else
817  return btQuaternion(
818  +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
819  +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
820  +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
821  -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
822 #endif
823 }
824 
827 dot(const btQuaternion& q1, const btQuaternion& q2)
828 {
829  return q1.dot(q2);
830 }
831 
832 
836 {
837  return q.length();
838 }
839 
842 btAngle(const btQuaternion& q1, const btQuaternion& q2)
843 {
844  return q1.angle(q2);
845 }
846 
850 {
851  return q.inverse();
852 }
853 
860 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
861 {
862  return q1.slerp(q2, t);
863 }
864 
866 quatRotate(const btQuaternion& rotation, const btVector3& v)
867 {
868  btQuaternion q = rotation * v;
869  q *= rotation.inverse();
870 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
871  return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
872 #elif defined(BT_USE_NEON)
873  return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
874 #else
875  return btVector3(q.getX(),q.getY(),q.getZ());
876 #endif
877 }
878 
880 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
881 {
882  btVector3 c = v0.cross(v1);
883  btScalar d = v0.dot(v1);
884 
885  if (d < -1.0 + SIMD_EPSILON)
886  {
887  btVector3 n,unused;
888  btPlaneSpace1(v0,n,unused);
889  return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
890  }
891 
892  btScalar s = btSqrt((1.0f + d) * 2.0f);
893  btScalar rs = 1.0f / s;
894 
895  return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
896 }
897 
900 {
901  v0.normalize();
902  v1.normalize();
903  return shortestArcQuat(v0,v1);
904 }
905 
906 #endif //BT_SIMD__QUATERNION_H_
907 
908 
909 
static T sum(const btAlignedObjectArray< T > &items)
#define SIMD_EPSILON
Definition: btScalar.h:448
btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Definition: btQuaternion.h:835
btScalar getAngle() const
Return the angle of rotation represented by this quaternion.
Definition: btQuaternion.h:415
btQuaternion & operator*=(const btQuaternion &q)
Multiply this quaternion by q on the right.
Definition: btQuaternion.h:209
btQuaternion farthest(const btQuaternion &qd) const
Definition: btQuaternion.h:501
btQuaternion(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Constructor from Euler angles.
Definition: btQuaternion.h:94
void setEulerZYX(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
Definition: btQuaternion.h:137
btQuaternion slerp(const btQuaternion &q1, const btQuaternion &q2, const btScalar &t)
Return the result of spherical linear interpolation between two quaternions.
Definition: btQuaternion.h:860
btScalar btSin(btScalar x)
Definition: btScalar.h:409
static const btQuaternion & getIdentity()
Definition: btQuaternion.h:555
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
void btPlaneSpace1(const T &n, T &p, T &q)
Definition: btVector3.h:1271
btScalar btSqrt(btScalar y)
Definition: btScalar.h:387
#define btAssert(x)
Definition: btScalar.h:101
btScalar dot(const btQuaternion &q) const
Return the dot product between this quaternion and another.
Definition: btQuaternion.h:292
const btScalar & getW() const
Definition: btQuaternion.h:561
btQuaternion operator*(const btQuaternion &q1, const btQuaternion &q2)
Return the product of two quaternions.
Definition: btQuaternion.h:572
#define SIMD_FORCE_INLINE
Definition: btScalar.h:58
btQuaternion & operator/=(const btScalar &s)
Inversely scale this quaternion.
Definition: btQuaternion.h:382
btQuaternion operator-(const btQuaternion &q2) const
Return the difference between this quaternion and the other.
Definition: btQuaternion.h:475
btQuaternion & operator-=(const btQuaternion &q)
Subtract out a quaternion.
Definition: btQuaternion.h:172
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btVector3 getAxis() const
Return the axis of the rotation represented by this quaternion.
Definition: btQuaternion.h:435
btQuaternion operator-() const
Return the negative of this quaternion. This simply negates each element.
Definition: btQuaternion.h:489
const btScalar & getY() const
Return the y value.
Definition: btQuadWord.h:104
const btScalar & getX() const
Return the x value.
Definition: btQuadWord.h:102
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:849
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:235
btQuaternion & operator+=(const btQuaternion &q)
Add two quaternions.
Definition: btQuaternion.h:155
btVector3 & normalize()
Normalize this vector so that x^2 + y^2 + z^2 = 1.
Definition: btVector3.h:297
btQuaternion nearest(const btQuaternion &qd) const
Definition: btQuaternion.h:512
btScalar length() const
Return the length of the quaternion.
Definition: btQuaternion.h:325
btQuaternion operator*(const btScalar &s) const
Return a scaled version of this quaternion.
Definition: btQuaternion.h:358
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
btVector3 quatRotate(const btQuaternion &rotation, const btVector3 &v)
Definition: btQuaternion.h:866
const btScalar & getZ() const
Return the z value.
Definition: btVector3.h:565
btQuaternion shortestArcQuat(const btVector3 &v0, const btVector3 &v1)
Definition: btQuaternion.h:880
void setRotation(const btVector3 &axis, const btScalar &_angle)
Set the rotation using axis angle notation.
Definition: btQuaternion.h:105
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
Definition: btQuaternion.h:332
btQuaternion shortestArcQuatNormalize2(btVector3 &v0, btVector3 &v1)
Definition: btQuaternion.h:899
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:377
btQuaternion()
No initialization constructor.
Definition: btQuaternion.h:51
const btScalar & getY() const
Return the y value.
Definition: btVector3.h:563
The btQuadWord class is base class for btVector3 and btQuaternion.
Definition: btQuadWord.h:34
const btScalar & getX() const
Return the x value.
Definition: btVector3.h:561
btQuaternion inverse() const
Return the inverse of this quaternion.
Definition: btQuaternion.h:446
btScalar length() const
Return the length of the vector.
Definition: btVector3.h:263
btScalar m_floats[4]
Definition: btQuadWord.h:69
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:319
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577
btScalar angleShortestPath(const btQuaternion &q) const
Return the angle between this quaternion and the other along the shortest path.
Definition: btQuaternion.h:404
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:59
btQuaternion(const btScalar &_x, const btScalar &_y, const btScalar &_z, const btScalar &_w)
Constructor from scalars.
Definition: btQuaternion.h:80
btScalar btAcos(btScalar x)
Definition: btScalar.h:411
btQuaternion normalized() const
Return a normalized version of this quaternion.
Definition: btQuaternion.h:389
btQuaternion & operator*=(const btScalar &s)
Scale this quaternion.
Definition: btQuaternion.h:189
btScalar angle(const btQuaternion &q) const
Return the ***half*** angle between this quaternion and the other.
Definition: btQuaternion.h:395
btScalar getAngleShortestPath() const
Return the angle of rotation represented by this quaternion along the shortest path.
Definition: btQuaternion.h:422
btQuaternion operator+(const btQuaternion &q2) const
Return the sum of this quaternion and the other.
Definition: btQuaternion.h:460
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:827
btQuaternion operator/(const btScalar &s) const
Return an inversely scaled version of this quaternion.
Definition: btQuaternion.h:374
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:48
void setEuler(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
Definition: btQuaternion.h:117
btScalar btAngle(const btQuaternion &q1, const btQuaternion &q2)
Return the angle between two quaternions.
Definition: btQuaternion.h:842
btQuaternion(const btVector3 &_axis, const btScalar &_angle)
Axis angle Constructor.
Definition: btQuaternion.h:86
const btScalar & getZ() const
Return the z value.
Definition: btQuadWord.h:106
btQuaternion slerp(const btQuaternion &q, const btScalar &t) const
Return the quaternion which is the result of Spherical Linear Interpolation between this and the othe...
Definition: btQuaternion.h:527
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:266
btScalar btCos(btScalar x)
Definition: btScalar.h:408
btScalar btFabs(btScalar x)
Definition: btScalar.h:407
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579