Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
613  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
614  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
615 
617  {
618  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
619  }
621  {
622  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
623  }
625  {
626  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
627  }
628 
629 
639  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
640  {
641  rot.setIdentity();
642  for (int step = maxSteps; step > 0; step--)
643  {
644  // find off-diagonal element [p][q] with largest magnitude
645  int p = 0;
646  int q = 1;
647  int r = 2;
648  btScalar max = btFabs(m_el[0][1]);
649  btScalar v = btFabs(m_el[0][2]);
650  if (v > max)
651  {
652  q = 2;
653  r = 1;
654  max = v;
655  }
656  v = btFabs(m_el[1][2]);
657  if (v > max)
658  {
659  p = 1;
660  q = 2;
661  r = 0;
662  max = v;
663  }
664 
665  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
666  if (max <= t)
667  {
668  if (max <= SIMD_EPSILON * t)
669  {
670  return;
671  }
672  step = 1;
673  }
674 
675  // compute Jacobi rotation J which leads to a zero for element [p][q]
676  btScalar mpq = m_el[p][q];
677  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
678  btScalar theta2 = theta * theta;
679  btScalar cos;
680  btScalar sin;
681  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
682  {
683  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
684  : 1 / (theta - btSqrt(1 + theta2));
685  cos = 1 / btSqrt(1 + t * t);
686  sin = cos * t;
687  }
688  else
689  {
690  // approximation for large theta-value, i.e., a nearly diagonal matrix
691  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
692  cos = 1 - btScalar(0.5) * t * t;
693  sin = cos * t;
694  }
695 
696  // apply rotation to matrix (this = J^T * this * J)
697  m_el[p][q] = m_el[q][p] = 0;
698  m_el[p][p] -= t * mpq;
699  m_el[q][q] += t * mpq;
700  btScalar mrp = m_el[r][p];
701  btScalar mrq = m_el[r][q];
702  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
703  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
704 
705  // apply rotation to rot (rot = rot * J)
706  for (int i = 0; i < 3; i++)
707  {
708  btVector3& row = rot[i];
709  mrp = row[p];
710  mrq = row[q];
711  row[p] = cos * mrp - sin * mrq;
712  row[q] = cos * mrq + sin * mrp;
713  }
714  }
715  }
716 
717 
718 
719 
727  btScalar cofac(int r1, int c1, int r2, int c2) const
728  {
729  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
730  }
731 
732  void serialize(struct btMatrix3x3Data& dataOut) const;
733 
734  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
735 
736  void deSerialize(const struct btMatrix3x3Data& dataIn);
737 
738  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
739 
740  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
741 
742 };
743 
744 
747 {
748 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
749  __m128 rv00, rv01, rv02;
750  __m128 rv10, rv11, rv12;
751  __m128 rv20, rv21, rv22;
752  __m128 mv0, mv1, mv2;
753 
754  rv02 = m_el[0].mVec128;
755  rv12 = m_el[1].mVec128;
756  rv22 = m_el[2].mVec128;
757 
758  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
759  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
760  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
761 
762  // rv0
763  rv00 = bt_splat_ps(rv02, 0);
764  rv01 = bt_splat_ps(rv02, 1);
765  rv02 = bt_splat_ps(rv02, 2);
766 
767  rv00 = _mm_mul_ps(rv00, mv0);
768  rv01 = _mm_mul_ps(rv01, mv1);
769  rv02 = _mm_mul_ps(rv02, mv2);
770 
771  // rv1
772  rv10 = bt_splat_ps(rv12, 0);
773  rv11 = bt_splat_ps(rv12, 1);
774  rv12 = bt_splat_ps(rv12, 2);
775 
776  rv10 = _mm_mul_ps(rv10, mv0);
777  rv11 = _mm_mul_ps(rv11, mv1);
778  rv12 = _mm_mul_ps(rv12, mv2);
779 
780  // rv2
781  rv20 = bt_splat_ps(rv22, 0);
782  rv21 = bt_splat_ps(rv22, 1);
783  rv22 = bt_splat_ps(rv22, 2);
784 
785  rv20 = _mm_mul_ps(rv20, mv0);
786  rv21 = _mm_mul_ps(rv21, mv1);
787  rv22 = _mm_mul_ps(rv22, mv2);
788 
789  rv00 = _mm_add_ps(rv00, rv01);
790  rv10 = _mm_add_ps(rv10, rv11);
791  rv20 = _mm_add_ps(rv20, rv21);
792 
793  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
794  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
795  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
796 
797 #elif defined(BT_USE_NEON)
798 
799  float32x4_t rv0, rv1, rv2;
800  float32x4_t v0, v1, v2;
801  float32x4_t mv0, mv1, mv2;
802 
803  v0 = m_el[0].mVec128;
804  v1 = m_el[1].mVec128;
805  v2 = m_el[2].mVec128;
806 
807  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
808  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
809  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
810 
811  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
812  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
813  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
814 
815  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
816  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
817  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
818 
819  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
820  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
821  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
822 
823  m_el[0].mVec128 = rv0;
824  m_el[1].mVec128 = rv1;
825  m_el[2].mVec128 = rv2;
826 #else
827  setValue(
828  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
829  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
830  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
831 #endif
832  return *this;
833 }
834 
837 {
838 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
839  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
840  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
841  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
842 #else
843  setValue(
844  m_el[0][0]+m.m_el[0][0],
845  m_el[0][1]+m.m_el[0][1],
846  m_el[0][2]+m.m_el[0][2],
847  m_el[1][0]+m.m_el[1][0],
848  m_el[1][1]+m.m_el[1][1],
849  m_el[1][2]+m.m_el[1][2],
850  m_el[2][0]+m.m_el[2][0],
851  m_el[2][1]+m.m_el[2][1],
852  m_el[2][2]+m.m_el[2][2]);
853 #endif
854  return *this;
855 }
856 
858 operator*(const btMatrix3x3& m, const btScalar & k)
859 {
860 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
861  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
862  return btMatrix3x3(
863  _mm_mul_ps(m[0].mVec128, vk),
864  _mm_mul_ps(m[1].mVec128, vk),
865  _mm_mul_ps(m[2].mVec128, vk));
866 #elif defined(BT_USE_NEON)
867  return btMatrix3x3(
868  vmulq_n_f32(m[0].mVec128, k),
869  vmulq_n_f32(m[1].mVec128, k),
870  vmulq_n_f32(m[2].mVec128, k));
871 #else
872  return btMatrix3x3(
873  m[0].x()*k,m[0].y()*k,m[0].z()*k,
874  m[1].x()*k,m[1].y()*k,m[1].z()*k,
875  m[2].x()*k,m[2].y()*k,m[2].z()*k);
876 #endif
877 }
878 
880 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
881 {
882 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
883  return btMatrix3x3(
884  m1[0].mVec128 + m2[0].mVec128,
885  m1[1].mVec128 + m2[1].mVec128,
886  m1[2].mVec128 + m2[2].mVec128);
887 #else
888  return btMatrix3x3(
889  m1[0][0]+m2[0][0],
890  m1[0][1]+m2[0][1],
891  m1[0][2]+m2[0][2],
892 
893  m1[1][0]+m2[1][0],
894  m1[1][1]+m2[1][1],
895  m1[1][2]+m2[1][2],
896 
897  m1[2][0]+m2[2][0],
898  m1[2][1]+m2[2][1],
899  m1[2][2]+m2[2][2]);
900 #endif
901 }
902 
904 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
905 {
906 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
907  return btMatrix3x3(
908  m1[0].mVec128 - m2[0].mVec128,
909  m1[1].mVec128 - m2[1].mVec128,
910  m1[2].mVec128 - m2[2].mVec128);
911 #else
912  return btMatrix3x3(
913  m1[0][0]-m2[0][0],
914  m1[0][1]-m2[0][1],
915  m1[0][2]-m2[0][2],
916 
917  m1[1][0]-m2[1][0],
918  m1[1][1]-m2[1][1],
919  m1[1][2]-m2[1][2],
920 
921  m1[2][0]-m2[2][0],
922  m1[2][1]-m2[2][1],
923  m1[2][2]-m2[2][2]);
924 #endif
925 }
926 
927 
930 {
931 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
932  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
933  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
934  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
935 #else
936  setValue(
937  m_el[0][0]-m.m_el[0][0],
938  m_el[0][1]-m.m_el[0][1],
939  m_el[0][2]-m.m_el[0][2],
940  m_el[1][0]-m.m_el[1][0],
941  m_el[1][1]-m.m_el[1][1],
942  m_el[1][2]-m.m_el[1][2],
943  m_el[2][0]-m.m_el[2][0],
944  m_el[2][1]-m.m_el[2][1],
945  m_el[2][2]-m.m_el[2][2]);
946 #endif
947  return *this;
948 }
949 
950 
953 {
954  return btTriple((*this)[0], (*this)[1], (*this)[2]);
955 }
956 
957 
960 {
961 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
962  return btMatrix3x3(
963  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
964  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
965  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
966 #elif defined(BT_USE_NEON)
967  return btMatrix3x3(
968  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
969  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
970  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
971 #else
972  return btMatrix3x3(
973  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
974  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
975  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
976 #endif
977 }
978 
981 {
982 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
983  __m128 v0 = m_el[0].mVec128;
984  __m128 v1 = m_el[1].mVec128;
985  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
986  __m128 vT;
987 
988  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
989 
990  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
991  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
992 
993  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
994  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
995  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
996 
997 
998  return btMatrix3x3( v0, v1, v2 );
999 #elif defined(BT_USE_NEON)
1000  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1001  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
1002  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1003  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
1004  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1005  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1006  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1007  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1008  return btMatrix3x3( v0, v1, v2 );
1009 #else
1010  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1011  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1012  m_el[0].z(), m_el[1].z(), m_el[2].z());
1013 #endif
1014 }
1015 
1018 {
1019  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1020  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1021  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1022 }
1023 
1026 {
1027  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1028  btScalar det = (*this)[0].dot(co);
1029  btFullAssert(det != btScalar(0.0));
1030  btScalar s = btScalar(1.0) / det;
1031  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1032  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1033  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1034 }
1035 
1038 {
1039 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1040  // zeros w
1041 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1042  __m128 row = m_el[0].mVec128;
1043  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1044  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1045  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1046  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1047  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1048  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1049  row = m_el[1].mVec128;
1050  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1051  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1052  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1053  row = m_el[2].mVec128;
1054  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1055  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1056  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1057  return btMatrix3x3( r0, r1, r2 );
1058 
1059 #elif defined BT_USE_NEON
1060  // zeros w
1061  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1062  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1063  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1064  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1065  float32x4_t row = m_el[0].mVec128;
1066  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1067  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1068  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1069  row = m_el[1].mVec128;
1070  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1071  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1072  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1073  row = m_el[2].mVec128;
1074  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1075  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1076  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1077  return btMatrix3x3( r0, r1, r2 );
1078 #else
1079  return btMatrix3x3(
1080  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1081  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1082  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1083  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1084  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1085  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1086  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1087  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1088  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1089 #endif
1090 }
1091 
1094 {
1095 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1096  __m128 a0 = m_el[0].mVec128;
1097  __m128 a1 = m_el[1].mVec128;
1098  __m128 a2 = m_el[2].mVec128;
1099 
1100  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1101  __m128 mx = mT[0].mVec128;
1102  __m128 my = mT[1].mVec128;
1103  __m128 mz = mT[2].mVec128;
1104 
1105  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1106  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1107  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1108  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1109  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1110  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1111  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1112  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1113  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1114  return btMatrix3x3( r0, r1, r2);
1115 
1116 #elif defined BT_USE_NEON
1117  float32x4_t a0 = m_el[0].mVec128;
1118  float32x4_t a1 = m_el[1].mVec128;
1119  float32x4_t a2 = m_el[2].mVec128;
1120 
1121  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1122  float32x4_t mx = mT[0].mVec128;
1123  float32x4_t my = mT[1].mVec128;
1124  float32x4_t mz = mT[2].mVec128;
1125 
1126  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1127  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1128  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1129  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1130  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1131  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1132  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1133  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1134  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1135  return btMatrix3x3( r0, r1, r2 );
1136 
1137 #else
1138  return btMatrix3x3(
1139  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1140  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1141  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1142 #endif
1143 }
1144 
1146 operator*(const btMatrix3x3& m, const btVector3& v)
1147 {
1148 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1149  return v.dot3(m[0], m[1], m[2]);
1150 #else
1151  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1152 #endif
1153 }
1154 
1155 
1157 operator*(const btVector3& v, const btMatrix3x3& m)
1158 {
1159 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1160 
1161  const __m128 vv = v.mVec128;
1162 
1163  __m128 c0 = bt_splat_ps( vv, 0);
1164  __m128 c1 = bt_splat_ps( vv, 1);
1165  __m128 c2 = bt_splat_ps( vv, 2);
1166 
1167  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1168  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1169  c0 = _mm_add_ps(c0, c1);
1170  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1171 
1172  return btVector3(_mm_add_ps(c0, c2));
1173 #elif defined(BT_USE_NEON)
1174  const float32x4_t vv = v.mVec128;
1175  const float32x2_t vlo = vget_low_f32(vv);
1176  const float32x2_t vhi = vget_high_f32(vv);
1177 
1178  float32x4_t c0, c1, c2;
1179 
1180  c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1181  c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1182  c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1183 
1184  c0 = vmulq_lane_f32(c0, vlo, 0);
1185  c1 = vmulq_lane_f32(c1, vlo, 1);
1186  c2 = vmulq_lane_f32(c2, vhi, 0);
1187  c0 = vaddq_f32(c0, c1);
1188  c0 = vaddq_f32(c0, c2);
1189 
1190  return btVector3(c0);
1191 #else
1192  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1193 #endif
1194 }
1195 
1197 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1198 {
1199 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1200 
1201  __m128 m10 = m1[0].mVec128;
1202  __m128 m11 = m1[1].mVec128;
1203  __m128 m12 = m1[2].mVec128;
1204 
1205  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1206 
1207  __m128 c0 = bt_splat_ps( m10, 0);
1208  __m128 c1 = bt_splat_ps( m11, 0);
1209  __m128 c2 = bt_splat_ps( m12, 0);
1210 
1211  c0 = _mm_mul_ps(c0, m2v);
1212  c1 = _mm_mul_ps(c1, m2v);
1213  c2 = _mm_mul_ps(c2, m2v);
1214 
1215  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1216 
1217  __m128 c0_1 = bt_splat_ps( m10, 1);
1218  __m128 c1_1 = bt_splat_ps( m11, 1);
1219  __m128 c2_1 = bt_splat_ps( m12, 1);
1220 
1221  c0_1 = _mm_mul_ps(c0_1, m2v);
1222  c1_1 = _mm_mul_ps(c1_1, m2v);
1223  c2_1 = _mm_mul_ps(c2_1, m2v);
1224 
1225  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1226 
1227  c0 = _mm_add_ps(c0, c0_1);
1228  c1 = _mm_add_ps(c1, c1_1);
1229  c2 = _mm_add_ps(c2, c2_1);
1230 
1231  m10 = bt_splat_ps( m10, 2);
1232  m11 = bt_splat_ps( m11, 2);
1233  m12 = bt_splat_ps( m12, 2);
1234 
1235  m10 = _mm_mul_ps(m10, m2v);
1236  m11 = _mm_mul_ps(m11, m2v);
1237  m12 = _mm_mul_ps(m12, m2v);
1238 
1239  c0 = _mm_add_ps(c0, m10);
1240  c1 = _mm_add_ps(c1, m11);
1241  c2 = _mm_add_ps(c2, m12);
1242 
1243  return btMatrix3x3(c0, c1, c2);
1244 
1245 #elif defined(BT_USE_NEON)
1246 
1247  float32x4_t rv0, rv1, rv2;
1248  float32x4_t v0, v1, v2;
1249  float32x4_t mv0, mv1, mv2;
1250 
1251  v0 = m1[0].mVec128;
1252  v1 = m1[1].mVec128;
1253  v2 = m1[2].mVec128;
1254 
1255  mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1256  mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1257  mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1258 
1259  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1260  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1261  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1262 
1263  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1264  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1265  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1266 
1267  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1268  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1269  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1270 
1271  return btMatrix3x3(rv0, rv1, rv2);
1272 
1273 #else
1274  return btMatrix3x3(
1275  m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1276  m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1277  m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1278 #endif
1279 }
1280 
1281 /*
1282 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1283 return btMatrix3x3(
1284 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1285 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1286 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1287 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1288 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1289 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1290 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1291 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1292 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1293 }
1294 */
1295 
1299 {
1300 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1301 
1302  __m128 c0, c1, c2;
1303 
1304  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1305  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1306  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1307 
1308  c0 = _mm_and_ps(c0, c1);
1309  c0 = _mm_and_ps(c0, c2);
1310 
1311  return (0x7 == _mm_movemask_ps((__m128)c0));
1312 #else
1313  return
1314  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1315  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1316  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1317 #endif
1318 }
1319 
1322 {
1324 };
1325 
1328 {
1330 };
1331 
1332 
1333 
1334 
1336 {
1337  for (int i=0;i<3;i++)
1338  m_el[i].serialize(dataOut.m_el[i]);
1339 }
1340 
1342 {
1343  for (int i=0;i<3;i++)
1344  m_el[i].serializeFloat(dataOut.m_el[i]);
1345 }
1346 
1347 
1349 {
1350  for (int i=0;i<3;i++)
1351  m_el[i].deSerialize(dataIn.m_el[i]);
1352 }
1353 
1355 {
1356  for (int i=0;i<3;i++)
1357  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1358 }
1359 
1361 {
1362  for (int i=0;i<3;i++)
1363  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1364 }
1365 
1366 #endif //BT_MATRIX3x3_H
1367 
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1025
float determinant(const Matrix3 &mat)
Definition: neon/mat_aos.h:189
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1354
#define SIMD_EPSILON
Definition: btScalar.h:448
for serialization
Definition: btMatrix3x3.h:1321
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1329
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:620
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1298
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1335
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:640
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
btScalar btSin(btScalar x)
Definition: btScalar.h:409
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
btScalar btSqrt(btScalar y)
Definition: btScalar.h:387
#define SIMD_FORCE_INLINE
Definition: btScalar.h:58
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1037
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:880
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:849
#define btFullAssert(x)
Definition: btScalar.h:104
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
#define SIMD_HALF_PI
Definition: btScalar.h:436
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
float3 & operator-=(float3 &a, const float3 &b)
Definition: btGpuDefines.h:177
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:616
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:624
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1348
#define SIMD_PI
Definition: btScalar.h:434
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:959
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:639
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1360
static float max(float a, float b)
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:746
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:426
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:858
float4 & operator+=(float4 &a, const float4 &b)
Definition: btGpuDefines.h:133
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
unsigned int uint32_t
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:319
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:59
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:929
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1017
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1341
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
float4 & operator*=(float4 &a, float fact)
Definition: btGpuDefines.h:128
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:836
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:904
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:980
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:718
const Matrix3 transpose(const Matrix3 &mat)
Definition: neon/mat_aos.h:165
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:827
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1093
for serialization
Definition: btMatrix3x3.h:1327
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:48
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btScalar btAsin(btScalar x)
Definition: btScalar.h:418
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:727
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:924
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:952
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:266
btScalar btCos(btScalar x)
Definition: btScalar.h:408
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1323
btScalar btFabs(btScalar x)
Definition: btScalar.h:407
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298