#ifndef BT_MATRIX3x3_H
#define BT_MATRIX3x3_H
#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
#if defined(BT_USE_SSE)
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION
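/* The alias above makes serialized matrix data follow the build's precision:
   a btMatrix3x3Data written by a double-precision build is really a
   btMatrix3x3DoubleData. Minimal round-trip sketch (values are illustrative,
   not from this header):

	btMatrix3x3 m = btMatrix3x3::getIdentity();
	btMatrix3x3Data data;
	m.serialize(data);     // writes the three rows into the POD struct
	btMatrix3x3 m2;
	m2.deSerialize(data);  // m2 now equals m
*/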
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = rhs.m_el[0].mVec128;
	m_el[1].mVec128 = rhs.m_el[1].mVec128;
	m_el[2].mVec128 = rhs.m_el[2].mVec128;
	m_el[0].mVec128 = m.m_el[0].mVec128;
	m_el[1].mVec128 = m.m_el[1].mVec128;
	m_el[2].mVec128 = m.m_el[2].mVec128;
	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];
	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];
	return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
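/* Storage is row-major: m_el[i] is row i, so getColumn(i) has to gather one
   element from each row as above, while getRow(i) just returns a reference to
   m_el[i]. Illustrative sketch (values are assumptions):

	btMatrix3x3 m(1, 2, 3,
				  4, 5, 6,
				  7, 8, 9);
	btVector3 row1 = m.getRow(1);     // (4, 5, 6)
	btVector3 col1 = m.getColumn(1);  // (2, 5, 8), assembled element by element
*/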
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 vs, Q = q.get128();
	__m128i Qi = btCastfTo128i(Q);
	__m128 Y, Z;
	__m128 V1, V2, V3;
	__m128 V11, V21, V31;
	__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
	__m128i NQi = btCastfTo128i(NQ);

	V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));
	V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));
	V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));
	V1 = _mm_xor_ps(V1, vMPPP);

	V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));
	V21 = _mm_unpackhi_ps(Q, Q);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));

	V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));
	V21 = _mm_xor_ps(V21, vMPPP);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));
	V31 = _mm_xor_ps(V31, vMPPP);
	Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));
	Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));

	vs = _mm_load_ss(&s);
	vs = bt_splat3_ps(vs, 0);
#else
	btScalar d = q.length2();
	btFullAssert(d != btScalar(0.0));
	btScalar s = btScalar(2.0) / d;

	btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
	btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
	btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
	btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
	setValue(
		btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
		xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
		xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
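/* The scalar path above is the textbook unit-quaternion to rotation-matrix
   conversion, with s = 2/|q|^2 computed from length2(). Usage sketch (the
   quaternion value is an assumption for illustration):

	btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI);  // 90 degrees about Z
	btMatrix3x3 m;
	m.setRotation(q);
	// m * btVector3(1, 0, 0) is now approximately (0, 1, 0)
*/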
	setEulerZYX(roll, pitch, yaw);
	setValue(cj * ch, sj * sc - cs, sj * cc + ss,
			 cj * sh, sj * ss + cc, sj * cs - sc,
			 -sj, cj * si, cj * ci);
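/* In setEulerZYX the i/j/h subscripts belong to the X/Y/Z angles (ci =
   cos(eulerX), sj = sin(eulerY), ch = cos(eulerZ), ...), so the setValue above
   assembles R = Rz(eulerZ) * Ry(eulerY) * Rx(eulerX). Sketch (the angle is an
   assumption):

	btMatrix3x3 m;
	m.setEulerZYX(0, 0, SIMD_HALF_PI);  // yaw-only rotation about Z
	// equivalent to m.setRotation(btQuaternion(btVector3(0, 0, 1), SIMD_HALF_PI))
*/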
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	static const btMatrix3x3 identityMatrix(v1000, v0100, v0010);

	return identityMatrix;
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128* vm = (__m128*)m;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

	vm[0] = v0;
	vm[1] = v1;
	vm[2] = v2;
#elif defined(BT_USE_NEON)
	// note: zeros the w channel
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4_t* vm = (float32x4_t*)m;
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);

	vm[0] = v0;
	vm[1] = v1;
	vm[2] = v2;
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
		temp.f[0] = m_el[2].y() - m_el[1].z();
		temp.f[1] = m_el[0].z() - m_el[2].x();
		temp.f[2] = m_el[1].x() - m_el[0].y();
		if (m_el[0].x() < m_el[1].y())
		{
			if (m_el[1].y() < m_el[2].z())
				{ i = 2; j = 0; k = 1; }
			else
				{ i = 1; j = 2; k = 0; }
		}
		else
		{
			if (m_el[0].x() < m_el[2].z())
				{ i = 2; j = 0; k = 1; }
			else
				{ i = 0; j = 1; k = 2; }
		}
		x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
		temp.f[i] = x;
		temp.f[3] = (m_el[k][j] - m_el[j][k]);
		temp.f[j] = (m_el[j][i] + m_el[i][j]);
		temp.f[k] = (m_el[k][i] + m_el[i][k]);
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
		temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
		temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
		temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
		int i = m_el[0].x() < m_el[1].y()
					? (m_el[1].y() < m_el[2].z() ? 2 : 1)
					: (m_el[0].x() < m_el[2].z() ? 2 : 0);
		temp[3] = (m_el[k][j] - m_el[j][k]) * s;
		temp[j] = (m_el[j][i] + m_el[i][j]) * s;
		temp[k] = (m_el[k][i] + m_el[i][k]) * s;
	q.setValue(temp[0], temp[1], temp[2], temp[3]);
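/* getRotation recovers the quaternion only up to sign, since q and -q encode
   the same rotation. Round-trip sketch (values and tolerance assumed):

	btQuaternion in(btVector3(1, 0, 0), btScalar(0.3)), out;
	btMatrix3x3 m;
	m.setRotation(in);
	m.getRotation(out);  // out is approximately in (or -in), with |out| = 1
*/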
	if (btFabs(m_el[2].x()) >= 1)
			euler_out.roll = euler_out.pitch + delta;
			euler_out2.roll = euler_out.pitch + delta;
			euler_out.roll = -euler_out.pitch + delta;
			euler_out2.roll = -euler_out.pitch + delta;
	else
	{
		euler_out.pitch = -btAsin(m_el[2].x());
		euler_out2.pitch = SIMD_PI - euler_out.pitch;

		euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
								 m_el[2].z() / btCos(euler_out.pitch));
		euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
								  m_el[2].z() / btCos(euler_out2.pitch));

		euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
								m_el[0].x() / btCos(euler_out.pitch));
		euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
								 m_el[0].x() / btCos(euler_out2.pitch));
	}
	if (solution_number == 1)
	{
		yaw = euler_out.yaw;
		pitch = euler_out.pitch;
		roll = euler_out.roll;
	}
	else
	{
		yaw = euler_out2.yaw;
		pitch = euler_out2.pitch;
		roll = euler_out2.roll;
	}
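/* Away from the pitch singularity there are always two valid ZYX
   decompositions; solution_number just picks one, and both reproduce this
   matrix. Round-trip sketch (angles are assumptions):

	btMatrix3x3 m;
	m.setEulerZYX(btScalar(0.1), btScalar(0.2), btScalar(0.3));  // x, y, z angles
	btScalar yaw, pitch, roll;
	m.getEulerZYX(yaw, pitch, roll);  // yaw ~ 0.3, pitch ~ 0.2, roll ~ 0.1
*/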
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
	return btMatrix3x3(
		m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
		m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
		m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
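/* scaled(s) multiplies column j by s[j], i.e. it returns this * diag(s).
   Sketch:

	btMatrix3x3 d = btMatrix3x3::getIdentity().scaled(btVector3(2, 3, 4));
	// d is diag(2, 3, 4)
*/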
	return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();

	return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();

	return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
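/* tdotx/tdoty/tdotz dot v against a *column* of this matrix, i.e. a row of the
   transpose; operator*(const btVector3&, const btMatrix3x3&) is built on them.
   Identity sketch, stated as a property of the code above:

	// v * m == m.transpose() * v   (the left side uses tdotx/tdoty/tdotz)
*/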
	for (int step = maxSteps; step > 0; step--)
		btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
			t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
							 : 1 / (theta - btSqrt(1 + theta2));
			cos = 1 / btSqrt(1 + t * t);
			sin = cos * t;
		}
		else
		{
			// approximation for large theta values
			t = 1 / (theta * (2 + btScalar(0.5) / theta2));
			cos = 1 - btScalar(0.5) * t * t;
			sin = cos * t;
		}
		m_el[p][q] = m_el[q][p] = 0;
		m_el[p][p] -= t * mpq;
		m_el[q][q] += t * mpq;
		btScalar mrp = m_el[r][p];
		btScalar mrq = m_el[r][q];
		m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
		m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
		for (int i = 0; i < 3; i++)
		{
			btVector3& row = rot[i];
			mrp = row[p];
			mrq = row[q];
			row[p] = cos * mrp - sin * mrq;
			row[q] = cos * mrq + sin * mrp;
		}
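/* Jacobi iteration: each step zeroes the largest off-diagonal element with a
   plane rotation and accumulates that rotation into rot, so afterwards
   old_this == rot * new_this * rot.transpose() with new_this near-diagonal.
   Usage sketch (matrix, threshold, and step count are assumptions):

	btMatrix3x3 sym(2, 1, 0,
					1, 2, 0,
					0, 0, 1);  // must be symmetric
	btMatrix3x3 rot;
	sym.diagonalize(rot, btScalar(1.0e-9), 16);
	// sym now holds eigenvalues on its diagonal; rot's columns are the eigenvectors
*/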
	return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
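/* cofac(r1, c1, r2, c2) is the 2x2 determinant built from rows r1, r2 and
   columns c1, c2; with the index patterns used by adjoint() and inverse()
   below, these minors are exactly the cofactors of this matrix. */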
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	// rv0
	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	// rv1
	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	// rv2
	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
	setValue(
		m_el[0][0] + m.m_el[0][0],
		m_el[0][1] + m.m_el[0][1],
		m_el[0][2] + m.m_el[0][2],
		m_el[1][0] + m.m_el[1][0],
		m_el[1][1] + m.m_el[1][1],
		m_el[1][2] + m.m_el[1][2],
		m_el[2][0] + m.m_el[2][0],
		m_el[2][1] + m.m_el[2][1],
		m_el[2][2] + m.m_el[2][2]);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x() * k, m[0].y() * k, m[0].z() * k,
		m[1].x() * k, m[1].y() * k, m[1].z() * k,
		m[2].x() * k, m[2].y() * k, m[2].z() * k);
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
	setValue(
		m_el[0][0] - m.m_el[0][0],
		m_el[0][1] - m.m_el[0][1],
		m_el[0][2] - m.m_el[0][2],
		m_el[1][0] - m.m_el[1][0],
		m_el[1][1] - m.m_el[1][1],
		m_el[1][2] - m.m_el[1][2],
		m_el[2][0] - m.m_el[2][0],
		m_el[2][1] - m.m_el[2][1],
		m_el[2][2] - m.m_el[2][2]);
	return btTriple((*this)[0], (*this)[1], (*this)[2]);
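/* det(M) is the scalar triple product of the rows; btTriple computes
   row0 . (row1 x row2). Equivalent expansion, as a sketch:

	// btScalar d = m[0].dot(m[1].cross(m[2]));  // same value as m.determinant()
*/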
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

	return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
	// note: zeros the w channel
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);

	return btMatrix3x3(v0, v1, v2);
	return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
					   m_el[0].y(), m_el[1].y(), m_el[2].y(),
					   m_el[0].z(), m_el[1].z(), m_el[2].z());
	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
					   cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
					   cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
	btScalar det = (*this)[0].dot(co);
	btAssert(det != btScalar(0.0));
	btScalar s = btScalar(1.0) / det;
	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
					   co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
					   co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
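/* Classical adjugate inverse: co holds the first column of cofactors, det is
   row 0 dotted with co (cofactor expansion of the determinant), and every
   entry is cofactor / det. Sanity-check sketch (matrix value is an assumption):

	btMatrix3x3 m(0, -1, 0,
				  1, 0, 0,
				  0, 0, 1);            // 90-degree rotation about Z
	btMatrix3x3 id = m * m.inverse();  // approximately the identity
*/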
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// zeros w
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
	row = m_el[1].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	// zeros w
	static const uint32x4_t xyzMask = (const uint32x4_t){
		static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
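/* All three paths above compute this->transpose() * m without materializing
   the transpose: entry (i, j) is column i of *this dotted with column j of m.
   Identity sketch:

	// A.transposeTimes(B) == A.transpose() * B
*/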
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // we rely on transpose() zeroing the w channel
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // we rely on transpose() zeroing the w channel
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
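/* Dual of transposeTimes: every entry is a row of *this dotted with a row of
   m, giving this * m.transpose() without building the transpose. Sketch:

	// A.timesTranspose(B) == A * B.transpose()
	// for a rotation matrix R, R.timesTranspose(R) is approximately the identity
*/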
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return v.dot3(m[0], m[1], m[2]);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	const __m128 vv = v.mVec128;

	__m128 c0 = bt_splat_ps(vv, 0);
	__m128 c1 = bt_splat_ps(vv, 1);
	__m128 c2 = bt_splat_ps(vv, 2);

	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	__m128 c0 = bt_splat_ps(m10, 0);
	__m128 c1 = bt_splat_ps(m11, 0);
	__m128 c2 = bt_splat_ps(m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps(m10, 1);
	__m128 c1_1 = bt_splat_ps(m11, 1);
	__m128 c2_1 = bt_splat_ps(m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	m10 = bt_splat_ps(m10, 2);
	m11 = bt_splat_ps(m11, 2);
	m12 = bt_splat_ps(m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);
#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 c0, c1, c2;

	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

	c0 = _mm_and_ps(c0, c1);
	c0 = _mm_and_ps(c0, c2);

	// only the x, y and z lanes matter; mask off the w compare bit
	return (0x7 == (_mm_movemask_ps((__m128)c0) & 0x7));
#else
	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
#endif
	for (int i = 0; i < 3; i++)
		m_el[i].serialize(dataOut.m_el[i]);

	for (int i = 0; i < 3; i++)
		m_el[i].serializeFloat(dataOut.m_el[i]);

	for (int i = 0; i < 3; i++)
		m_el[i].deSerialize(dataIn.m_el[i]);

	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeFloat(dataIn.m_el[i]);

	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeDouble(dataIn.m_el[i]);
#endif  //BT_MATRIX3x3_H