31 #ifndef _VECTORMATH_MAT_AOS_CPP_H
32 #define _VECTORMATH_MAT_AOS_CPP_H
34 namespace Vectormath {
// Permute-control masks. Each macro builds a vec_uint4 from four
// _VECTORMATH_PERM_* selectors and casts it to vec_uchar16; the suffix of the
// macro name spells those four selectors in order (e.g. ZBWX -> Z, B, W, X).
// NOTE(review): X/Y/Z/W vs. A/B/C/D presumably select lanes of the first vs.
// the second permute operand -- the _VECTORMATH_PERM_* definitions are not
// visible here, so confirm against them before relying on this.
41 #define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
42 #define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
43 #define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
44 #define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
45 #define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })
46 #define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
47 #define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
48 #define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
49 #define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
50 #define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
51 #define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
52 #define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
53 #define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
54 #define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
55 #define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
56 #define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
57 #define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
58 #define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
59 #define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
// pi/2 as a single-precision literal.
60 #define _VECTORMATH_PI_OVER_2 1.570796327f
74 mCol0 = Vector3( scalar );
75 mCol1 = Vector3( scalar );
76 mCol2 = Vector3( scalar );
88 __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
89 __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
92 __m128 select_x = _mm_load_ps((
float *)sx);
93 __m128 select_z = _mm_load_ps((
float *)sz);
95 xyzw_2 = _mm_add_ps( unitQuat.
get128(), unitQuat.
get128() );
96 wwww = _mm_shuffle_ps( unitQuat.
get128(), unitQuat.
get128(), _MM_SHUFFLE(3,3,3,3) );
97 yzxw = _mm_shuffle_ps( unitQuat.
get128(), unitQuat.
get128(), _MM_SHUFFLE(3,0,2,1) );
98 zxyw = _mm_shuffle_ps( unitQuat.
get128(), unitQuat.
get128(), _MM_SHUFFLE(3,1,0,2) );
99 yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
100 zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
102 tmp0 = _mm_mul_ps( yzxw_2, wwww );
103 tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );
104 tmp2 = _mm_mul_ps( yzxw, xyzw_2 );
105 tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );
106 tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );
107 tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );
109 tmp3 =
vec_sel( tmp0, tmp1, select_x );
110 tmp4 =
vec_sel( tmp1, tmp2, select_x );
111 tmp5 =
vec_sel( tmp2, tmp0, select_x );
144 *(&
mCol0 + col) = vec;
158 (*this)[col].
setElem(row, val);
165 tmpV3_0 = this->
getCol( col );
167 this->
setCol( col, tmpV3_0 );
193 return *(&
mCol0 + col);
203 return *(&
mCol0 + col);
208 return *(&
mCol0 + col);
221 __m128 tmp0, tmp1, res0, res1, res2;
222 tmp0 =
vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
223 tmp1 =
vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
224 res0 =
vec_mergeh( tmp0, mat.getCol1().get128() );
227 res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
228 res1 =
vec_sel(res1, mat.getCol1().get128(), select_y);
230 res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
241 __m128 tmp0, tmp1, tmp2, tmp3, tmp4,
dot, invdet, inv0, inv1, inv2;
242 tmp2 =
_vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
243 tmp0 =
_vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
244 tmp1 =
_vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
253 inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
254 inv1 =
vec_sel(inv1, tmp1, select_y);
256 inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
258 inv0 =
vec_mul( inv0, invdet );
259 inv1 =
vec_mul( inv1, invdet );
260 inv2 =
vec_mul( inv2, invdet );
270 return dot( mat.getCol2(),
cross( mat.getCol0(), mat.getCol1() ) );
276 (
mCol0 + mat.mCol0 ),
277 (
mCol1 + mat.mCol1 ),
278 (
mCol2 + mat.mCol2 )
285 (
mCol0 - mat.mCol0 ),
286 (
mCol1 - mat.mCol1 ),
287 (
mCol2 - mat.mCol2 )
323 return *
this * floatInVec(scalar);
342 *
this = *
this * scalar;
359 __m128 xxxx, yyyy, zzzz;
366 return Vector3( res );
372 ( *
this * mat.mCol0 ),
373 ( *
this * mat.mCol1 ),
374 ( *
this * mat.mCol2 )
409 __m128 s, c, res1, res2;
413 zero = _mm_setzero_ps();
415 res1 =
vec_sel( zero, c, select_y );
416 res1 =
vec_sel( res1, s, select_z );
418 res2 =
vec_sel( res2, c, select_z );
433 __m128 s, c, res0, res2;
437 zero = _mm_setzero_ps();
439 res0 =
vec_sel( zero, c, select_x );
441 res2 =
vec_sel( zero, s, select_x );
442 res2 =
vec_sel( res2, c, select_z );
457 __m128 s, c, res0, res1;
461 zero = _mm_setzero_ps();
463 res0 =
vec_sel( zero, c, select_x );
464 res0 =
vec_sel( res0, s, select_y );
466 res1 =
vec_sel( res1, c, select_y );
476 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
483 Z1 =
vec_and( Z1, _mm_load_ps( (
float *)select_xyz ) );
484 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
485 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
498 return rotation( floatInVec(radians), unitVec );
503 __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
509 oneMinusC =
vec_sub( _mm_set1_ps(1.0f), c );
516 tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
521 tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
523 tmp0 =
vec_sel( tmp0, c, select_x );
524 tmp1 =
vec_sel( tmp1, c, select_y );
525 tmp2 =
vec_sel( tmp2, c, select_z );
540 __m128 zero = _mm_setzero_ps();
545 Vector3(
vec_sel( zero, scaleVec.get128(), select_x ) ),
546 Vector3(
vec_sel( zero, scaleVec.get128(), select_y ) ),
547 Vector3(
vec_sel( zero, scaleVec.get128(), select_z ) )
554 ( mat.getCol0() * scaleVec.getX( ) ),
555 ( mat.getCol1() * scaleVec.getY( ) ),
556 ( mat.getCol2() * scaleVec.getZ( ) )
572 select( mat0.getCol0(), mat1.getCol0(), select1 ),
573 select( mat0.getCol1(), mat1.getCol1(), select1 ),
574 select( mat0.getCol2(), mat1.getCol2(), select1 )
587 #ifdef _VECTORMATH_DEBUG
591 print( mat.getRow( 0 ) );
592 print( mat.getRow( 1 ) );
593 print( mat.getRow( 2 ) );
598 printf(
"%s:\n", name);
614 mCol0 = Vector4( scalar );
615 mCol1 = Vector4( scalar );
616 mCol2 = Vector4( scalar );
617 mCol3 = Vector4( scalar );
646 mCol0 = Vector4( mat.getCol0(), 0.0f );
647 mCol1 = Vector4( mat.getCol1(), 0.0f );
648 mCol2 = Vector4( mat.getCol2(), 0.0f );
649 mCol3 = Vector4( translateVec, 1.0f );
655 mat = Matrix3( unitQuat );
656 mCol0 = Vector4( mat.getCol0(), 0.0f );
657 mCol1 = Vector4( mat.getCol1(), 0.0f );
658 mCol2 = Vector4( mat.getCol2(), 0.0f );
659 mCol3 = Vector4( translateVec, 1.0f );
688 *(&
mCol0 + col) = vec;
695 mCol1.
setElem( row, vec.getElem( 1 ) );
696 mCol2.
setElem( row, vec.getElem( 2 ) );
697 mCol3.
setElem( row, vec.getElem( 3 ) );
703 (*this)[col].
setElem(row, val);
710 tmpV3_0 = this->
getCol( col );
712 this->
setCol( col, tmpV3_0 );
743 return *(&
mCol0 + col);
748 return Vector4(
mCol0.
getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.
getElem( row ) );
753 return *(&
mCol0 + col);
758 return *(&
mCol0 + col);
772 __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
773 tmp0 =
vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
774 tmp1 =
vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
775 tmp2 =
vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
776 tmp3 =
vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
797 __m128 r1,r2,r3,tt,tt2;
799 __m128 trns0,trns1,trns2,trns3;
801 __m128 _L1 = mat.getCol0().get128();
802 __m128 _L2 = mat.getCol1().get128();
803 __m128 _L3 = mat.getCol2().get128();
804 __m128 _L4 = mat.getCol3().get128();
818 Va =
_mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
819 Vb =
_mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
820 Vc =
_mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
823 Det = _mm_mul_ps(sum,_L1);
824 Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
826 const __m128 Sign_PNPN = _mm_load_ps((
float *)
_vmathPNPN);
827 const __m128 Sign_NPNP = _mm_load_ps((
float *)
_vmathNPNP);
829 __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
832 tt =
_mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
833 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
834 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
835 __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
838 Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
842 Va = _mm_mul_ps(tt,Vb);
843 Vb = _mm_mul_ps(tt,Vc);
844 Vc = _mm_mul_ps(tt,_L2);
850 tt =
_mm_ror_ps(_L4,1); sum = _mm_mul_ps(tt,r1);
851 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
852 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
853 __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
856 RDet = _mm_div_ss(_mm_load_ss((
float *)&
_vmathZERONE), Det);
857 RDet = _mm_shuffle_ps(RDet,RDet,0x00);
860 mtL1 = _mm_mul_ps(mtL1, RDet);
861 mtL2 = _mm_mul_ps(mtL2, RDet);
862 mtL3 = _mm_mul_ps(mtL3, RDet);
865 tt =
_mm_ror_ps(_L3,1); sum = _mm_mul_ps(tt,r1);
866 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
867 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
868 __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
869 mtL4 = _mm_mul_ps(mtL4, RDet);
872 trns0 = _mm_unpacklo_ps(mtL1,mtL2);
873 trns1 = _mm_unpacklo_ps(mtL3,mtL4);
874 trns2 = _mm_unpackhi_ps(mtL1,mtL2);
875 trns3 = _mm_unpackhi_ps(mtL3,mtL4);
876 _L1 = _mm_movelh_ps(trns0,trns1);
877 _L2 = _mm_movehl_ps(trns1,trns0);
878 _L3 = _mm_movelh_ps(trns2,trns3);
879 _L4 = _mm_movehl_ps(trns3,trns2);
891 Transform3 affineMat;
892 affineMat.
setCol0( mat.getCol0().getXYZ( ) );
893 affineMat.
setCol1( mat.getCol1().getXYZ( ) );
894 affineMat.
setCol2( mat.getCol2().getXYZ( ) );
895 affineMat.
setCol3( mat.getCol3().getXYZ( ) );
896 return Matrix4(
inverse( affineMat ) );
901 Transform3 affineMat;
902 affineMat.
setCol0( mat.getCol0().getXYZ( ) );
903 affineMat.
setCol1( mat.getCol1().getXYZ( ) );
904 affineMat.
setCol2( mat.getCol2().getXYZ( ) );
905 affineMat.
setCol3( mat.getCol3().getXYZ( ) );
912 __m128 r1,r2,r3,tt,tt2;
915 __m128 _L1 = mat.getCol0().get128();
916 __m128 _L2 = mat.getCol1().get128();
917 __m128 _L3 = mat.getCol2().get128();
918 __m128 _L4 = mat.getCol3().get128();
932 Va =
_mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
933 Vb =
_mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
934 Vc =
_mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
937 Det = _mm_mul_ps(sum,_L1);
938 Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
941 tt =
_mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
942 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
943 tt =
_mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
946 Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
947 return floatInVec(Det, 0);
953 (
mCol0 + mat.mCol0 ),
954 ( mCol1 + mat.mCol1 ),
955 ( mCol2 + mat.mCol2 ),
956 ( mCol3 + mat.mCol3 )
963 (
mCol0 - mat.mCol0 ),
964 ( mCol1 - mat.mCol1 ),
965 ( mCol2 - mat.mCol2 ),
966 ( mCol3 - mat.mCol3 )
1004 return *
this * floatInVec(scalar);
1024 *
this = *
this * scalar;
1035 return mat * scalar;
1042 _mm_add_ps(_mm_mul_ps(
mCol0.
get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1043 _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.
get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
1051 _mm_add_ps(_mm_mul_ps(
mCol0.
get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1052 _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
1060 _mm_add_ps(_mm_mul_ps(
mCol0.
get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
1061 _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.
get128()))
1068 ( *
this * mat.mCol0 ),
1069 ( *
this * mat.mCol1 ),
1070 ( *
this * mat.mCol2 ),
1071 ( *
this * mat.mCol3 )
1077 *
this = *
this * mat;
1084 ( *
this * tfrm.getCol0() ),
1085 ( *
this * tfrm.getCol1() ),
1086 ( *
this * tfrm.getCol2() ),
1087 ( *
this * Point3( tfrm.getCol3() ) )
1093 *
this = *
this * tfrm;
1100 mulPerElem( mat0.getCol0(), mat1.getCol0() ),
1101 mulPerElem( mat0.getCol1(), mat1.getCol1() ),
1102 mulPerElem( mat0.getCol2(), mat1.getCol2() ),
1120 mCol1.
setXYZ( mat3.getCol1() );
1121 mCol2.
setXYZ( mat3.getCol2() );
1136 mCol3.
setXYZ( translateVec );
1147 return rotationX( floatInVec(radians) );
1152 __m128 s, c, res1, res2;
1156 zero = _mm_setzero_ps();
1158 res1 =
vec_sel( zero, c, select_y );
1159 res1 =
vec_sel( res1, s, select_z );
1161 res2 =
vec_sel( res2, c, select_z );
1177 __m128 s, c, res0, res2;
1181 zero = _mm_setzero_ps();
1183 res0 =
vec_sel( zero, c, select_x );
1185 res2 =
vec_sel( zero, s, select_x );
1186 res2 =
vec_sel( res2, c, select_z );
1202 __m128 s, c, res0, res1;
1206 zero = _mm_setzero_ps();
1208 res0 =
vec_sel( zero, c, select_x );
1209 res0 =
vec_sel( res0, s, select_y );
1211 res1 =
vec_sel( res1, c, select_y );
1222 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1229 Z1 =
vec_and( Z1, _mm_load_ps( (
float *)select_xyz ) );
1230 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1231 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1245 return rotation( floatInVec(radians), unitVec );
1250 __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
1256 oneMinusC =
vec_sub( _mm_set1_ps(1.0f), c );
1263 tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
1268 tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
1270 tmp0 =
vec_sel( tmp0, c, select_x );
1271 tmp1 =
vec_sel( tmp1, c, select_y );
1272 tmp2 =
vec_sel( tmp2, c, select_z );
1274 axis =
vec_and( axis, _mm_load_ps( (
float *)select_xyz ) );
1275 tmp0 =
vec_and( tmp0, _mm_load_ps( (
float *)select_xyz ) );
1276 tmp1 =
vec_and( tmp1, _mm_load_ps( (
float *)select_xyz ) );
1277 tmp2 =
vec_and( tmp2, _mm_load_ps( (
float *)select_xyz ) );
1293 __m128 zero = _mm_setzero_ps();
1298 Vector4(
vec_sel( zero, scaleVec.get128(), select_x ) ),
1299 Vector4(
vec_sel( zero, scaleVec.get128(), select_y ) ),
1300 Vector4(
vec_sel( zero, scaleVec.get128(), select_z ) ),
1308 ( mat.getCol0() * scaleVec.getX( ) ),
1309 ( mat.getCol1() * scaleVec.getY( ) ),
1310 ( mat.getCol2() * scaleVec.getZ( ) ),
1318 scale4 = Vector4( scaleVec, 1.0f );
1333 Vector4( translateVec, 1.0f )
1340 Vector3 v3X, v3Y, v3Z;
1342 v3Z =
normalize( ( eyePos - lookAtPos ) );
1344 v3Y =
cross( v3Z, v3X );
1345 m4EyeFrame =
Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
1352 __m128 zero, col0, col1, col2, col3;
1353 union { __m128 v;
float s[4]; } tmp;
1355 rangeInv = 1.0f / ( zNear - zFar );
1356 zero = _mm_setzero_ps();
1358 tmp.s[0] = f / aspect;
1364 tmp.s[2] = ( zNear + zFar ) * rangeInv;
1368 tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
1393 __m128 diff,
sum, inv_diff;
1394 __m128 diagonal, column, near2;
1395 __m128 zero = _mm_setzero_ps();
1396 union { __m128 v;
float s[4]; } l, f, r, n, b, t;
1411 near2 =
vec_add( near2, near2 );
1412 diagonal =
vec_mul( near2, inv_diff );
1413 column =
vec_mul( sum, inv_diff );
1419 Vector4(
vec_sel( zero, diagonal, select_x ) ),
1420 Vector4(
vec_sel( zero, diagonal, select_y ) ),
1421 Vector4(
vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
1441 __m128 diff,
sum, inv_diff, neg_inv_diff;
1442 __m128 diagonal, column;
1443 __m128 zero = _mm_setzero_ps();
1444 union { __m128 v;
float s[4]; } l, f, r, n, b, t;
1458 neg_inv_diff =
negatef4( inv_diff );
1459 diagonal =
vec_add( inv_diff, inv_diff );
1464 column =
vec_mul( sum,
vec_sel( neg_inv_diff, inv_diff, select_z ) );
1466 Vector4(
vec_sel( zero, diagonal, select_x ) ),
1467 Vector4(
vec_sel( zero, diagonal, select_y ) ),
1468 Vector4(
vec_sel( zero, diagonal, select_z ) ),
1469 Vector4(
vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
1476 select( mat0.getCol0(), mat1.getCol0(), select1 ),
1477 select( mat0.getCol1(), mat1.getCol1(), select1 ),
1478 select( mat0.getCol2(), mat1.getCol2(), select1 ),
1479 select( mat0.getCol3(), mat1.getCol3(), select1 )
1493 #ifdef _VECTORMATH_DEBUG
1497 print( mat.getRow( 0 ) );
1498 print( mat.getRow( 1 ) );
1499 print( mat.getRow( 2 ) );
1500 print( mat.getRow( 3 ) );
1505 printf(
"%s:\n", name);
1521 mCol0 = Vector3( scalar );
1522 mCol1 = Vector3( scalar );
1523 mCol2 = Vector3( scalar );
1524 mCol3 = Vector3( scalar );
1581 *(&
mCol0 + col) = vec;
1588 mCol1.
setElem( row, vec.getElem( 1 ) );
1589 mCol2.
setElem( row, vec.getElem( 2 ) );
1590 mCol3.
setElem( row, vec.getElem( 3 ) );
1596 (*this)[col].
setElem(row, val);
1603 tmpV3_0 = this->
getCol( col );
1605 this->
setCol( col, tmpV3_0 );
1636 return *(&
mCol0 + col);
1641 return Vector4(
mCol0.
getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.
getElem( row ) );
1646 return *(&
mCol0 + col);
1651 return *(&
mCol0 + col);
1665 __m128 inv0, inv1, inv2, inv3;
1666 __m128 tmp0, tmp1, tmp2, tmp3, tmp4,
dot, invdet;
1667 __m128 xxxx, yyyy, zzzz;
1668 tmp2 =
_vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
1669 tmp0 =
_vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
1670 tmp1 =
_vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
1671 inv3 =
negatef4( tfrm.getCol3().get128() );
1681 inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
1682 inv1 =
vec_sel(inv1, tmp1, select_y);
1684 inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
1689 inv3 =
vec_madd( inv1, yyyy, inv3 );
1690 inv3 =
vec_madd( inv2, zzzz, inv3 );
1691 inv0 =
vec_mul( inv0, invdet );
1692 inv1 =
vec_mul( inv1, invdet );
1693 inv2 =
vec_mul( inv2, invdet );
1694 inv3 =
vec_mul( inv3, invdet );
1705 __m128 inv0, inv1, inv2, inv3;
1707 __m128 xxxx, yyyy, zzzz;
1708 tmp0 =
vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1709 tmp1 =
vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1710 inv3 =
negatef4( tfrm.getCol3().get128() );
1711 inv0 =
vec_mergeh( tmp0, tfrm.getCol1().get128() );
1715 inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
1716 inv1 =
vec_sel(inv1, tfrm.getCol1().get128(), select_y);
1718 inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
1723 inv3 =
vec_madd( inv1, yyyy, inv3 );
1724 inv3 =
vec_madd( inv2, zzzz, inv3 );
1746 __m128 xxxx, yyyy, zzzz;
1751 res =
vec_madd( mCol1.get128(), yyyy, res );
1752 res =
vec_madd( mCol2.get128(), zzzz, res );
1753 return Vector3( res );
1758 __m128 tmp0, tmp1, res;
1759 __m128 xxxx, yyyy, zzzz;
1764 tmp1 =
vec_mul( mCol1.get128(), yyyy );
1765 tmp0 =
vec_madd( mCol2.get128(), zzzz, tmp0 );
1768 return Point3( res );
1774 ( *
this * tfrm.mCol0 ),
1775 ( *
this * tfrm.mCol1 ),
1776 ( *
this * tfrm.mCol2 ),
1777 Vector3( ( *
this * Point3( tfrm.mCol3 ) ) )
1783 *
this = *
this * tfrm;
1790 mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
1791 mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
1792 mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
1793 mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
1809 mCol0 = tfrm.getCol0();
1810 mCol1 = tfrm.getCol1();
1811 mCol2 = tfrm.getCol2();
1817 return Matrix3(
mCol0, mCol1, mCol2 );
1822 mCol3 = translateVec;
1833 return rotationX( floatInVec(radians) );
1838 __m128 s, c, res1, res2;
1842 zero = _mm_setzero_ps();
1844 res1 =
vec_sel( zero, c, select_y );
1845 res1 =
vec_sel( res1, s, select_z );
1847 res2 =
vec_sel( res2, c, select_z );
1863 __m128 s, c, res0, res2;
1867 zero = _mm_setzero_ps();
1869 res0 =
vec_sel( zero, c, select_x );
1871 res2 =
vec_sel( zero, s, select_x );
1872 res2 =
vec_sel( res2, c, select_z );
1888 __m128 s, c, res0, res1;
1891 __m128 zero = _mm_setzero_ps();
1893 res0 =
vec_sel( zero, c, select_x );
1894 res0 =
vec_sel( res0, s, select_y );
1896 res1 =
vec_sel( res1, c, select_y );
1907 __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1914 Z1 =
vec_and( Z1, _mm_load_ps( (
float *)select_xyz ) );
1915 Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1916 Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1930 return rotation( floatInVec(radians), unitVec );
1945 __m128 zero = _mm_setzero_ps();
1950 Vector3(
vec_sel( zero, scaleVec.get128(), select_x ) ),
1951 Vector3(
vec_sel( zero, scaleVec.get128(), select_y ) ),
1952 Vector3(
vec_sel( zero, scaleVec.get128(), select_z ) ),
1960 ( tfrm.getCol0() * scaleVec.getX( ) ),
1961 ( tfrm.getCol1() * scaleVec.getY( ) ),
1962 ( tfrm.getCol2() * scaleVec.getZ( ) ),
1990 select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
1991 select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
1992 select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
1993 select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
2007 #ifdef _VECTORMATH_DEBUG
2011 print( tfrm.getRow( 0 ) );
2012 print( tfrm.getRow( 1 ) );
2013 print( tfrm.getRow( 2 ) );
2018 printf(
"%s:\n", name);
2027 __m128 col0, col1, col2;
2028 __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
2029 __m128 zy_xz_yx, yz_zx_xy,
sum, diff;
2030 __m128 radicand, invSqrt,
scale;
2031 __m128 res0, res1, res2, res3;
2038 col0 = tfrm.getCol0().get128();
2039 col1 = tfrm.getCol1().get128();
2040 col2 = tfrm.getCol2().get128();
2051 xx_yy =
vec_sel( col0, col1, select_y );
2055 xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
2056 xx_yy_zz_xx =
vec_sel( xx_yy_zz_xx, col2, select_z );
2057 yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
2058 zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
2060 diagSum =
vec_add(
vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2061 diagDiff =
vec_sub(
vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2062 radicand =
vec_add(
vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
2068 zy_xz_yx =
vec_sel( col0, col1, select_z );
2070 zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );
2072 yz_zx_xy =
vec_sel( col0, col1, select_x );
2074 yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );
2077 sum =
vec_add( zy_xz_yx, yz_zx_xy );
2078 diff =
vec_sub( zy_xz_yx, yz_zx_xy );
2080 scale =
vec_mul( invSqrt, _mm_set1_ps(0.5f) );
2083 res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
2086 res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
2089 res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
2092 res0 =
vec_sel( res0, radicand, select_x );
2093 res1 =
vec_sel( res1, radicand, select_y );
2094 res2 =
vec_sel( res2, radicand, select_z );
2095 res3 =
vec_sel( res3, radicand, select_w );
2115 ( tfrm0 * tfrm1.getX( ) ),
2116 ( tfrm0 * tfrm1.getY( ) ),
2117 ( tfrm0 * tfrm1.getZ( ) )
2124 ( tfrm0 * tfrm1.getX( ) ),
2125 ( tfrm0 * tfrm1.getY( ) ),
2126 ( tfrm0 * tfrm1.getZ( ) ),
2127 ( tfrm0 * tfrm1.getW( ) )
2133 __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
2134 __m128 xxxx, yyyy, zzzz;
2135 tmp0 =
vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
2136 tmp1 =
vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
2138 mcol0 =
vec_mergeh( tmp0, mat.getCol1().get128() );
2141 mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
2142 mcol1 =
vec_sel(mcol1, mat.getCol1().get128(), select_y);
2144 mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
2149 res =
vec_madd( mcol1, yyyy, res );
2150 res =
vec_madd( mcol2, zzzz, res );
2151 return Vector3( res );
2156 __m128 neg, res0, res1, res2;
2162 res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
2167 res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
2172 res0 =
vec_and( res0, _mm_load_ps((
float *)filter_x ) );
2173 res1 =
vec_and( res1, _mm_load_ps((
float *)filter_y ) );
2174 res2 =
vec_and( res2, _mm_load_ps((
float *)filter_z ) );
2184 return Matrix3(
cross( vec, mat.getCol0() ),
cross( vec, mat.getCol1() ),
cross( vec, mat.getCol2() ) );
const Quat normalize(const Quat &quat)
Matrix4 & setCol1(const Vector4 &col1)
float determinant(const Matrix3 &mat)
const Vector3 rowMul(const Vector3 &vec, const Matrix3 &mat)
static const Matrix4 scale(const Vector3 &scaleVec)
const Vector3 getXYZ() const
const Matrix3 crossMatrixMul(const Vector3 &vec, const Matrix3 &mat)
static const Vector4 yAxis()
Vector4 & setXYZ(const Vector3 &vec)
static const Matrix4 rotationX(float radians)
static const Matrix4 translation(const Vector3 &translateVec)
static const Vector3 xAxis()
static const Vector3 zAxis()
const Matrix4 operator+(const Matrix4 &mat) const
const Matrix3 operator*(float scalar) const
Matrix3 & setElem(int col, int row, float val)
static const Vector4 xAxis()
#define vec_madd(a, b, c)
const Matrix3 appendScale(const Matrix3 &mat, const Vector3 &scaleVec)
Matrix3 & setCol0(const Vector3 &col0)
Matrix3 & setCol2(const Vector3 &col2)
Matrix3 & operator-=(const Matrix3 &mat)
const Matrix3 inverse(const Matrix3 &mat)
static const Matrix4 rotation(float radians, const Vector3 &unitVec)
static const Matrix4 rotationY(float radians)
const Matrix3 crossMatrix(const Vector3 &vec)
float getElem(int col, int row) const
Matrix4 & operator-=(const Matrix4 &mat)
static const Matrix4 orthographic(float left, float right, float bottom, float top, float zNear, float zFar)
#define _mm_ror_ps(vec, i)
#define VECTORMATH_FORCE_INLINE
static __m128 _vmathVfCross(__m128 vec0, __m128 vec1)
static const Matrix3 rotationZ(float radians)
static const Vector3 yAxis()
static const Matrix3 rotationZYX(const Vector3 &radiansXYZ)
static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4]
const Matrix4 operator*(float scalar) const
const Vector4 getCol3() const
static const Matrix4 identity()
static const Vector4 wAxis()
static const Matrix3 identity()
const Vector4 getCol1() const
Matrix4 & setTranslation(const Vector3 &translateVec)
const Vector4 getCol0() const
const Matrix4 affineInverse(const Matrix4 &mat)
static const Matrix3 scale(const Vector3 &scaleVec)
const Point3 scale(const Point3 &pnt, float scaleVal)
Vector4 & operator[](int col)
const Vector3 getCol0() const
const Matrix3 getUpper3x3() const
Matrix4 & setElem(int col, int row, float val)
const Matrix3 outer(const Vector3 &tfrm0, const Vector3 &tfrm1)
const Matrix4 operator-() const
Matrix4 & operator*=(float scalar)
static const Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar)
Vector3 & operator[](int col)
Vector3 & setElem(int idx, float value)
const Vector3 getTranslation() const
const Matrix3 prependScale(const Vector3 &scaleVec, const Matrix3 &mat)
#define VM_ATTRIBUTE_ALIGN16
const Vector4 getCol2() const
#define _VECTORMATH_PI_OVER_2
const Matrix3 operator+(const Matrix3 &mat) const
Matrix3 & operator+=(const Matrix3 &mat)
#define vec_nmsub(a, b, c)
const Matrix3 operator-() const
Matrix3 & setCol1(const Vector3 &col1)
static const Matrix3 rotationX(float radians)
const Matrix4 orthoInverse(const Matrix4 &mat)
static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4]
static __m128 newtonrapson_rsqrt4(const __m128 v)
float getElem(int idx) const
Matrix4 & setCol3(const Vector4 &col3)
static void sincosf4(vec_float4 x, vec_float4 *s, vec_float4 *c)
static const Matrix3 rotation(float radians, const Vector3 &unitVec)
const Vector4 getCol(int col) const
Matrix4 & setCol(int col, const Vector4 &vec)
Matrix4 & setUpper3x3(const Matrix3 &mat3)
Matrix4 & setCol2(const Vector4 &col2)
static const Matrix4 lookAt(const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec)
const Matrix3 select(const Matrix3 &mat0, const Matrix3 &mat1, bool select1)
Matrix4 & operator+=(const Matrix4 &mat)
static const Matrix4 perspective(float fovyRadians, float aspect, float zNear, float zFar)
float getElem(int idx) const
static const Matrix3 rotationY(float radians)
const Matrix3 transpose(const Matrix3 &mat)
Matrix3 & operator*=(float scalar)
const Vector3 getRow(int row) const
Matrix4 & setCol0(const Vector4 &col0)
Matrix4 & setRow(int row, const Vector4 &vec)
Matrix3 & setCol(int col, const Vector3 &vec)
float getElem(int col, int row) const
const Vector3 getCol(int col) const
static const Matrix4 rotationZYX(const Vector3 &radiansXYZ)
float sum(const Vector3 &vec)
static const Vector4 zAxis()
float dot(const Quat &quat0, const Quat &quat1)
static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4]
const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
const Matrix3 mulPerElem(const Matrix3 &mat0, const Matrix3 &mat1)
static const Matrix4 rotationZ(float radians)
Matrix3 & operator=(const Matrix3 &mat)
const Vector4 getRow(int row) const
const Vector3 getCol2() const
static __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
static __m128 _vmathVfDot3(__m128 vec0, __m128 vec1)
const Matrix3 absPerElem(const Matrix3 &mat)
Vector4 & setElem(int idx, float value)
const Matrix3 operator*(float scalar, const Matrix3 &mat)
Matrix3 & setRow(int row, const Vector3 &vec)
Matrix4 & operator=(const Matrix4 &mat)
const Vector3 getCol1() const