Bullet Collision Detection & Physics Library
sse/mat_aos.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms,
6  with or without modification, are permitted provided that the
7  following conditions are met:
8  * Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  * Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  * Neither the name of the Sony Computer Entertainment Inc nor the names
14  of its contributors may be used to endorse or promote products derived
15  from this software without specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #ifndef _VECTORMATH_MAT_AOS_CPP_H
32 #define _VECTORMATH_MAT_AOS_CPP_H
33 
34 namespace Vectormath {
35 namespace Aos {
36 
37 //-----------------------------------------------------------------------------
38 // Constants
39 // for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
40 
41 #define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
42 #define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
43 #define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
44 #define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
45 #define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })
46 #define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
47 #define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
48 #define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
49 #define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
50 #define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
51 #define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
52 #define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
53 #define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
54 #define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
55 #define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
56 #define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
57 #define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
58 #define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
59 #define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
60 #define _VECTORMATH_PI_OVER_2 1.570796327f
61 
62 //-----------------------------------------------------------------------------
63 // Definitions
64 
65 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat )
66 {
67  mCol0 = mat.mCol0;
68  mCol1 = mat.mCol1;
69  mCol2 = mat.mCol2;
70 }
71 
73 {
74  mCol0 = Vector3( scalar );
75  mCol1 = Vector3( scalar );
76  mCol2 = Vector3( scalar );
77 }
78 
80 {
81  mCol0 = Vector3( scalar );
82  mCol1 = Vector3( scalar );
83  mCol2 = Vector3( scalar );
84 }
85 
87 {
88  __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
89  __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
90  VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0};
91  VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0};
92  __m128 select_x = _mm_load_ps((float *)sx);
93  __m128 select_z = _mm_load_ps((float *)sz);
94 
95  xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
96  wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
97  yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
98  zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
99  yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
100  zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
101 
102  tmp0 = _mm_mul_ps( yzxw_2, wwww ); // tmp0 = 2yw, 2zw, 2xw, 2w2
103  tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) ); // tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
104  tmp2 = _mm_mul_ps( yzxw, xyzw_2 ); // tmp2 = 2xy, 2yz, 2xz, 2w2
105  tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 ); // tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
106  tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) ); // tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
107  tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) ); // tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
108 
109  tmp3 = vec_sel( tmp0, tmp1, select_x );
110  tmp4 = vec_sel( tmp1, tmp2, select_x );
111  tmp5 = vec_sel( tmp2, tmp0, select_x );
112  mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
113  mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
114  mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
115 }
116 
117 VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
118 {
119  mCol0 = _col0;
120  mCol1 = _col1;
121  mCol2 = _col2;
122 }
123 
124 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
125 {
126  mCol0 = _col0;
127  return *this;
128 }
129 
130 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
131 {
132  mCol1 = _col1;
133  return *this;
134 }
135 
136 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
137 {
138  mCol2 = _col2;
139  return *this;
140 }
141 
142 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
143 {
144  *(&mCol0 + col) = vec;
145  return *this;
146 }
147 
148 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
149 {
150  mCol0.setElem( row, vec.getElem( 0 ) );
151  mCol1.setElem( row, vec.getElem( 1 ) );
152  mCol2.setElem( row, vec.getElem( 2 ) );
153  return *this;
154 }
155 
156 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val )
157 {
158  (*this)[col].setElem(row, val);
159  return *this;
160 }
161 
163 {
164  Vector3 tmpV3_0;
165  tmpV3_0 = this->getCol( col );
166  tmpV3_0.setElem( row, val );
167  this->setCol( col, tmpV3_0 );
168  return *this;
169 }
170 
171 VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const
172 {
173  return this->getCol( col ).getElem( row );
174 }
175 
176 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const
177 {
178  return mCol0;
179 }
180 
181 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const
182 {
183  return mCol1;
184 }
185 
186 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const
187 {
188  return mCol2;
189 }
190 
191 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const
192 {
193  return *(&mCol0 + col);
194 }
195 
196 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const
197 {
198  return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
199 }
200 
202 {
203  return *(&mCol0 + col);
204 }
205 
206 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const
207 {
208  return *(&mCol0 + col);
209 }
210 
211 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat )
212 {
213  mCol0 = mat.mCol0;
214  mCol1 = mat.mCol1;
215  mCol2 = mat.mCol2;
216  return *this;
217 }
218 
219 VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat )
220 {
221  __m128 tmp0, tmp1, res0, res1, res2;
222  tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
223  tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
224  res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
225  //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
226  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
227  res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
228  res1 = vec_sel(res1, mat.getCol1().get128(), select_y);
229  //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
230  res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
231  res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);
232  return Matrix3(
233  Vector3( res0 ),
234  Vector3( res1 ),
235  Vector3( res2 )
236  );
237 }
238 
239 VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat )
240 {
241  __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
242  tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
243  tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
244  tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
245  dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
246  dot = vec_splat( dot, 0 );
247  invdet = recipf4( dot );
248  tmp3 = vec_mergeh( tmp0, tmp2 );
249  tmp4 = vec_mergel( tmp0, tmp2 );
250  inv0 = vec_mergeh( tmp3, tmp1 );
251  //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
252  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
253  inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
254  inv1 = vec_sel(inv1, tmp1, select_y);
255  //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
256  inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
257  inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
258  inv0 = vec_mul( inv0, invdet );
259  inv1 = vec_mul( inv1, invdet );
260  inv2 = vec_mul( inv2, invdet );
261  return Matrix3(
262  Vector3( inv0 ),
263  Vector3( inv1 ),
264  Vector3( inv2 )
265  );
266 }
267 
268 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat )
269 {
270  return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
271 }
272 
273 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
274 {
275  return Matrix3(
276  ( mCol0 + mat.mCol0 ),
277  ( mCol1 + mat.mCol1 ),
278  ( mCol2 + mat.mCol2 )
279  );
280 }
281 
282 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
283 {
284  return Matrix3(
285  ( mCol0 - mat.mCol0 ),
286  ( mCol1 - mat.mCol1 ),
287  ( mCol2 - mat.mCol2 )
288  );
289 }
290 
291 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
292 {
293  *this = *this + mat;
294  return *this;
295 }
296 
297 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
298 {
299  *this = *this - mat;
300  return *this;
301 }
302 
303 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const
304 {
305  return Matrix3(
306  ( -mCol0 ),
307  ( -mCol1 ),
308  ( -mCol2 )
309  );
310 }
311 
312 VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat )
313 {
314  return Matrix3(
315  absPerElem( mat.getCol0() ),
316  absPerElem( mat.getCol1() ),
317  absPerElem( mat.getCol2() )
318  );
319 }
320 
321 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const
322 {
323  return *this * floatInVec(scalar);
324 }
325 
327 {
328  return Matrix3(
329  ( mCol0 * scalar ),
330  ( mCol1 * scalar ),
331  ( mCol2 * scalar )
332  );
333 }
334 
336 {
337  return *this *= floatInVec(scalar);
338 }
339 
341 {
342  *this = *this * scalar;
343  return *this;
344 }
345 
346 VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat )
347 {
348  return floatInVec(scalar) * mat;
349 }
350 
351 VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
352 {
353  return mat * scalar;
354 }
355 
356 VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const
357 {
358  __m128 res;
359  __m128 xxxx, yyyy, zzzz;
360  xxxx = vec_splat( vec.get128(), 0 );
361  yyyy = vec_splat( vec.get128(), 1 );
362  zzzz = vec_splat( vec.get128(), 2 );
363  res = vec_mul( mCol0.get128(), xxxx );
364  res = vec_madd( mCol1.get128(), yyyy, res );
365  res = vec_madd( mCol2.get128(), zzzz, res );
366  return Vector3( res );
367 }
368 
369 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
370 {
371  return Matrix3(
372  ( *this * mat.mCol0 ),
373  ( *this * mat.mCol1 ),
374  ( *this * mat.mCol2 )
375  );
376 }
377 
378 VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
379 {
380  *this = *this * mat;
381  return *this;
382 }
383 
384 VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
385 {
386  return Matrix3(
387  mulPerElem( mat0.getCol0(), mat1.getCol0() ),
388  mulPerElem( mat0.getCol1(), mat1.getCol1() ),
389  mulPerElem( mat0.getCol2(), mat1.getCol2() )
390  );
391 }
392 
394 {
395  return Matrix3(
396  Vector3::xAxis( ),
397  Vector3::yAxis( ),
398  Vector3::zAxis( )
399  );
400 }
401 
402 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians )
403 {
404  return rotationX( floatInVec(radians) );
405 }
406 
408 {
409  __m128 s, c, res1, res2;
410  __m128 zero;
411  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
412  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
413  zero = _mm_setzero_ps();
414  sincosf4( radians.get128(), &s, &c );
415  res1 = vec_sel( zero, c, select_y );
416  res1 = vec_sel( res1, s, select_z );
417  res2 = vec_sel( zero, negatef4(s), select_y );
418  res2 = vec_sel( res2, c, select_z );
419  return Matrix3(
420  Vector3::xAxis( ),
421  Vector3( res1 ),
422  Vector3( res2 )
423  );
424 }
425 
427 {
428  return rotationY( floatInVec(radians) );
429 }
430 
432 {
433  __m128 s, c, res0, res2;
434  __m128 zero;
435  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
436  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
437  zero = _mm_setzero_ps();
438  sincosf4( radians.get128(), &s, &c );
439  res0 = vec_sel( zero, c, select_x );
440  res0 = vec_sel( res0, negatef4(s), select_z );
441  res2 = vec_sel( zero, s, select_x );
442  res2 = vec_sel( res2, c, select_z );
443  return Matrix3(
444  Vector3( res0 ),
445  Vector3::yAxis( ),
446  Vector3( res2 )
447  );
448 }
449 
451 {
452  return rotationZ( floatInVec(radians) );
453 }
454 
456 {
457  __m128 s, c, res0, res1;
458  __m128 zero;
459  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
460  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
461  zero = _mm_setzero_ps();
462  sincosf4( radians.get128(), &s, &c );
463  res0 = vec_sel( zero, c, select_x );
464  res0 = vec_sel( res0, s, select_y );
465  res1 = vec_sel( zero, negatef4(s), select_x );
466  res1 = vec_sel( res1, c, select_y );
467  return Matrix3(
468  Vector3( res0 ),
469  Vector3( res1 ),
470  Vector3::zAxis( )
471  );
472 }
473 
475 {
476  __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
477  angles = Vector4( radiansXYZ, 0.0f ).get128();
478  sincosf4( angles, &s, &c );
479  negS = negatef4( s );
480  Z0 = vec_mergel( c, s );
481  Z1 = vec_mergel( negS, c );
482  VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
483  Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
484  Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
485  Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
486  X0 = vec_splat( s, 0 );
487  X1 = vec_splat( c, 0 );
488  tmp = vec_mul( Z0, Y1 );
489  return Matrix3(
490  Vector3( vec_mul( Z0, Y0 ) ),
491  Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
492  Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
493  );
494 }
495 
496 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
497 {
498  return rotation( floatInVec(radians), unitVec );
499 }
500 
501 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
502 {
503  __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
504  axis = unitVec.get128();
505  sincosf4( radians.get128(), &s, &c );
506  xxxx = vec_splat( axis, 0 );
507  yyyy = vec_splat( axis, 1 );
508  zzzz = vec_splat( axis, 2 );
509  oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
510  axisS = vec_mul( axis, s );
511  negAxisS = negatef4( axisS );
512  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
513  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
514  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
515  //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
516  tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
517  tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
518  //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
519  tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
520  //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
521  tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
522  tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
523  tmp0 = vec_sel( tmp0, c, select_x );
524  tmp1 = vec_sel( tmp1, c, select_y );
525  tmp2 = vec_sel( tmp2, c, select_z );
526  return Matrix3(
527  Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
528  Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
529  Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
530  );
531 }
532 
533 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat )
534 {
535  return Matrix3( unitQuat );
536 }
537 
538 VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
539 {
540  __m128 zero = _mm_setzero_ps();
541  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
542  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
543  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
544  return Matrix3(
545  Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
546  Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
547  Vector3( vec_sel( zero, scaleVec.get128(), select_z ) )
548  );
549 }
550 
551 VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
552 {
553  return Matrix3(
554  ( mat.getCol0() * scaleVec.getX( ) ),
555  ( mat.getCol1() * scaleVec.getY( ) ),
556  ( mat.getCol2() * scaleVec.getZ( ) )
557  );
558 }
559 
560 VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
561 {
562  return Matrix3(
563  mulPerElem( mat.getCol0(), scaleVec ),
564  mulPerElem( mat.getCol1(), scaleVec ),
565  mulPerElem( mat.getCol2(), scaleVec )
566  );
567 }
568 
569 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
570 {
571  return Matrix3(
572  select( mat0.getCol0(), mat1.getCol0(), select1 ),
573  select( mat0.getCol1(), mat1.getCol1(), select1 ),
574  select( mat0.getCol2(), mat1.getCol2(), select1 )
575  );
576 }
577 
578 VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
579 {
580  return Matrix3(
581  select( mat0.getCol0(), mat1.getCol0(), select1 ),
582  select( mat0.getCol1(), mat1.getCol1(), select1 ),
583  select( mat0.getCol2(), mat1.getCol2(), select1 )
584  );
585 }
586 
587 #ifdef _VECTORMATH_DEBUG
588 
589 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat )
590 {
591  print( mat.getRow( 0 ) );
592  print( mat.getRow( 1 ) );
593  print( mat.getRow( 2 ) );
594 }
595 
596 VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name )
597 {
598  printf("%s:\n", name);
599  print( mat );
600 }
601 
602 #endif
603 
604 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat )
605 {
606  mCol0 = mat.mCol0;
607  mCol1 = mat.mCol1;
608  mCol2 = mat.mCol2;
609  mCol3 = mat.mCol3;
610 }
611 
613 {
614  mCol0 = Vector4( scalar );
615  mCol1 = Vector4( scalar );
616  mCol2 = Vector4( scalar );
617  mCol3 = Vector4( scalar );
618 }
619 
621 {
622  mCol0 = Vector4( scalar );
623  mCol1 = Vector4( scalar );
624  mCol2 = Vector4( scalar );
625  mCol3 = Vector4( scalar );
626 }
627 
629 {
630  mCol0 = Vector4( mat.getCol0(), 0.0f );
631  mCol1 = Vector4( mat.getCol1(), 0.0f );
632  mCol2 = Vector4( mat.getCol2(), 0.0f );
633  mCol3 = Vector4( mat.getCol3(), 1.0f );
634 }
635 
636 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
637 {
638  mCol0 = _col0;
639  mCol1 = _col1;
640  mCol2 = _col2;
641  mCol3 = _col3;
642 }
643 
644 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
645 {
646  mCol0 = Vector4( mat.getCol0(), 0.0f );
647  mCol1 = Vector4( mat.getCol1(), 0.0f );
648  mCol2 = Vector4( mat.getCol2(), 0.0f );
649  mCol3 = Vector4( translateVec, 1.0f );
650 }
651 
652 VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
653 {
654  Matrix3 mat;
655  mat = Matrix3( unitQuat );
656  mCol0 = Vector4( mat.getCol0(), 0.0f );
657  mCol1 = Vector4( mat.getCol1(), 0.0f );
658  mCol2 = Vector4( mat.getCol2(), 0.0f );
659  mCol3 = Vector4( translateVec, 1.0f );
660 }
661 
662 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
663 {
664  mCol0 = _col0;
665  return *this;
666 }
667 
668 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
669 {
670  mCol1 = _col1;
671  return *this;
672 }
673 
674 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
675 {
676  mCol2 = _col2;
677  return *this;
678 }
679 
680 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
681 {
682  mCol3 = _col3;
683  return *this;
684 }
685 
686 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
687 {
688  *(&mCol0 + col) = vec;
689  return *this;
690 }
691 
692 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
693 {
694  mCol0.setElem( row, vec.getElem( 0 ) );
695  mCol1.setElem( row, vec.getElem( 1 ) );
696  mCol2.setElem( row, vec.getElem( 2 ) );
697  mCol3.setElem( row, vec.getElem( 3 ) );
698  return *this;
699 }
700 
701 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val )
702 {
703  (*this)[col].setElem(row, val);
704  return *this;
705 }
706 
708 {
709  Vector4 tmpV3_0;
710  tmpV3_0 = this->getCol( col );
711  tmpV3_0.setElem( row, val );
712  this->setCol( col, tmpV3_0 );
713  return *this;
714 }
715 
716 VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const
717 {
718  return this->getCol( col ).getElem( row );
719 }
720 
721 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const
722 {
723  return mCol0;
724 }
725 
726 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const
727 {
728  return mCol1;
729 }
730 
731 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const
732 {
733  return mCol2;
734 }
735 
736 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const
737 {
738  return mCol3;
739 }
740 
741 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const
742 {
743  return *(&mCol0 + col);
744 }
745 
746 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const
747 {
748  return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
749 }
750 
752 {
753  return *(&mCol0 + col);
754 }
755 
756 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const
757 {
758  return *(&mCol0 + col);
759 }
760 
761 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat )
762 {
763  mCol0 = mat.mCol0;
764  mCol1 = mat.mCol1;
765  mCol2 = mat.mCol2;
766  mCol3 = mat.mCol3;
767  return *this;
768 }
769 
770 VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat )
771 {
772  __m128 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
773  tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
774  tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
775  tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
776  tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
777  res0 = vec_mergeh( tmp0, tmp1 );
778  res1 = vec_mergel( tmp0, tmp1 );
779  res2 = vec_mergeh( tmp2, tmp3 );
780  res3 = vec_mergel( tmp2, tmp3 );
781  return Matrix4(
782  Vector4( res0 ),
783  Vector4( res1 ),
784  Vector4( res2 ),
785  Vector4( res3 )
786  );
787 }
788 
789 // TODO: Tidy
790 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
791 static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};
792 static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};
793 
794 VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat )
795 {
796  __m128 Va,Vb,Vc;
797  __m128 r1,r2,r3,tt,tt2;
798  __m128 sum,Det,RDet;
799  __m128 trns0,trns1,trns2,trns3;
800 
801  __m128 _L1 = mat.getCol0().get128();
802  __m128 _L2 = mat.getCol1().get128();
803  __m128 _L3 = mat.getCol2().get128();
804  __m128 _L4 = mat.getCol3().get128();
805  // Calculating the minterms for the first line.
806 
807  // _mm_ror_ps is just a macro using _mm_shuffle_ps().
808  tt = _L4; tt2 = _mm_ror_ps(_L3,1);
809  Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3'dot V4
810  Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3'dot V4"
811  Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^
812 
813  r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4"
814  r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^
815  r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4'
816 
817  tt = _L2;
818  Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
819  Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
820  Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
821 
822  // Calculating the determinant.
823  Det = _mm_mul_ps(sum,_L1);
824  Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
825 
826  const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
827  const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
828 
829  __m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
830 
831  // Calculating the minterms of the second line (using previous results).
832  tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
833  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
834  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
835  __m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
836 
837  // Testing the determinant.
838  Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
839 
840  // Calculating the minterms of the third line.
841  tt = _mm_ror_ps(_L1,1);
842  Va = _mm_mul_ps(tt,Vb); // V1' dot V2"
843  Vb = _mm_mul_ps(tt,Vc); // V1' dot V2^
844  Vc = _mm_mul_ps(tt,_L2); // V1' dot V2
845 
846  r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V1" dot V2^ - V1^ dot V2"
847  r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V1^ dot V2' - V1' dot V2^
848  r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V1' dot V2" - V1" dot V2'
849 
850  tt = _mm_ror_ps(_L4,1); sum = _mm_mul_ps(tt,r1);
851  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
852  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
853  __m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
854 
855  // Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
856  RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
857  RDet = _mm_shuffle_ps(RDet,RDet,0x00);
858 
859  // Devide the first 12 minterms with the determinant.
860  mtL1 = _mm_mul_ps(mtL1, RDet);
861  mtL2 = _mm_mul_ps(mtL2, RDet);
862  mtL3 = _mm_mul_ps(mtL3, RDet);
863 
864  // Calculate the minterms of the forth line and devide by the determinant.
865  tt = _mm_ror_ps(_L3,1); sum = _mm_mul_ps(tt,r1);
866  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
867  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
868  __m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
869  mtL4 = _mm_mul_ps(mtL4, RDet);
870 
871  // Now we just have to transpose the minterms matrix.
872  trns0 = _mm_unpacklo_ps(mtL1,mtL2);
873  trns1 = _mm_unpacklo_ps(mtL3,mtL4);
874  trns2 = _mm_unpackhi_ps(mtL1,mtL2);
875  trns3 = _mm_unpackhi_ps(mtL3,mtL4);
876  _L1 = _mm_movelh_ps(trns0,trns1);
877  _L2 = _mm_movehl_ps(trns1,trns0);
878  _L3 = _mm_movelh_ps(trns2,trns3);
879  _L4 = _mm_movehl_ps(trns3,trns2);
880 
881  return Matrix4(
882  Vector4( _L1 ),
883  Vector4( _L2 ),
884  Vector4( _L3 ),
885  Vector4( _L4 )
886  );
887 }
888 
889 VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat )
890 {
891  Transform3 affineMat;
892  affineMat.setCol0( mat.getCol0().getXYZ( ) );
893  affineMat.setCol1( mat.getCol1().getXYZ( ) );
894  affineMat.setCol2( mat.getCol2().getXYZ( ) );
895  affineMat.setCol3( mat.getCol3().getXYZ( ) );
896  return Matrix4( inverse( affineMat ) );
897 }
898 
899 VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat )
900 {
901  Transform3 affineMat;
902  affineMat.setCol0( mat.getCol0().getXYZ( ) );
903  affineMat.setCol1( mat.getCol1().getXYZ( ) );
904  affineMat.setCol2( mat.getCol2().getXYZ( ) );
905  affineMat.setCol3( mat.getCol3().getXYZ( ) );
906  return Matrix4( orthoInverse( affineMat ) );
907 }
908 
909 VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat )
910 {
911  __m128 Va,Vb,Vc;
912  __m128 r1,r2,r3,tt,tt2;
913  __m128 sum,Det;
914 
915  __m128 _L1 = mat.getCol0().get128();
916  __m128 _L2 = mat.getCol1().get128();
917  __m128 _L3 = mat.getCol2().get128();
918  __m128 _L4 = mat.getCol3().get128();
919  // Calculating the minterms for the first line.
920 
921  // _mm_ror_ps is just a macro using _mm_shuffle_ps().
922  tt = _L4; tt2 = _mm_ror_ps(_L3,1);
923  Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0)); // V3' dot V4
924  Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2)); // V3' dot V4"
925  Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3)); // V3' dot V4^
926 
927  r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2)); // V3" dot V4^ - V3^ dot V4"
928  r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0)); // V3^ dot V4' - V3' dot V4^
929  r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1)); // V3' dot V4" - V3" dot V4'
930 
931  tt = _L2;
932  Va = _mm_ror_ps(tt,1); sum = _mm_mul_ps(Va,r1);
933  Vb = _mm_ror_ps(tt,2); sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
934  Vc = _mm_ror_ps(tt,3); sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
935 
936  // Calculating the determinant.
937  Det = _mm_mul_ps(sum,_L1);
938  Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
939 
940  // Calculating the minterms of the second line (using previous results).
941  tt = _mm_ror_ps(_L1,1); sum = _mm_mul_ps(tt,r1);
942  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
943  tt = _mm_ror_ps(tt,1); sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
944 
945  // Testing the determinant.
946  Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
947  return floatInVec(Det, 0);
948 }
949 
950 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
951 {
952  return Matrix4(
953  ( mCol0 + mat.mCol0 ),
954  ( mCol1 + mat.mCol1 ),
955  ( mCol2 + mat.mCol2 ),
956  ( mCol3 + mat.mCol3 )
957  );
958 }
959 
960 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
961 {
962  return Matrix4(
963  ( mCol0 - mat.mCol0 ),
964  ( mCol1 - mat.mCol1 ),
965  ( mCol2 - mat.mCol2 ),
966  ( mCol3 - mat.mCol3 )
967  );
968 }
969 
970 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
971 {
972  *this = *this + mat;
973  return *this;
974 }
975 
976 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
977 {
978  *this = *this - mat;
979  return *this;
980 }
981 
982 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const
983 {
984  return Matrix4(
985  ( -mCol0 ),
986  ( -mCol1 ),
987  ( -mCol2 ),
988  ( -mCol3 )
989  );
990 }
991 
992 VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat )
993 {
994  return Matrix4(
995  absPerElem( mat.getCol0() ),
996  absPerElem( mat.getCol1() ),
997  absPerElem( mat.getCol2() ),
998  absPerElem( mat.getCol3() )
999  );
1000 }
1001 
1002 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const
1003 {
1004  return *this * floatInVec(scalar);
1005 }
1006 
1008 {
1009  return Matrix4(
1010  ( mCol0 * scalar ),
1011  ( mCol1 * scalar ),
1012  ( mCol2 * scalar ),
1013  ( mCol3 * scalar )
1014  );
1015 }
1016 
1018 {
1019  return *this *= floatInVec(scalar);
1020 }
1021 
1023 {
1024  *this = *this * scalar;
1025  return *this;
1026 }
1027 
1028 VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat )
1029 {
1030  return floatInVec(scalar) * mat;
1031 }
1032 
1033 VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
1034 {
1035  return mat * scalar;
1036 }
1037 
1038 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const
1039 {
1040  return Vector4(
1041  _mm_add_ps(
1042  _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1043  _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
1044  );
1045 }
1046 
1047 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const
1048 {
1049  return Vector4(
1050  _mm_add_ps(
1051  _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
1052  _mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
1053  );
1054 }
1055 
1056 VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const
1057 {
1058  return Vector4(
1059  _mm_add_ps(
1060  _mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
1061  _mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128()))
1062  );
1063 }
1064 
1065 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
1066 {
1067  return Matrix4(
1068  ( *this * mat.mCol0 ),
1069  ( *this * mat.mCol1 ),
1070  ( *this * mat.mCol2 ),
1071  ( *this * mat.mCol3 )
1072  );
1073 }
1074 
1075 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
1076 {
1077  *this = *this * mat;
1078  return *this;
1079 }
1080 
1081 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
1082 {
1083  return Matrix4(
1084  ( *this * tfrm.getCol0() ),
1085  ( *this * tfrm.getCol1() ),
1086  ( *this * tfrm.getCol2() ),
1087  ( *this * Point3( tfrm.getCol3() ) )
1088  );
1089 }
1090 
1091 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
1092 {
1093  *this = *this * tfrm;
1094  return *this;
1095 }
1096 
1097 VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
1098 {
1099  return Matrix4(
1100  mulPerElem( mat0.getCol0(), mat1.getCol0() ),
1101  mulPerElem( mat0.getCol1(), mat1.getCol1() ),
1102  mulPerElem( mat0.getCol2(), mat1.getCol2() ),
1103  mulPerElem( mat0.getCol3(), mat1.getCol3() )
1104  );
1105 }
1106 
1108 {
1109  return Matrix4(
1110  Vector4::xAxis( ),
1111  Vector4::yAxis( ),
1112  Vector4::zAxis( ),
1113  Vector4::wAxis( )
1114  );
1115 }
1116 
1117 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
1118 {
1119  mCol0.setXYZ( mat3.getCol0() );
1120  mCol1.setXYZ( mat3.getCol1() );
1121  mCol2.setXYZ( mat3.getCol2() );
1122  return *this;
1123 }
1124 
1125 VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const
1126 {
1127  return Matrix3(
1128  mCol0.getXYZ( ),
1129  mCol1.getXYZ( ),
1130  mCol2.getXYZ( )
1131  );
1132 }
1133 
1134 VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
1135 {
1136  mCol3.setXYZ( translateVec );
1137  return *this;
1138 }
1139 
1140 VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const
1141 {
1142  return mCol3.getXYZ( );
1143 }
1144 
1145 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians )
1146 {
1147  return rotationX( floatInVec(radians) );
1148 }
1149 
1151 {
1152  __m128 s, c, res1, res2;
1153  __m128 zero;
1154  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1155  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1156  zero = _mm_setzero_ps();
1157  sincosf4( radians.get128(), &s, &c );
1158  res1 = vec_sel( zero, c, select_y );
1159  res1 = vec_sel( res1, s, select_z );
1160  res2 = vec_sel( zero, negatef4(s), select_y );
1161  res2 = vec_sel( res2, c, select_z );
1162  return Matrix4(
1163  Vector4::xAxis( ),
1164  Vector4( res1 ),
1165  Vector4( res2 ),
1166  Vector4::wAxis( )
1167  );
1168 }
1169 
1170 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians )
1171 {
1172  return rotationY( floatInVec(radians) );
1173 }
1174 
1176 {
1177  __m128 s, c, res0, res2;
1178  __m128 zero;
1179  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1180  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1181  zero = _mm_setzero_ps();
1182  sincosf4( radians.get128(), &s, &c );
1183  res0 = vec_sel( zero, c, select_x );
1184  res0 = vec_sel( res0, negatef4(s), select_z );
1185  res2 = vec_sel( zero, s, select_x );
1186  res2 = vec_sel( res2, c, select_z );
1187  return Matrix4(
1188  Vector4( res0 ),
1189  Vector4::yAxis( ),
1190  Vector4( res2 ),
1191  Vector4::wAxis( )
1192  );
1193 }
1194 
1195 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians )
1196 {
1197  return rotationZ( floatInVec(radians) );
1198 }
1199 
1201 {
1202  __m128 s, c, res0, res1;
1203  __m128 zero;
1204  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1205  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1206  zero = _mm_setzero_ps();
1207  sincosf4( radians.get128(), &s, &c );
1208  res0 = vec_sel( zero, c, select_x );
1209  res0 = vec_sel( res0, s, select_y );
1210  res1 = vec_sel( zero, negatef4(s), select_x );
1211  res1 = vec_sel( res1, c, select_y );
1212  return Matrix4(
1213  Vector4( res0 ),
1214  Vector4( res1 ),
1215  Vector4::zAxis( ),
1216  Vector4::wAxis( )
1217  );
1218 }
1219 
1221 {
1222  __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1223  angles = Vector4( radiansXYZ, 0.0f ).get128();
1224  sincosf4( angles, &s, &c );
1225  negS = negatef4( s );
1226  Z0 = vec_mergel( c, s );
1227  Z1 = vec_mergel( negS, c );
1228  VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1229  Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
1230  Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1231  Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1232  X0 = vec_splat( s, 0 );
1233  X1 = vec_splat( c, 0 );
1234  tmp = vec_mul( Z0, Y1 );
1235  return Matrix4(
1236  Vector4( vec_mul( Z0, Y0 ) ),
1237  Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
1238  Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
1239  Vector4::wAxis( )
1240  );
1241 }
1242 
1243 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
1244 {
1245  return rotation( floatInVec(radians), unitVec );
1246 }
1247 
1248 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
1249 {
1250  __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
1251  axis = unitVec.get128();
1252  sincosf4( radians.get128(), &s, &c );
1253  xxxx = vec_splat( axis, 0 );
1254  yyyy = vec_splat( axis, 1 );
1255  zzzz = vec_splat( axis, 2 );
1256  oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
1257  axisS = vec_mul( axis, s );
1258  negAxisS = negatef4( axisS );
1259  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1260  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1261  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1262  //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
1263  tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
1264  tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
1265  //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
1266  tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
1267  //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
1268  tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
1269  tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
1270  tmp0 = vec_sel( tmp0, c, select_x );
1271  tmp1 = vec_sel( tmp1, c, select_y );
1272  tmp2 = vec_sel( tmp2, c, select_z );
1273  VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1274  axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
1275  tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
1276  tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
1277  tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
1278  return Matrix4(
1279  Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
1280  Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
1281  Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
1282  Vector4::wAxis( )
1283  );
1284 }
1285 
1286 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat )
1287 {
1288  return Matrix4( Transform3::rotation( unitQuat ) );
1289 }
1290 
1291 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
1292 {
1293  __m128 zero = _mm_setzero_ps();
1294  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1295  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1296  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1297  return Matrix4(
1298  Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ),
1299  Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ),
1300  Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ),
1301  Vector4::wAxis( )
1302  );
1303 }
1304 
1305 VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
1306 {
1307  return Matrix4(
1308  ( mat.getCol0() * scaleVec.getX( ) ),
1309  ( mat.getCol1() * scaleVec.getY( ) ),
1310  ( mat.getCol2() * scaleVec.getZ( ) ),
1311  mat.getCol3()
1312  );
1313 }
1314 
1315 VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
1316 {
1317  Vector4 scale4;
1318  scale4 = Vector4( scaleVec, 1.0f );
1319  return Matrix4(
1320  mulPerElem( mat.getCol0(), scale4 ),
1321  mulPerElem( mat.getCol1(), scale4 ),
1322  mulPerElem( mat.getCol2(), scale4 ),
1323  mulPerElem( mat.getCol3(), scale4 )
1324  );
1325 }
1326 
1327 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec )
1328 {
1329  return Matrix4(
1330  Vector4::xAxis( ),
1331  Vector4::yAxis( ),
1332  Vector4::zAxis( ),
1333  Vector4( translateVec, 1.0f )
1334  );
1335 }
1336 
1337 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
1338 {
1339  Matrix4 m4EyeFrame;
1340  Vector3 v3X, v3Y, v3Z;
1341  v3Y = normalize( upVec );
1342  v3Z = normalize( ( eyePos - lookAtPos ) );
1343  v3X = normalize( cross( v3Y, v3Z ) );
1344  v3Y = cross( v3Z, v3X );
1345  m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
1346  return orthoInverse( m4EyeFrame );
1347 }
1348 
1349 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
1350 {
1351  float f, rangeInv;
1352  __m128 zero, col0, col1, col2, col3;
1353  union { __m128 v; float s[4]; } tmp;
1354  f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
1355  rangeInv = 1.0f / ( zNear - zFar );
1356  zero = _mm_setzero_ps();
1357  tmp.v = zero;
1358  tmp.s[0] = f / aspect;
1359  col0 = tmp.v;
1360  tmp.v = zero;
1361  tmp.s[1] = f;
1362  col1 = tmp.v;
1363  tmp.v = zero;
1364  tmp.s[2] = ( zNear + zFar ) * rangeInv;
1365  tmp.s[3] = -1.0f;
1366  col2 = tmp.v;
1367  tmp.v = zero;
1368  tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
1369  col3 = tmp.v;
1370  return Matrix4(
1371  Vector4( col0 ),
1372  Vector4( col1 ),
1373  Vector4( col2 ),
1374  Vector4( col3 )
1375  );
1376 }
1377 
1378 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
1379 {
1380  /* function implementation based on code from STIDC SDK: */
1381  /* -------------------------------------------------------------- */
1382  /* PLEASE DO NOT MODIFY THIS SECTION */
1383  /* This prolog section is automatically generated. */
1384  /* */
1385  /* (C)Copyright */
1386  /* Sony Computer Entertainment, Inc., */
1387  /* Toshiba Corporation, */
1388  /* International Business Machines Corporation, */
1389  /* 2001,2002. */
1390  /* S/T/I Confidential Information */
1391  /* -------------------------------------------------------------- */
1392  __m128 lbf, rtn;
1393  __m128 diff, sum, inv_diff;
1394  __m128 diagonal, column, near2;
1395  __m128 zero = _mm_setzero_ps();
1396  union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union?
1397  l.s[0] = left;
1398  f.s[0] = zFar;
1399  r.s[0] = right;
1400  n.s[0] = zNear;
1401  b.s[0] = bottom;
1402  t.s[0] = top;
1403  lbf = vec_mergeh( l.v, f.v );
1404  rtn = vec_mergeh( r.v, n.v );
1405  lbf = vec_mergeh( lbf, b.v );
1406  rtn = vec_mergeh( rtn, t.v );
1407  diff = vec_sub( rtn, lbf );
1408  sum = vec_add( rtn, lbf );
1409  inv_diff = recipf4( diff );
1410  near2 = vec_splat( n.v, 0 );
1411  near2 = vec_add( near2, near2 );
1412  diagonal = vec_mul( near2, inv_diff );
1413  column = vec_mul( sum, inv_diff );
1414  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1415  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1416  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1417  VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
1418  return Matrix4(
1419  Vector4( vec_sel( zero, diagonal, select_x ) ),
1420  Vector4( vec_sel( zero, diagonal, select_y ) ),
1421  Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
1422  Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) )
1423  );
1424 }
1425 
1426 VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
1427 {
1428  /* function implementation based on code from STIDC SDK: */
1429  /* -------------------------------------------------------------- */
1430  /* PLEASE DO NOT MODIFY THIS SECTION */
1431  /* This prolog section is automatically generated. */
1432  /* */
1433  /* (C)Copyright */
1434  /* Sony Computer Entertainment, Inc., */
1435  /* Toshiba Corporation, */
1436  /* International Business Machines Corporation, */
1437  /* 2001,2002. */
1438  /* S/T/I Confidential Information */
1439  /* -------------------------------------------------------------- */
1440  __m128 lbf, rtn;
1441  __m128 diff, sum, inv_diff, neg_inv_diff;
1442  __m128 diagonal, column;
1443  __m128 zero = _mm_setzero_ps();
1444  union { __m128 v; float s[4]; } l, f, r, n, b, t;
1445  l.s[0] = left;
1446  f.s[0] = zFar;
1447  r.s[0] = right;
1448  n.s[0] = zNear;
1449  b.s[0] = bottom;
1450  t.s[0] = top;
1451  lbf = vec_mergeh( l.v, f.v );
1452  rtn = vec_mergeh( r.v, n.v );
1453  lbf = vec_mergeh( lbf, b.v );
1454  rtn = vec_mergeh( rtn, t.v );
1455  diff = vec_sub( rtn, lbf );
1456  sum = vec_add( rtn, lbf );
1457  inv_diff = recipf4( diff );
1458  neg_inv_diff = negatef4( inv_diff );
1459  diagonal = vec_add( inv_diff, inv_diff );
1460  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1461  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1462  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1463  VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
1464  column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
1465  return Matrix4(
1466  Vector4( vec_sel( zero, diagonal, select_x ) ),
1467  Vector4( vec_sel( zero, diagonal, select_y ) ),
1468  Vector4( vec_sel( zero, diagonal, select_z ) ),
1469  Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
1470  );
1471 }
1472 
1473 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
1474 {
1475  return Matrix4(
1476  select( mat0.getCol0(), mat1.getCol0(), select1 ),
1477  select( mat0.getCol1(), mat1.getCol1(), select1 ),
1478  select( mat0.getCol2(), mat1.getCol2(), select1 ),
1479  select( mat0.getCol3(), mat1.getCol3(), select1 )
1480  );
1481 }
1482 
1483 VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
1484 {
1485  return Matrix4(
1486  select( mat0.getCol0(), mat1.getCol0(), select1 ),
1487  select( mat0.getCol1(), mat1.getCol1(), select1 ),
1488  select( mat0.getCol2(), mat1.getCol2(), select1 ),
1489  select( mat0.getCol3(), mat1.getCol3(), select1 )
1490  );
1491 }
1492 
1493 #ifdef _VECTORMATH_DEBUG
1494 
1495 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat )
1496 {
1497  print( mat.getRow( 0 ) );
1498  print( mat.getRow( 1 ) );
1499  print( mat.getRow( 2 ) );
1500  print( mat.getRow( 3 ) );
1501 }
1502 
1503 VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name )
1504 {
1505  printf("%s:\n", name);
1506  print( mat );
1507 }
1508 
1509 #endif
1510 
1511 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm )
1512 {
1513  mCol0 = tfrm.mCol0;
1514  mCol1 = tfrm.mCol1;
1515  mCol2 = tfrm.mCol2;
1516  mCol3 = tfrm.mCol3;
1517 }
1518 
1520 {
1521  mCol0 = Vector3( scalar );
1522  mCol1 = Vector3( scalar );
1523  mCol2 = Vector3( scalar );
1524  mCol3 = Vector3( scalar );
1525 }
1526 
1528 {
1529  mCol0 = Vector3( scalar );
1530  mCol1 = Vector3( scalar );
1531  mCol2 = Vector3( scalar );
1532  mCol3 = Vector3( scalar );
1533 }
1534 
1535 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
1536 {
1537  mCol0 = _col0;
1538  mCol1 = _col1;
1539  mCol2 = _col2;
1540  mCol3 = _col3;
1541 }
1542 
1543 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
1544 {
1545  this->setUpper3x3( tfrm );
1546  this->setTranslation( translateVec );
1547 }
1548 
1549 VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
1550 {
1551  this->setUpper3x3( Matrix3( unitQuat ) );
1552  this->setTranslation( translateVec );
1553 }
1554 
1555 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 )
1556 {
1557  mCol0 = _col0;
1558  return *this;
1559 }
1560 
1561 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 )
1562 {
1563  mCol1 = _col1;
1564  return *this;
1565 }
1566 
1567 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 )
1568 {
1569  mCol2 = _col2;
1570  return *this;
1571 }
1572 
1573 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 )
1574 {
1575  mCol3 = _col3;
1576  return *this;
1577 }
1578 
1579 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec )
1580 {
1581  *(&mCol0 + col) = vec;
1582  return *this;
1583 }
1584 
1585 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec )
1586 {
1587  mCol0.setElem( row, vec.getElem( 0 ) );
1588  mCol1.setElem( row, vec.getElem( 1 ) );
1589  mCol2.setElem( row, vec.getElem( 2 ) );
1590  mCol3.setElem( row, vec.getElem( 3 ) );
1591  return *this;
1592 }
1593 
1594 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val )
1595 {
1596  (*this)[col].setElem(row, val);
1597  return *this;
1598 }
1599 
1601 {
1602  Vector3 tmpV3_0;
1603  tmpV3_0 = this->getCol( col );
1604  tmpV3_0.setElem( row, val );
1605  this->setCol( col, tmpV3_0 );
1606  return *this;
1607 }
1608 
1609 VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const
1610 {
1611  return this->getCol( col ).getElem( row );
1612 }
1613 
1614 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const
1615 {
1616  return mCol0;
1617 }
1618 
1619 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const
1620 {
1621  return mCol1;
1622 }
1623 
1624 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const
1625 {
1626  return mCol2;
1627 }
1628 
1629 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const
1630 {
1631  return mCol3;
1632 }
1633 
1634 VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const
1635 {
1636  return *(&mCol0 + col);
1637 }
1638 
1639 VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const
1640 {
1641  return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
1642 }
1643 
1645 {
1646  return *(&mCol0 + col);
1647 }
1648 
1649 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const
1650 {
1651  return *(&mCol0 + col);
1652 }
1653 
1654 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm )
1655 {
1656  mCol0 = tfrm.mCol0;
1657  mCol1 = tfrm.mCol1;
1658  mCol2 = tfrm.mCol2;
1659  mCol3 = tfrm.mCol3;
1660  return *this;
1661 }
1662 
1663 VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm )
1664 {
1665  __m128 inv0, inv1, inv2, inv3;
1666  __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
1667  __m128 xxxx, yyyy, zzzz;
1668  tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
1669  tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
1670  tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
1671  inv3 = negatef4( tfrm.getCol3().get128() );
1672  dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
1673  dot = vec_splat( dot, 0 );
1674  invdet = recipf4( dot );
1675  tmp3 = vec_mergeh( tmp0, tmp2 );
1676  tmp4 = vec_mergel( tmp0, tmp2 );
1677  inv0 = vec_mergeh( tmp3, tmp1 );
1678  xxxx = vec_splat( inv3, 0 );
1679  //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
1680  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1681  inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
1682  inv1 = vec_sel(inv1, tmp1, select_y);
1683  //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
1684  inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
1685  inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
1686  yyyy = vec_splat( inv3, 1 );
1687  zzzz = vec_splat( inv3, 2 );
1688  inv3 = vec_mul( inv0, xxxx );
1689  inv3 = vec_madd( inv1, yyyy, inv3 );
1690  inv3 = vec_madd( inv2, zzzz, inv3 );
1691  inv0 = vec_mul( inv0, invdet );
1692  inv1 = vec_mul( inv1, invdet );
1693  inv2 = vec_mul( inv2, invdet );
1694  inv3 = vec_mul( inv3, invdet );
1695  return Transform3(
1696  Vector3( inv0 ),
1697  Vector3( inv1 ),
1698  Vector3( inv2 ),
1699  Vector3( inv3 )
1700  );
1701 }
1702 
1703 VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm )
1704 {
1705  __m128 inv0, inv1, inv2, inv3;
1706  __m128 tmp0, tmp1;
1707  __m128 xxxx, yyyy, zzzz;
1708  tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1709  tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
1710  inv3 = negatef4( tfrm.getCol3().get128() );
1711  inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
1712  xxxx = vec_splat( inv3, 0 );
1713  //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
1714  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1715  inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
1716  inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
1717  //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
1718  inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
1719  inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
1720  yyyy = vec_splat( inv3, 1 );
1721  zzzz = vec_splat( inv3, 2 );
1722  inv3 = vec_mul( inv0, xxxx );
1723  inv3 = vec_madd( inv1, yyyy, inv3 );
1724  inv3 = vec_madd( inv2, zzzz, inv3 );
1725  return Transform3(
1726  Vector3( inv0 ),
1727  Vector3( inv1 ),
1728  Vector3( inv2 ),
1729  Vector3( inv3 )
1730  );
1731 }
1732 
1733 VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm )
1734 {
1735  return Transform3(
1736  absPerElem( tfrm.getCol0() ),
1737  absPerElem( tfrm.getCol1() ),
1738  absPerElem( tfrm.getCol2() ),
1739  absPerElem( tfrm.getCol3() )
1740  );
1741 }
1742 
1743 VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const
1744 {
1745  __m128 res;
1746  __m128 xxxx, yyyy, zzzz;
1747  xxxx = vec_splat( vec.get128(), 0 );
1748  yyyy = vec_splat( vec.get128(), 1 );
1749  zzzz = vec_splat( vec.get128(), 2 );
1750  res = vec_mul( mCol0.get128(), xxxx );
1751  res = vec_madd( mCol1.get128(), yyyy, res );
1752  res = vec_madd( mCol2.get128(), zzzz, res );
1753  return Vector3( res );
1754 }
1755 
1756 VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const
1757 {
1758  __m128 tmp0, tmp1, res;
1759  __m128 xxxx, yyyy, zzzz;
1760  xxxx = vec_splat( pnt.get128(), 0 );
1761  yyyy = vec_splat( pnt.get128(), 1 );
1762  zzzz = vec_splat( pnt.get128(), 2 );
1763  tmp0 = vec_mul( mCol0.get128(), xxxx );
1764  tmp1 = vec_mul( mCol1.get128(), yyyy );
1765  tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
1766  tmp1 = vec_add( mCol3.get128(), tmp1 );
1767  res = vec_add( tmp0, tmp1 );
1768  return Point3( res );
1769 }
1770 
1771 VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
1772 {
1773  return Transform3(
1774  ( *this * tfrm.mCol0 ),
1775  ( *this * tfrm.mCol1 ),
1776  ( *this * tfrm.mCol2 ),
1777  Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
1778  );
1779 }
1780 
1781 VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm )
1782 {
1783  *this = *this * tfrm;
1784  return *this;
1785 }
1786 
1787 VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
1788 {
1789  return Transform3(
1790  mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
1791  mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
1792  mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
1793  mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
1794  );
1795 }
1796 
1798 {
1799  return Transform3(
1800  Vector3::xAxis( ),
1801  Vector3::yAxis( ),
1802  Vector3::zAxis( ),
1803  Vector3( 0.0f )
1804  );
1805 }
1806 
1807 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
1808 {
1809  mCol0 = tfrm.getCol0();
1810  mCol1 = tfrm.getCol1();
1811  mCol2 = tfrm.getCol2();
1812  return *this;
1813 }
1814 
1815 VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const
1816 {
1817  return Matrix3( mCol0, mCol1, mCol2 );
1818 }
1819 
1820 VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
1821 {
1822  mCol3 = translateVec;
1823  return *this;
1824 }
1825 
1827 {
1828  return mCol3;
1829 }
1830 
1831 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians )
1832 {
1833  return rotationX( floatInVec(radians) );
1834 }
1835 
1837 {
1838  __m128 s, c, res1, res2;
1839  __m128 zero;
1840  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1841  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1842  zero = _mm_setzero_ps();
1843  sincosf4( radians.get128(), &s, &c );
1844  res1 = vec_sel( zero, c, select_y );
1845  res1 = vec_sel( res1, s, select_z );
1846  res2 = vec_sel( zero, negatef4(s), select_y );
1847  res2 = vec_sel( res2, c, select_z );
1848  return Transform3(
1849  Vector3::xAxis( ),
1850  Vector3( res1 ),
1851  Vector3( res2 ),
1852  Vector3( _mm_setzero_ps() )
1853  );
1854 }
1855 
1857 {
1858  return rotationY( floatInVec(radians) );
1859 }
1860 
1862 {
1863  __m128 s, c, res0, res2;
1864  __m128 zero;
1865  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1866  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1867  zero = _mm_setzero_ps();
1868  sincosf4( radians.get128(), &s, &c );
1869  res0 = vec_sel( zero, c, select_x );
1870  res0 = vec_sel( res0, negatef4(s), select_z );
1871  res2 = vec_sel( zero, s, select_x );
1872  res2 = vec_sel( res2, c, select_z );
1873  return Transform3(
1874  Vector3( res0 ),
1875  Vector3::yAxis( ),
1876  Vector3( res2 ),
1877  Vector3( 0.0f )
1878  );
1879 }
1880 
1882 {
1883  return rotationZ( floatInVec(radians) );
1884 }
1885 
1887 {
1888  __m128 s, c, res0, res1;
1889  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1890  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1891  __m128 zero = _mm_setzero_ps();
1892  sincosf4( radians.get128(), &s, &c );
1893  res0 = vec_sel( zero, c, select_x );
1894  res0 = vec_sel( res0, s, select_y );
1895  res1 = vec_sel( zero, negatef4(s), select_x );
1896  res1 = vec_sel( res1, c, select_y );
1897  return Transform3(
1898  Vector3( res0 ),
1899  Vector3( res1 ),
1900  Vector3::zAxis( ),
1901  Vector3( 0.0f )
1902  );
1903 }
1904 
1906 {
1907  __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
1908  angles = Vector4( radiansXYZ, 0.0f ).get128();
1909  sincosf4( angles, &s, &c );
1910  negS = negatef4( s );
1911  Z0 = vec_mergel( c, s );
1912  Z1 = vec_mergel( negS, c );
1913  VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
1914  Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
1915  Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
1916  Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
1917  X0 = vec_splat( s, 0 );
1918  X1 = vec_splat( c, 0 );
1919  tmp = vec_mul( Z0, Y1 );
1920  return Transform3(
1921  Vector3( vec_mul( Z0, Y0 ) ),
1922  Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
1923  Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
1924  Vector3( 0.0f )
1925  );
1926 }
1927 
1928 VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
1929 {
1930  return rotation( floatInVec(radians), unitVec );
1931 }
1932 
1934 {
1935  return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
1936 }
1937 
1939 {
1940  return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
1941 }
1942 
1943 VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec )
1944 {
1945  __m128 zero = _mm_setzero_ps();
1946  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
1947  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
1948  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
1949  return Transform3(
1950  Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
1951  Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
1952  Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
1953  Vector3( 0.0f )
1954  );
1955 }
1956 
1957 VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
1958 {
1959  return Transform3(
1960  ( tfrm.getCol0() * scaleVec.getX( ) ),
1961  ( tfrm.getCol1() * scaleVec.getY( ) ),
1962  ( tfrm.getCol2() * scaleVec.getZ( ) ),
1963  tfrm.getCol3()
1964  );
1965 }
1966 
1967 VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
1968 {
1969  return Transform3(
1970  mulPerElem( tfrm.getCol0(), scaleVec ),
1971  mulPerElem( tfrm.getCol1(), scaleVec ),
1972  mulPerElem( tfrm.getCol2(), scaleVec ),
1973  mulPerElem( tfrm.getCol3(), scaleVec )
1974  );
1975 }
1976 
1977 VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec )
1978 {
1979  return Transform3(
1980  Vector3::xAxis( ),
1981  Vector3::yAxis( ),
1982  Vector3::zAxis( ),
1983  translateVec
1984  );
1985 }
1986 
1987 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
1988 {
1989  return Transform3(
1990  select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
1991  select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
1992  select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
1993  select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
1994  );
1995 }
1996 
1997 VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
1998 {
1999  return Transform3(
2000  select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
2001  select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
2002  select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
2003  select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
2004  );
2005 }
2006 
2007 #ifdef _VECTORMATH_DEBUG
2008 
2009 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm )
2010 {
2011  print( tfrm.getRow( 0 ) );
2012  print( tfrm.getRow( 1 ) );
2013  print( tfrm.getRow( 2 ) );
2014 }
2015 
2016 VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name )
2017 {
2018  printf("%s:\n", name);
2019  print( tfrm );
2020 }
2021 
2022 #endif
2023 
2024 VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm )
2025 {
2026  __m128 res;
2027  __m128 col0, col1, col2;
2028  __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
2029  __m128 zy_xz_yx, yz_zx_xy, sum, diff;
2030  __m128 radicand, invSqrt, scale;
2031  __m128 res0, res1, res2, res3;
2032  __m128 xx, yy, zz;
2033  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
2034  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2035  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
2036  VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
2037 
2038  col0 = tfrm.getCol0().get128();
2039  col1 = tfrm.getCol1().get128();
2040  col2 = tfrm.getCol2().get128();
2041 
2042  /* four cases: */
2043  /* trace > 0 */
2044  /* else */
2045  /* xx largest diagonal element */
2046  /* yy largest diagonal element */
2047  /* zz largest diagonal element */
2048 
2049  /* compute quaternion for each case */
2050 
2051  xx_yy = vec_sel( col0, col1, select_y );
2052  //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
2053  //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
2054  //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
2055  xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
2056  xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
2057  yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
2058  zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
2059 
2060  diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2061  diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
2062  radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );
2063  // invSqrt = rsqrtf4( radicand );
2064  invSqrt = newtonrapson_rsqrt4( radicand );
2065 
2066 
2067 
2068  zy_xz_yx = vec_sel( col0, col1, select_z ); // zy_xz_yx = 00 01 12 03
2069  //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
2070  zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) ); // zy_xz_yx = 12 12 01 00
2071  zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y ); // zy_xz_yx = 12 20 01 00
2072  yz_zx_xy = vec_sel( col0, col1, select_x ); // yz_zx_xy = 10 01 02 03
2073  //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
2074  yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) ); // yz_zx_xy = 10 02 10 10
2075  yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x ); // yz_zx_xy = 21 02 10 10
2076 
2077  sum = vec_add( zy_xz_yx, yz_zx_xy );
2078  diff = vec_sub( zy_xz_yx, yz_zx_xy );
2079 
2080  scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );
2081 
2082  //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
2083  res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
2084  res0 = vec_sel( res0, vec_splat(diff, 0), select_w ); // TODO: Ck
2085  //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
2086  res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
2087  res1 = vec_sel( res1, vec_splat(diff, 1), select_w ); // TODO: Ck
2088  //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
2089  res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
2090  res2 = vec_sel( res2, vec_splat(diff, 2), select_w ); // TODO: Ck
2091  res3 = diff;
2092  res0 = vec_sel( res0, radicand, select_x );
2093  res1 = vec_sel( res1, radicand, select_y );
2094  res2 = vec_sel( res2, radicand, select_z );
2095  res3 = vec_sel( res3, radicand, select_w );
2096  res0 = vec_mul( res0, vec_splat( scale, 0 ) );
2097  res1 = vec_mul( res1, vec_splat( scale, 1 ) );
2098  res2 = vec_mul( res2, vec_splat( scale, 2 ) );
2099  res3 = vec_mul( res3, vec_splat( scale, 3 ) );
2100 
2101  /* determine case and select answer */
2102 
2103  xx = vec_splat( col0, 0 );
2104  yy = vec_splat( col1, 1 );
2105  zz = vec_splat( col2, 2 );
2106  res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
2107  res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
2108  res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );
2109  mVec128 = res;
2110 }
2111 
2112 VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
2113 {
2114  return Matrix3(
2115  ( tfrm0 * tfrm1.getX( ) ),
2116  ( tfrm0 * tfrm1.getY( ) ),
2117  ( tfrm0 * tfrm1.getZ( ) )
2118  );
2119 }
2120 
2121 VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
2122 {
2123  return Matrix4(
2124  ( tfrm0 * tfrm1.getX( ) ),
2125  ( tfrm0 * tfrm1.getY( ) ),
2126  ( tfrm0 * tfrm1.getZ( ) ),
2127  ( tfrm0 * tfrm1.getW( ) )
2128  );
2129 }
2130 
2131 VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
2132 {
2133  __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
2134  __m128 xxxx, yyyy, zzzz;
2135  tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
2136  tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
2137  xxxx = vec_splat( vec.get128(), 0 );
2138  mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
2139  //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
2140  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2141  mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
2142  mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
2143  //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
2144  mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
2145  mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
2146  yyyy = vec_splat( vec.get128(), 1 );
2147  res = vec_mul( mcol0, xxxx );
2148  zzzz = vec_splat( vec.get128(), 2 );
2149  res = vec_madd( mcol1, yyyy, res );
2150  res = vec_madd( mcol2, zzzz, res );
2151  return Vector3( res );
2152 }
2153 
2154 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec )
2155 {
2156  __m128 neg, res0, res1, res2;
2157  neg = negatef4( vec.get128() );
2158  VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
2159  VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
2160  VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
2161  //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
2162  res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
2163  res0 = vec_sel(res0, vec_splat(neg, 1), select_z);
2164  //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
2165  res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);
2166  //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
2167  res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
2168  res2 = vec_sel(res2, vec_splat(neg, 0), select_y);
2169  VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
2170  VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
2171  VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
2172  res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );
2173  res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
2174  res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
2175  return Matrix3(
2176  Vector3( res0 ),
2177  Vector3( res1 ),
2178  Vector3( res2 )
2179  );
2180 }
2181 
2182 VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
2183 {
2184  return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
2185 }
2186 
2187 } // namespace Aos
2188 } // namespace Vectormath
2189 
2190 #endif
const Vector3 getTranslation() const
const Quat normalize(const Quat &quat)
Matrix4 & setCol1(const Vector4 &col1)
Definition: neon/mat_aos.h:491
float determinant(const Matrix3 &mat)
Definition: neon/mat_aos.h:189
const Vector3 getCol0() const
const Vector3 rowMul(const Vector3 &vec, const Matrix3 &mat)
static const Matrix4 scale(const Vector3 &scaleVec)
Definition: neon/mat_aos.h:984
const Vector3 getXYZ() const
Definition: neon/vec_aos.h:706
static const Transform3 rotationY(float radians)
const Matrix3 crossMatrixMul(const Vector3 &vec, const Matrix3 &mat)
static const Vector4 yAxis()
Definition: neon/vec_aos.h:564
#define vec_mergel(a, b)
Transform3 & setTranslation(const Vector3 &translateVec)
Vector4 & setXYZ(const Vector3 &vec)
Definition: neon/vec_aos.h:698
static const Matrix4 rotationX(float radians)
Definition: neon/mat_aos.h:901
static const Matrix4 translation(const Vector3 &translateVec)
#define vec_mergeh(a, b)
static const Vector3 xAxis()
Definition: neon/vec_aos.h:64
static const Vector3 zAxis()
Definition: neon/vec_aos.h:74
const Matrix4 operator+(const Matrix4 &mat) const
Definition: neon/mat_aos.h:718
const Matrix3 operator*(float scalar) const
Definition: neon/mat_aos.h:242
Matrix3 & setElem(int col, int row, float val)
Definition: neon/mat_aos.h:108
Vector3 & operator[](int col)
static const Transform3 translation(const Vector3 &translateVec)
static const Vector4 xAxis()
Definition: neon/vec_aos.h:559
#define vec_madd(a, b, c)
const Matrix3 appendScale(const Matrix3 &mat, const Vector3 &scaleVec)
Definition: neon/mat_aos.h:391
Matrix3 & setCol0(const Vector3 &col0)
Definition: neon/mat_aos.h:76
#define vec_cmpgt(a, b)
Matrix3 & setCol2(const Vector3 &col2)
Definition: neon/mat_aos.h:88
const Vector3 operator*(const Vector3 &vec) const
Matrix3 & operator-=(const Matrix3 &mat)
Definition: neon/mat_aos.h:218
#define vec_mul(a, b)
const Matrix3 inverse(const Matrix3 &mat)
Definition: neon/mat_aos.h:174
static const Transform3 rotationX(float radians)
static const Matrix4 rotation(float radians, const Vector3 &unitVec)
Definition: neon/mat_aos.h:959
static const Matrix4 rotationY(float radians)
Definition: neon/mat_aos.h:914
static const Transform3 identity()
const Vector3 getCol3() const
#define negatef4(x)
const Matrix3 crossMatrix(const Vector3 &vec)
float getElem(int col, int row) const
Definition: neon/mat_aos.h:533
Matrix4 & operator-=(const Matrix4 &mat)
Definition: neon/mat_aos.h:744
static const Matrix4 orthographic(float left, float right, float bottom, float top, float zNear, float zFar)
#define _mm_ror_ps(vec, i)
#define VECTORMATH_FORCE_INLINE
static __m128 _vmathVfCross(__m128 vec0, __m128 vec1)
Definition: sse/vec_aos.h:84
static const Matrix3 rotationZ(float radians)
Definition: neon/mat_aos.h:328
#define vec_sub(a, b)
static const Vector3 yAxis()
Definition: neon/vec_aos.h:69
static const Matrix3 rotationZYX(const Vector3 &radiansXYZ)
Definition: neon/mat_aos.h:340
Transform3 & setElem(int col, int row, float val)
static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4]
Definition: sse/mat_aos.h:792
const Matrix4 operator*(float scalar) const
Definition: neon/mat_aos.h:770
const Vector4 getCol3() const
Definition: neon/mat_aos.h:553
static const Matrix4 identity()
Definition: neon/mat_aos.h:863
Transform3 & operator=(const Transform3 &tfrm)
static const Vector4 wAxis()
Definition: neon/vec_aos.h:574
Transform3 & setCol0(const Vector3 &col0)
static const Matrix3 identity()
Definition: neon/mat_aos.h:295
Transform3 & setCol(int col, const Vector3 &vec)
const Vector4 getCol1() const
Definition: neon/mat_aos.h:543
Matrix4 & setTranslation(const Vector3 &translateVec)
Definition: neon/mat_aos.h:890
static const Transform3 rotation(float radians, const Vector3 &unitVec)
const Vector4 getCol0() const
Definition: neon/mat_aos.h:538
const Matrix4 affineInverse(const Matrix4 &mat)
Definition: neon/mat_aos.h:666
#define vec_splat(x, e)
static const Matrix3 scale(const Vector3 &scaleVec)
Definition: neon/mat_aos.h:382
const Point3 scale(const Point3 &pnt, float scaleVal)
#define vec_add(a, b)
Transform3 & setCol3(const Vector3 &col3)
const Vector3 getCol1() const
Vector4 & operator[](int col)
Definition: neon/mat_aos.h:568
const Vector3 getCol(int col) const
const Vector3 getCol0() const
Definition: neon/mat_aos.h:122
const Matrix3 getUpper3x3() const
Definition: neon/mat_aos.h:881
Matrix4 & setElem(int col, int row, float val)
Definition: neon/mat_aos.h:524
const Vector4 getRow(int row) const
const Matrix3 outer(const Vector3 &tfrm0, const Vector3 &tfrm1)
const Matrix4 operator-() const
Definition: neon/mat_aos.h:750
Matrix4 & operator*=(float scalar)
Definition: neon/mat_aos.h:780
static const Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar)
Vector3 & operator[](int col)
Definition: neon/mat_aos.h:147
Vector3 & setElem(int idx, float value)
Definition: neon/vec_aos.h:229
const Vector3 getTranslation() const
Definition: neon/mat_aos.h:896
const Matrix3 prependScale(const Vector3 &scaleVec, const Matrix3 &mat)
Definition: neon/mat_aos.h:400
#define VM_ATTRIBUTE_ALIGN16
const Vector4 getCol2() const
Definition: neon/mat_aos.h:548
#define _VECTORMATH_PI_OVER_2
Definition: sse/mat_aos.h:60
const Matrix3 operator+(const Matrix3 &mat) const
Definition: neon/mat_aos.h:194
Matrix3 & operator+=(const Matrix3 &mat)
Definition: neon/mat_aos.h:212
#define vec_nmsub(a, b, c)
const Matrix3 operator-() const
Definition: neon/mat_aos.h:224
const Matrix3 getUpper3x3() const
Matrix3 & setCol1(const Vector3 &col1)
Definition: neon/mat_aos.h:82
__m128 get128() const
Definition: sse/quat_aos.h:158
static const Matrix3 rotationX(float radians)
Definition: neon/mat_aos.h:304
#define recipf4(x)
const Matrix4 orthoInverse(const Matrix4 &mat)
Definition: neon/mat_aos.h:676
__m128 get128() const
static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4]
Definition: sse/mat_aos.h:790
static __m128 newtonrapson_rsqrt4(const __m128 v)
static const Transform3 scale(const Vector3 &scaleVec)
float getElem(int idx) const
Definition: neon/vec_aos.h:235
Matrix4 & setCol3(const Vector4 &col3)
Definition: neon/mat_aos.h:503
static void sincosf4(vec_float4 x, vec_float4 *s, vec_float4 *c)
static const Matrix3 rotation(float radians, const Vector3 &unitVec)
Definition: neon/mat_aos.h:358
const Vector4 getCol(int col) const
Definition: neon/mat_aos.h:558
Matrix4 & setCol(int col, const Vector4 &vec)
Definition: neon/mat_aos.h:509
Matrix4 & setUpper3x3(const Matrix3 &mat3)
Definition: neon/mat_aos.h:873
Matrix4 & setCol2(const Vector4 &col2)
Definition: neon/mat_aos.h:497
static const Matrix4 lookAt(const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec)
const Matrix3 select(const Matrix3 &mat0, const Matrix3 &mat1, bool select1)
Definition: neon/mat_aos.h:409
Matrix4 & operator+=(const Matrix4 &mat)
Definition: neon/mat_aos.h:738
static const Transform3 rotationZ(float radians)
static const Matrix4 perspective(float fovyRadians, float aspect, float zNear, float zFar)
float getElem(int idx) const
Definition: neon/vec_aos.h:761
static const Matrix3 rotationY(float radians)
Definition: neon/mat_aos.h:316
const Matrix3 transpose(const Matrix3 &mat)
Definition: neon/mat_aos.h:165
Matrix3 & operator*=(float scalar)
Definition: neon/mat_aos.h:251
Transform3 & setCol2(const Vector3 &col2)
const Vector3 getRow(int row) const
Definition: neon/mat_aos.h:142
Matrix4 & setCol0(const Vector4 &col0)
Definition: neon/mat_aos.h:485
Matrix4 & setRow(int row, const Vector4 &vec)
Definition: neon/mat_aos.h:515
Transform3 & setCol1(const Vector3 &col1)
Matrix3 & setCol(int col, const Vector3 &vec)
Definition: neon/mat_aos.h:94
float getElem(int col, int row) const
Definition: neon/mat_aos.h:117
const Vector3 getCol(int col) const
Definition: neon/mat_aos.h:137
static const Matrix4 rotationZYX(const Vector3 &radiansXYZ)
Definition: neon/mat_aos.h:940
float sum(const Vector3 &vec)
Definition: neon/vec_aos.h:430
static const Vector4 zAxis()
Definition: neon/vec_aos.h:569
float dot(const Quat &quat0, const Quat &quat1)
static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4]
Definition: sse/mat_aos.h:791
const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:473
const Matrix3 mulPerElem(const Matrix3 &mat0, const Matrix3 &mat1)
Definition: neon/mat_aos.h:286
__m128 get128() const
Definition: sse/vec_aos.h:817
static const Transform3 rotationZYX(const Vector3 &radiansXYZ)
__m128 get128() const
Definition: sse/vec_aos.h:327
Transform3 & setUpper3x3(const Matrix3 &mat3)
static const Matrix4 rotationZ(float radians)
Definition: neon/mat_aos.h:927
Matrix3 & operator=(const Matrix3 &mat)
Definition: neon/mat_aos.h:157
const Vector3 getCol2() const
const Vector4 getRow(int row) const
Definition: neon/mat_aos.h:563
#define vec_and(a, b)
float getElem(int col, int row) const
const Vector3 getCol2() const
Definition: neon/mat_aos.h:132
Transform3 & setRow(int row, const Vector4 &vec)
static __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
static __m128 _vmathVfDot3(__m128 vec0, __m128 vec1)
Definition: sse/vec_aos.h:70
const Matrix3 absPerElem(const Matrix3 &mat)
Definition: neon/mat_aos.h:233
Vector4 & setElem(int idx, float value)
Definition: neon/vec_aos.h:755
const Matrix3 operator*(float scalar, const Matrix3 &mat)
Definition: neon/mat_aos.h:257
Transform3 & operator*=(const Transform3 &tfrm)
Matrix3 & setRow(int row, const Vector3 &vec)
Definition: neon/mat_aos.h:100
Matrix4 & operator=(const Matrix4 &mat)
Definition: neon/mat_aos.h:578
const Vector3 getCol1() const
Definition: neon/mat_aos.h:127