Bullet Collision Detection & Physics Library
sse/vec_aos.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms,
6  with or without modification, are permitted provided that the
7  following conditions are met:
8  * Redistributions of source code must retain the above copyright
9  notice, this list of conditions and the following disclaimer.
10  * Redistributions in binary form must reproduce the above copyright
11  notice, this list of conditions and the following disclaimer in the
12  documentation and/or other materials provided with the distribution.
13  * Neither the name of the Sony Computer Entertainment Inc nor the names
14  of its contributors may be used to endorse or promote products derived
15  from this software without specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27  POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifndef _VECTORMATH_VEC_AOS_CPP_H
31 #define _VECTORMATH_VEC_AOS_CPP_H
32 
33 //-----------------------------------------------------------------------------
34 // Constants
35 // for permutes words are labeled [x,y,z,w] [a,b,c,d]
36 
// Byte-index selectors for a single 32-bit word of a two-vector permute:
// X..W pick bytes 0x00-0x0f (first operand), A..D pick bytes 0x10-0x1f
// (second operand).
37 #define _VECTORMATH_PERM_X 0x00010203
38 #define _VECTORMATH_PERM_Y 0x04050607
39 #define _VECTORMATH_PERM_Z 0x08090a0b
40 #define _VECTORMATH_PERM_W 0x0c0d0e0f
41 #define _VECTORMATH_PERM_A 0x10111213
42 #define _VECTORMATH_PERM_B 0x14151617
43 #define _VECTORMATH_PERM_C 0x18191a1b
44 #define _VECTORMATH_PERM_D 0x1c1d1e1f
// Four-word permute patterns assembled from the selectors above; the name
// spells the source word of each result lane. They rely on the vec_uchar16 /
// vec_uint4 types, which are not defined in this part of the file.
45 #define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
46 #define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
47 #define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
48 #define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
49 #define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
50 #define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
51 #define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
// Word masks with all bits set in exactly one of the four 32-bit words.
52 #define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
53 #define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
54 #define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
55 #define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
// Unit basis vectors; the digits give the x, y, z, w lane values in order.
56 #define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
57 #define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
58 #define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
59 #define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
// Cosine threshold used by slerp(): the sine-based weights are selected only
// while the cosine of the angle between the inputs is below this value.
60 #define _VECTORMATH_SLERP_TOL 0.999f
61 //_VECTORMATH_SLERP_TOLF
62 
63 //-----------------------------------------------------------------------------
64 // Definitions
65 
66 #ifndef _VECTORMATH_INTERNAL_FUNCTIONS
67 #define _VECTORMATH_INTERNAL_FUNCTIONS
68 
// Shuffle wrapper whose four selectors are written in lane order (x, y, z, w);
// they are handed to _MM_SHUFFLE in the reverse order it expects.
#define _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps((a), (b), _MM_SHUFFLE((immw), (immz), (immy), (immx)))
70 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
71 {
72  __m128 result = _mm_mul_ps( vec0, vec1);
73  return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
74 }
75 
76 static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
77 {
78  __m128 result = _mm_mul_ps(vec0, vec1);
79  return _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(0,0,0,0)),
80  _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(1,1,1,1)),
81  _mm_add_ps(_mm_shuffle_ps(result, result, _MM_SHUFFLE(2,2,2,2)), _mm_shuffle_ps(result, result, _MM_SHUFFLE(3,3,3,3)))));
82 }
83 
84 static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
85 {
86  __m128 tmp0, tmp1, tmp2, tmp3, result;
87  tmp0 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
88  tmp1 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
89  tmp2 = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
90  tmp3 = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
91  result = vec_mul( tmp0, tmp1 );
92  result = vec_nmsub( tmp2, tmp3, result );
93  return result;
94 }
95 /*
96 static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
97 {
98 #if 0
99  vec_int4 bexp;
100  vec_uint4 mant, sign, hfloat;
101  vec_uint4 notZero, isInf;
102  const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
103  const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
104  const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
105 
106  sign = vec_sr((vec_uint4)v, (vec_uint4)16);
107  mant = vec_sr((vec_uint4)v, (vec_uint4)13);
108  bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
109 
110  notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
111  isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
112 
113  bexp = _mm_add_ps(bexp, (vec_int4)-112);
114  bexp = vec_sl(bexp, (vec_uint4)10);
115 
116  hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
117  hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
118  hfloat = vec_sel(hfloat, hfloatInf, isInf);
119  hfloat = vec_sel(hfloat, sign, mergeSign);
120 
121  return hfloat;
122 #else
123  assert(0);
124  return _mm_setzero_ps();
125 #endif
126 }
127 
128 static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
129 {
130 #if 0
131  vec_uint4 hfloat_u, hfloat_v;
132  const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
133  hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
134  hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
135  return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
136 #else
137  assert(0);
138  return _mm_setzero_si128();
139 #endif
140 }
141 */
142 
143 static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
144 {
145  SSEFloat s;
146  s.m128 = src;
147  SSEFloat d;
148  d.m128 = dst;
149  d.f[slot] = s.f[slot];
150  return d.m128;
151 }
152 
// Writes `scalar` into float lane `slot` of `vec` (vec must be an lvalue).
// The whole expansion and the scalar argument are parenthesized so the macro
// behaves as a single assignment expression wherever it is used.
#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar))
154 
155 static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar)
156 {
157  return _mm_set1_ps(scalar);
158 }
159 
160 #endif
161 
162 namespace Vectormath {
163 namespace Aos {
164 
165 
166 #ifdef _VECTORMATH_NO_SCALAR_CAST
167 VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const
168 {
169  return floatInVec(ref, i);
170 }
171 
172 VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const
173 #else
174 VECTORMATH_FORCE_INLINE VecIdx::operator float() const
175 #endif
176 {
177  return ((float *)&ref)[i];
178 }
179 
180 VECTORMATH_FORCE_INLINE float VecIdx::operator =( float scalar )
181 {
182  _vmathVfSetElement(ref, scalar, i);
183  return scalar;
184 }
185 
186 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const floatInVec &scalar )
187 {
188  ref = _vmathVfInsert(ref, scalar.get128(), i);
189  return scalar;
190 }
191 
192 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const VecIdx& scalar )
193 {
194  return *this = floatInVec(scalar.ref, scalar.i);
195 }
196 
197 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( float scalar )
198 {
199  return *this *= floatInVec(scalar);
200 }
201 
202 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( const floatInVec &scalar )
203 {
204  return *this = floatInVec(ref, i) * scalar;
205 }
206 
207 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( float scalar )
208 {
209  return *this /= floatInVec(scalar);
210 }
211 
212 inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
213 {
214  return *this = floatInVec(ref, i) / scalar;
215 }
216 
217 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( float scalar )
218 {
219  return *this += floatInVec(scalar);
220 }
221 
222 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( const floatInVec &scalar )
223 {
224  return *this = floatInVec(ref, i) + scalar;
225 }
226 
227 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( float scalar )
228 {
229  return *this -= floatInVec(scalar);
230 }
231 
232 VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( const floatInVec &scalar )
233 {
234  return *this = floatInVec(ref, i) - scalar;
235 }
236 
237 VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3& vec)
238 {
239  set128(vec.get128());
240 }
241 
243 {
244  mVec128 = vec;
245 }
246 
247 
248 VECTORMATH_FORCE_INLINE Vector3::Vector3( float _x, float _y, float _z )
249 {
250  mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
251 }
252 
254 {
255  __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
256  mVec128 = _mm_unpacklo_ps( xz, _y.get128() );
257 }
258 
260 {
261  mVec128 = pnt.get128();
262 }
263 
265 {
266  mVec128 = floatInVec(scalar).get128();
267 }
268 
270 {
271  mVec128 = scalar.get128();
272 }
273 
275 {
276  mVec128 = vf4;
277 }
278 
280 {
281  return Vector3( _VECTORMATH_UNIT_1000 );
282 }
283 
285 {
286  return Vector3( _VECTORMATH_UNIT_0100 );
287 }
288 
290 {
291  return Vector3( _VECTORMATH_UNIT_0010 );
292 }
293 
294 VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
295 {
296  return lerp( floatInVec(t), vec0, vec1 );
297 }
298 
299 VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
300 {
301  return ( vec0 + ( ( vec1 - vec0 ) * t ) );
302 }
303 
304 VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
305 {
306  return slerp( floatInVec(t), unitVec0, unitVec1 );
307 }
308 
309 VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
310 {
311  __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
312  cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
313  __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
314  angle = acosf4( cosAngle );
315  tttt = t.get128();
316  oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
317  angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
318  angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t
319  angles = _mm_mul_ps( angles, angle );
320  sines = sinf4( angles );
321  scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
322  scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
323  scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
324  return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
325 }
326 
328 {
329  return mVec128;
330 }
331 
332 VECTORMATH_FORCE_INLINE void loadXYZ(Point3& vec, const float* fptr)
333 {
334 #ifdef USE_SSE3_LDDQU
335  vec = Point3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
336 #else
337  SSEFloat fl;
338  fl.f[0] = fptr[0];
339  fl.f[1] = fptr[1];
340  fl.f[2] = fptr[2];
341  fl.f[3] = fptr[3];
342  vec = Point3( fl.m128);
343 #endif //USE_SSE3_LDDQU
344 
345 }
346 
347 
348 
349 VECTORMATH_FORCE_INLINE void loadXYZ(Vector3& vec, const float* fptr)
350 {
351 #ifdef USE_SSE3_LDDQU
352  vec = Vector3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
353 #else
354  SSEFloat fl;
355  fl.f[0] = fptr[0];
356  fl.f[1] = fptr[1];
357  fl.f[2] = fptr[2];
358  fl.f[3] = fptr[3];
359  vec = Vector3( fl.m128);
360 #endif //USE_SSE3_LDDQU
361 
362 }
363 
364 VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad )
365 {
366  __m128 dstVec = *quad;
367  VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
368  dstVec = vec_sel(vec.get128(), dstVec, sw);
369  *quad = dstVec;
370 }
371 
372 VECTORMATH_FORCE_INLINE void storeXYZ(const Point3& vec, float* fptr)
373 {
374  fptr[0] = vec.getX();
375  fptr[1] = vec.getY();
376  fptr[2] = vec.getZ();
377 }
378 
379 VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3& vec, float* fptr)
380 {
381  fptr[0] = vec.getX();
382  fptr[1] = vec.getY();
383  fptr[2] = vec.getZ();
384 }
385 
386 
387 VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
388 {
389  const float *quads = (float *)threeQuads;
390  vec0 = Vector3( _mm_load_ps(quads) );
391  vec1 = Vector3( _mm_loadu_ps(quads + 3) );
392  vec2 = Vector3( _mm_loadu_ps(quads + 6) );
393  vec3 = Vector3( _mm_loadu_ps(quads + 9) );
394 }
395 
396 VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
397 {
398  __m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
399  __m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
400  VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
401  VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
402  threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw );
403  threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
404  threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
405 }
406 /*
407 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
408 {
409  assert(0);
410 #if 0
411  __m128 xyz0[3];
412  __m128 xyz1[3];
413  storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
414  storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
415  threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
416  threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
417  threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
418 #endif
419 }
420 */
421 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator =( const Vector3 &vec )
422 {
423  mVec128 = vec.mVec128;
424  return *this;
425 }
426 
427 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( float _x )
428 {
429  _vmathVfSetElement(mVec128, _x, 0);
430  return *this;
431 }
432 
434 {
435  mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
436  return *this;
437 }
438 
440 {
441  return floatInVec( mVec128, 0 );
442 }
443 
444 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( float _y )
445 {
446  _vmathVfSetElement(mVec128, _y, 1);
447  return *this;
448 }
449 
451 {
452  mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
453  return *this;
454 }
455 
457 {
458  return floatInVec( mVec128, 1 );
459 }
460 
461 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( float _z )
462 {
463  _vmathVfSetElement(mVec128, _z, 2);
464  return *this;
465 }
466 
468 {
469  mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
470  return *this;
471 }
472 
474 {
475  return floatInVec( mVec128, 2 );
476 }
477 
478 VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, float value )
479 {
480  _vmathVfSetElement(mVec128, value, idx);
481  return *this;
482 }
483 
485 {
486  mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
487  return *this;
488 }
489 
491 {
492  return floatInVec( mVec128, idx );
493 }
494 
496 {
497  return VecIdx( mVec128, idx );
498 }
499 
500 VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator []( int idx ) const
501 {
502  return floatInVec( mVec128, idx );
503 }
504 
505 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator +( const Vector3 &vec ) const
506 {
507  return Vector3( _mm_add_ps( mVec128, vec.mVec128 ) );
508 }
509 
510 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( const Vector3 &vec ) const
511 {
512  return Vector3( _mm_sub_ps( mVec128, vec.mVec128 ) );
513 }
514 
515 VECTORMATH_FORCE_INLINE const Point3 Vector3::operator +( const Point3 &pnt ) const
516 {
517  return Point3( _mm_add_ps( mVec128, pnt.get128() ) );
518 }
519 
520 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( float scalar ) const
521 {
522  return *this * floatInVec(scalar);
523 }
524 
526 {
527  return Vector3( _mm_mul_ps( mVec128, scalar.get128() ) );
528 }
529 
531 {
532  *this = *this + vec;
533  return *this;
534 }
535 
536 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator -=( const Vector3 &vec )
537 {
538  *this = *this - vec;
539  return *this;
540 }
541 
542 VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( float scalar )
543 {
544  *this = *this * scalar;
545  return *this;
546 }
547 
549 {
550  *this = *this * scalar;
551  return *this;
552 }
553 
554 VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( float scalar ) const
555 {
556  return *this / floatInVec(scalar);
557 }
558 
560 {
561  return Vector3( _mm_div_ps( mVec128, scalar.get128() ) );
562 }
563 
565 {
566  *this = *this / scalar;
567  return *this;
568 }
569 
571 {
572  *this = *this / scalar;
573  return *this;
574 }
575 
577 {
578  //return Vector3(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
579 
580  VM_ATTRIBUTE_ALIGN16 static const int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
581  __m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf;
582  return Vector3(_mm_xor_ps(get128(),NEG_MASK));
583 }
584 
585 VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec )
586 {
587  return floatInVec(scalar) * vec;
588 }
589 
590 VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
591 {
592  return vec * scalar;
593 }
594 
595 VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
596 {
597  return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
598 }
599 
600 VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
601 {
602  return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
603 }
604 
605 VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec )
606 {
607  return Vector3( _mm_rcp_ps( vec.get128() ) );
608 }
609 
610 VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec )
611 {
612  return Vector3( fabsf4( vec.get128() ) );
613 }
614 
615 VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
616 {
617  __m128 vmask = toM128(0x7fffffff);
618  return Vector3( _mm_or_ps(
619  _mm_and_ps ( vmask, vec0.get128() ), // Value
620  _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs
621 }
622 
623 VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
624 {
625  return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
626 }
627 
628 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec )
629 {
630  return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
631 }
632 
633 VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
634 {
635  return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
636 }
637 
638 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec )
639 {
640  return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
641 }
642 
643 VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec )
644 {
645  return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
646 }
647 
648 VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
649 {
650  return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
651 }
652 
653 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec )
654 {
655  return floatInVec( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
656 }
657 
658 VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec )
659 {
660  return floatInVec( _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
661 }
662 
663 
665 {
666  return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
667 }
668 
669 VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec )
670 {
671  return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
672 }
673 
674 VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
675 {
676  return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
677 }
678 
679 VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
680 {
681  return select( vec0, vec1, boolInVec(select1) );
682 }
683 
684 
685 VECTORMATH_FORCE_INLINE const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1)
686 {
687  return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128()));
688 }
689 
690 #ifdef _VECTORMATH_DEBUG
691 
692 VECTORMATH_FORCE_INLINE void print( const Vector3 &vec )
693 {
694  union { __m128 v; float s[4]; } tmp;
695  tmp.v = vec.get128();
696  printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
697 }
698 
699 VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name )
700 {
701  union { __m128 v; float s[4]; } tmp;
702  tmp.v = vec.get128();
703  printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
704 }
705 
706 #endif
707 
708 VECTORMATH_FORCE_INLINE Vector4::Vector4( float _x, float _y, float _z, float _w )
709 {
710  mVec128 = _mm_setr_ps(_x, _y, _z, _w);
711  }
712 
714 {
715  mVec128 = _mm_unpacklo_ps(
716  _mm_unpacklo_ps( _x.get128(), _z.get128() ),
717  _mm_unpacklo_ps( _y.get128(), _w.get128() ) );
718 }
719 
720 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, float _w )
721 {
722  mVec128 = xyz.get128();
723  _vmathVfSetElement(mVec128, _w, 3);
724 }
725 
727 {
728  mVec128 = xyz.get128();
729  mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
730 }
731 
733 {
734  mVec128 = vec.get128();
735  mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
736 }
737 
738 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Point3 &pnt )
739 {
740  mVec128 = pnt.get128();
741  mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
742 }
743 
744 VECTORMATH_FORCE_INLINE Vector4::Vector4( const Quat &quat )
745 {
746  mVec128 = quat.get128();
747 }
748 
750 {
751  mVec128 = floatInVec(scalar).get128();
752 }
753 
755 {
756  mVec128 = scalar.get128();
757 }
758 
760 {
761  mVec128 = vf4;
762 }
763 
765 {
766  return Vector4( _VECTORMATH_UNIT_1000 );
767 }
768 
770 {
771  return Vector4( _VECTORMATH_UNIT_0100 );
772 }
773 
775 {
776  return Vector4( _VECTORMATH_UNIT_0010 );
777 }
778 
780 {
781  return Vector4( _VECTORMATH_UNIT_0001 );
782 }
783 
784 VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
785 {
786  return lerp( floatInVec(t), vec0, vec1 );
787 }
788 
789 VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
790 {
791  return ( vec0 + ( ( vec1 - vec0 ) * t ) );
792 }
793 
794 VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
795 {
796  return slerp( floatInVec(t), unitVec0, unitVec1 );
797 }
798 
799 VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
800 {
801  __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
802  cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
803  __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
804  angle = acosf4( cosAngle );
805  tttt = t.get128();
806  oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
807  angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
808  angles = _mm_unpacklo_ps( angles, oneMinusT ); // angles = 1, 1-t, t, 1-t
809  angles = _mm_mul_ps( angles, angle );
810  sines = sinf4( angles );
811  scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
812  scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
813  scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
814  return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
815 }
816 
818 {
819  return mVec128;
820 }
821 /*
822 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
823 {
824  twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
825  twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
826 }
827 */
829 {
830  mVec128 = vec.mVec128;
831  return *this;
832 }
833 
834 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setXYZ( const Vector3 &vec )
835 {
836  VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
837  mVec128 = vec_sel( vec.get128(), mVec128, sw );
838  return *this;
839 }
840 
841 VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ( ) const
842 {
843  return Vector3( mVec128 );
844 }
845 
846 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( float _x )
847 {
848  _vmathVfSetElement(mVec128, _x, 0);
849  return *this;
850 }
851 
853 {
854  mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
855  return *this;
856 }
857 
859 {
860  return floatInVec( mVec128, 0 );
861 }
862 
863 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( float _y )
864 {
865  _vmathVfSetElement(mVec128, _y, 1);
866  return *this;
867 }
868 
870 {
871  mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
872  return *this;
873 }
874 
876 {
877  return floatInVec( mVec128, 1 );
878 }
879 
880 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( float _z )
881 {
882  _vmathVfSetElement(mVec128, _z, 2);
883  return *this;
884 }
885 
887 {
888  mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
889  return *this;
890 }
891 
893 {
894  return floatInVec( mVec128, 2 );
895 }
896 
897 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( float _w )
898 {
899  _vmathVfSetElement(mVec128, _w, 3);
900  return *this;
901 }
902 
904 {
905  mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
906  return *this;
907 }
908 
910 {
911  return floatInVec( mVec128, 3 );
912 }
913 
914 VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, float value )
915 {
916  _vmathVfSetElement(mVec128, value, idx);
917  return *this;
918 }
919 
921 {
922  mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
923  return *this;
924 }
925 
927 {
928  return floatInVec( mVec128, idx );
929 }
930 
932 {
933  return VecIdx( mVec128, idx );
934 }
935 
936 VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator []( int idx ) const
937 {
938  return floatInVec( mVec128, idx );
939 }
940 
941 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator +( const Vector4 &vec ) const
942 {
943  return Vector4( _mm_add_ps( mVec128, vec.mVec128 ) );
944 }
945 
946 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( const Vector4 &vec ) const
947 {
948  return Vector4( _mm_sub_ps( mVec128, vec.mVec128 ) );
949 }
950 
951 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( float scalar ) const
952 {
953  return *this * floatInVec(scalar);
954 }
955 
957 {
958  return Vector4( _mm_mul_ps( mVec128, scalar.get128() ) );
959 }
960 
962 {
963  *this = *this + vec;
964  return *this;
965 }
966 
967 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator -=( const Vector4 &vec )
968 {
969  *this = *this - vec;
970  return *this;
971 }
972 
973 VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( float scalar )
974 {
975  *this = *this * scalar;
976  return *this;
977 }
978 
980 {
981  *this = *this * scalar;
982  return *this;
983 }
984 
985 VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( float scalar ) const
986 {
987  return *this / floatInVec(scalar);
988 }
989 
991 {
992  return Vector4( _mm_div_ps( mVec128, scalar.get128() ) );
993 }
994 
996 {
997  *this = *this / scalar;
998  return *this;
999 }
1000 
1002 {
1003  *this = *this / scalar;
1004  return *this;
1005 }
1006 
1008 {
1009  return Vector4(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
1010 }
1011 
1012 VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec )
1013 {
1014  return floatInVec(scalar) * vec;
1015 }
1016 
1018 {
1019  return vec * scalar;
1020 }
1021 
1022 VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1023 {
1024  return Vector4( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
1025 }
1026 
1027 VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1028 {
1029  return Vector4( _mm_div_ps( vec0.get128(), vec1.get128() ) );
1030 }
1031 
1032 VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec )
1033 {
1034  return Vector4( _mm_rcp_ps( vec.get128() ) );
1035 }
1036 
1037 VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec )
1038 {
1039  return Vector4( fabsf4( vec.get128() ) );
1040 }
1041 
1042 VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1043 {
1044  __m128 vmask = toM128(0x7fffffff);
1045  return Vector4( _mm_or_ps(
1046  _mm_and_ps ( vmask, vec0.get128() ), // Value
1047  _mm_andnot_ps( vmask, vec1.get128() ) ) ); // Signs
1048 }
1049 
1050 VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1051 {
1052  return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) );
1053 }
1054 
1055 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec )
1056 {
1057  return floatInVec( _mm_max_ps(
1058  _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1059  _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
1060 }
1061 
1062 VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
1063 {
1064  return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) );
1065 }
1066 
1067 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec )
1068 {
1069  return floatInVec( _mm_min_ps(
1070  _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1071  _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
1072 }
1073 
1074 VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec )
1075 {
1076  return floatInVec( _mm_add_ps(
1077  _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ),
1078  _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) ) ) );
1079 }
1080 
1081 VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
1082 {
1083  return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
1084 }
1085 
1086 VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec )
1087 {
1088  return floatInVec( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
1089 }
1090 
1091 VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec )
1092 {
1093  return floatInVec( _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
1094 }
1095 
1097 {
1098  return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
1099 }
1100 
1101 VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec )
1102 {
1103  return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) );
1104 }
1105 
1106 VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
1107 {
1108  return select( vec0, vec1, boolInVec(select1) );
1109 }
1110 
1111 
1112 #ifdef _VECTORMATH_DEBUG
1113 
1114 VECTORMATH_FORCE_INLINE void print( const Vector4 &vec )
1115 {
1116  union { __m128 v; float s[4]; } tmp;
1117  tmp.v = vec.get128();
1118  printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
1119 }
1120 
1121 VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name )
1122 {
1123  union { __m128 v; float s[4]; } tmp;
1124  tmp.v = vec.get128();
1125  printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
1126 }
1127 
1128 #endif
1129 
1130 VECTORMATH_FORCE_INLINE Point3::Point3( float _x, float _y, float _z )
1131 {
1132  mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f);
1133 }
1134 
1136 {
1137  mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() );
1138 }
1139 
1141 {
1142  mVec128 = vec.get128();
1143 }
1144 
1146 {
1147  mVec128 = floatInVec(scalar).get128();
1148 }
1149 
1151 {
1152  mVec128 = scalar.get128();
1153 }
1154 
1156 {
1157  mVec128 = vf4;
1158 }
1159 
1160 VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
1161 {
1162  return lerp( floatInVec(t), pnt0, pnt1 );
1163 }
1164 
1165 VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
1166 {
1167  return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
1168 }
1169 
1171 {
1172  return mVec128;
1173 }
1174 
1175 VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad )
1176 {
1177  __m128 dstVec = *quad;
1178  VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
1179  dstVec = vec_sel(pnt.get128(), dstVec, sw);
1180  *quad = dstVec;
1181 }
1182 
1183 VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
1184 {
1185  const float *quads = (float *)threeQuads;
1186  pnt0 = Point3( _mm_load_ps(quads) );
1187  pnt1 = Point3( _mm_loadu_ps(quads + 3) );
1188  pnt2 = Point3( _mm_loadu_ps(quads + 6) );
1189  pnt3 = Point3( _mm_loadu_ps(quads + 9) );
1190 }
1191 
1192 VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
1193 {
1194  __m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
1195  __m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
1196  VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
1197  VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
1198  threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw );
1199  threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
1200  threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
1201 }
1202 /*
1203 VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
1204 {
1205 #if 0
1206  __m128 xyz0[3];
1207  __m128 xyz1[3];
1208  storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
1209  storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
1210  threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
1211  threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
1212  threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
1213 #else
1214  assert(0);
1215 #endif
1216 }
1217 */
1218 VECTORMATH_FORCE_INLINE Point3 & Point3::operator =( const Point3 &pnt )
1219 {
1220  mVec128 = pnt.mVec128;
1221  return *this;
1222 }
1223 
1224 VECTORMATH_FORCE_INLINE Point3 & Point3::setX( float _x )
1225 {
1226  _vmathVfSetElement(mVec128, _x, 0);
1227  return *this;
1228 }
1229 
1231 {
1232  mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
1233  return *this;
1234 }
1235 
1237 {
1238  return floatInVec( mVec128, 0 );
1239 }
1240 
1241 VECTORMATH_FORCE_INLINE Point3 & Point3::setY( float _y )
1242 {
1243  _vmathVfSetElement(mVec128, _y, 1);
1244  return *this;
1245 }
1246 
1248 {
1249  mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
1250  return *this;
1251 }
1252 
1254 {
1255  return floatInVec( mVec128, 1 );
1256 }
1257 
1258 VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( float _z )
1259 {
1260  _vmathVfSetElement(mVec128, _z, 2);
1261  return *this;
1262 }
1263 
1265 {
1266  mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
1267  return *this;
1268 }
1269 
1271 {
1272  return floatInVec( mVec128, 2 );
1273 }
1274 
1275 VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, float value )
1276 {
1277  _vmathVfSetElement(mVec128, value, idx);
1278  return *this;
1279 }
1280 
1282 {
1283  mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
1284  return *this;
1285 }
1286 
1287 VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem( int idx ) const
1288 {
1289  return floatInVec( mVec128, idx );
1290 }
1291 
1293 {
1294  return VecIdx( mVec128, idx );
1295 }
1296 
1297 VECTORMATH_FORCE_INLINE const floatInVec Point3::operator []( int idx ) const
1298 {
1299  return floatInVec( mVec128, idx );
1300 }
1301 
1302 VECTORMATH_FORCE_INLINE const Vector3 Point3::operator -( const Point3 &pnt ) const
1303 {
1304  return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) );
1305 }
1306 
1307 VECTORMATH_FORCE_INLINE const Point3 Point3::operator +( const Vector3 &vec ) const
1308 {
1309  return Point3( _mm_add_ps( mVec128, vec.get128() ) );
1310 }
1311 
1312 VECTORMATH_FORCE_INLINE const Point3 Point3::operator -( const Vector3 &vec ) const
1313 {
1314  return Point3( _mm_sub_ps( mVec128, vec.get128() ) );
1315 }
1316 
1317 VECTORMATH_FORCE_INLINE Point3 & Point3::operator +=( const Vector3 &vec )
1318 {
1319  *this = *this + vec;
1320  return *this;
1321 }
1322 
1323 VECTORMATH_FORCE_INLINE Point3 & Point3::operator -=( const Vector3 &vec )
1324 {
1325  *this = *this - vec;
1326  return *this;
1327 }
1328 
1329 VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1330 {
1331  return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) );
1332 }
1333 
1334 VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1335 {
1336  return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) );
1337 }
1338 
1339 VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt )
1340 {
1341  return Point3( _mm_rcp_ps( pnt.get128() ) );
1342 }
1343 
1344 VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt )
1345 {
1346  return Point3( fabsf4( pnt.get128() ) );
1347 }
1348 
1349 VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1350 {
1351  __m128 vmask = toM128(0x7fffffff);
1352  return Point3( _mm_or_ps(
1353  _mm_and_ps ( vmask, pnt0.get128() ), // Value
1354  _mm_andnot_ps( vmask, pnt1.get128() ) ) ); // Signs
1355 }
1356 
1357 VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1358 {
1359  return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) );
1360 }
1361 
1362 VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt )
1363 {
1364  return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1365 }
1366 
1367 VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
1368 {
1369  return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) );
1370 }
1371 
1372 VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt )
1373 {
1374  return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1375 }
1376 
1377 VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt )
1378 {
1379  return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) );
1380 }
1381 
1382 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal )
1383 {
1384  return scale( pnt, floatInVec( scaleVal ) );
1385 }
1386 
1387 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
1388 {
1389  return mulPerElem( pnt, Point3( scaleVal ) );
1390 }
1391 
1392 VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
1393 {
1394  return mulPerElem( pnt, Point3( scaleVec ) );
1395 }
1396 
1397 VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
1398 {
1399  return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
1400 }
1401 
1402 VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt )
1403 {
1404  return lengthSqr( Vector3( pnt ) );
1405 }
1406 
1407 VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt )
1408 {
1409  return length( Vector3( pnt ) );
1410 }
1411 
1412 VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
1413 {
1414  return lengthSqr( ( pnt1 - pnt0 ) );
1415 }
1416 
1417 VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
1418 {
1419  return length( ( pnt1 - pnt0 ) );
1420 }
1421 
1422 VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
1423 {
1424  return select( pnt0, pnt1, boolInVec(select1) );
1425 }
1426 
1427 VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
1428 {
1429  return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
1430 }
1431 
1432 
1433 
1434 #ifdef _VECTORMATH_DEBUG
1435 
1436 VECTORMATH_FORCE_INLINE void print( const Point3 &pnt )
1437 {
1438  union { __m128 v; float s[4]; } tmp;
1439  tmp.v = pnt.get128();
1440  printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
1441 }
1442 
1443 VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name )
1444 {
1445  union { __m128 v; float s[4]; } tmp;
1446  tmp.v = pnt.get128();
1447  printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
1448 }
1449 
1450 #endif
1451 
1452 } // namespace Aos
1453 } // namespace Vectormath
1454 
1455 #endif
const Vector4 operator*(float scalar) const
Definition: neon/vec_aos.h:796
const Quat normalize(const Quat &quat)
#define _VECTORMATH_UNIT_1000
Definition: sse/vec_aos.h:56
float & operator[](int idx)
const Vector3 getXYZ() const
Definition: neon/vec_aos.h:706
const Vector3 recipPerElem(const Vector3 &vec)
Definition: neon/vec_aos.h:351
void loadXYZArray(Vector3 &vec0, Vector3 &vec1, Vector3 &vec2, Vector3 &vec3, const __m128 *threeQuads)
Definition: sse/vec_aos.h:387
static const Vector4 yAxis()
Definition: neon/vec_aos.h:564
float minElem(const Vector3 &vec)
Definition: neon/vec_aos.h:422
__m128 vec_float4
#define _VECTORMATH_UNIT_0100
Definition: sse/vec_aos.h:57
Vector4 & setXYZ(const Vector3 &vec)
Definition: neon/vec_aos.h:698
Vector4 & operator+=(const Vector4 &vec)
Definition: neon/vec_aos.h:806
#define _VECTORMATH_UNIT_0010
Definition: sse/vec_aos.h:58
static const Vector3 xAxis()
Definition: neon/vec_aos.h:64
#define _VECTORMATH_SLERP_TOL
Definition: sse/vec_aos.h:60
static const Vector3 zAxis()
Definition: neon/vec_aos.h:74
float dist(const Point3 &pnt0, const Point3 &pnt1)
const Vector3 minPerElem(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:413
Point3 & operator=(const Point3 &pnt)
Point3 & setElem(int idx, float value)
static const Vector4 xAxis()
Definition: neon/vec_aos.h:559
#define vec_madd(a, b, c)
static __m128 sinf4(vec_float4 x)
Vector3 & operator=(const Vector3 &vec)
Definition: neon/vec_aos.h:188
#define vec_mul(a, b)
Point3 & operator-=(const Vector3 &vec)
float distFromOrigin(const Point3 &pnt)
const Vector4 operator-() const
Definition: neon/vec_aos.h:840
Vector3 & operator/=(float scalar)
Definition: neon/vec_aos.h:313
#define VECTORMATH_FORCE_INLINE
static __m128 acosf4(__m128 x)
static __m128 _vmathVfCross(__m128 vec0, __m128 vec1)
Definition: sse/vec_aos.h:84
Vector3 & setY(float y)
Definition: neon/vec_aos.h:207
static const Vector3 yAxis()
Definition: neon/vec_aos.h:69
Vector4 & setW(float w)
Definition: neon/vec_aos.h:744
float3 & operator-=(float3 &a, const float3 &b)
Definition: btGpuDefines.h:177
Vector3 & operator-=(const Vector3 &vec)
Definition: neon/vec_aos.h:292
static const Vector4 wAxis()
Definition: neon/vec_aos.h:574
Vector4 & setZ(float z)
Definition: neon/vec_aos.h:733
const Vector3 maxPerElem(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:396
#define vec_splat(x, e)
#define _vmathVfSetElement(vec, scalar, slot)
Definition: sse/vec_aos.h:153
float lengthSqr(const Vector3 &vec)
Definition: neon/vec_aos.h:447
Vector3 & setZ(float z)
Definition: neon/vec_aos.h:218
float distSqrFromOrigin(const Point3 &pnt)
const Point3 operator+(const Vector3 &vec) const
float & operator[](int idx)
Definition: neon/vec_aos.h:766
const Point3 scale(const Point3 &pnt, float scaleVal)
const Vector3 operator-(const Point3 &pnt) const
float projection(const Point3 &pnt, const Vector3 &unitVec)
Vector3 & setX(float x)
Definition: neon/vec_aos.h:196
const Vector3 copySignPerElem(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:387
float4 & operator+=(float4 &a, const float4 &b)
Definition: btGpuDefines.h:133
const Vector3 operator-() const
Definition: neon/vec_aos.h:319
__m128 get128() const
Definition: sse/vec_aos.h:1170
const Vector3 operator+(const Vector3 &vec) const
Definition: neon/vec_aos.h:250
static __m128 toM128(unsigned int x)
#define _VECTORMATH_UNIT_0001
Definition: sse/vec_aos.h:59
static __m128 _vmathVfDot4(__m128 vec0, __m128 vec1)
Definition: sse/vec_aos.h:76
Vector3 & setElem(int idx, float value)
Definition: neon/vec_aos.h:229
__m128 get128() const
#define VM_ATTRIBUTE_ALIGN16
void storeXYZArray(const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 *threeQuads)
Definition: sse/vec_aos.h:396
#define vec_nmsub(a, b, c)
const Vector3 operator/(float scalar) const
Definition: neon/vec_aos.h:304
const Vector3 normalizeApprox(const Vector3 &vec)
Definition: sse/vec_aos.h:664
void set128(vec_float4 vec)
Definition: sse/vec_aos.h:242
float4 & operator*=(float4 &a, float fact)
Definition: btGpuDefines.h:128
Point3 & operator+=(const Vector3 &vec)
__m128 get128() const
const Vector4 operator+(const Vector4 &vec) const
Definition: neon/vec_aos.h:776
float distSqr(const Point3 &pnt0, const Point3 &pnt1)
static __m128 newtonrapson_rsqrt4(const __m128 v)
float getElem(int idx) const
Definition: neon/vec_aos.h:235
static __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
Definition: sse/vec_aos.h:143
const Quat lerp(float t, const Quat &quat0, const Quat &quat1)
Definition: neon/quat_aos.h:73
const Matrix3 select(const Matrix3 &mat0, const Matrix3 &mat1, bool select1)
Definition: neon/mat_aos.h:409
Point3 & setZ(float z)
Vector4 & setX(float x)
Definition: neon/vec_aos.h:711
const Vector4 operator/(float scalar) const
Definition: neon/vec_aos.h:824
Vector3 & operator*=(float scalar)
Definition: neon/vec_aos.h:298
float getElem(int idx) const
Definition: neon/vec_aos.h:761
Vector3 & operator+=(const Vector3 &vec)
Definition: neon/vec_aos.h:286
float getElem(int idx) const
float maxElem(const Vector3 &vec)
Definition: neon/vec_aos.h:405
void storeXYZ(const Vector3 &vec, float *fptr)
Definition: neon/vec_aos.h:105
float & operator[](int idx)
Definition: neon/vec_aos.h:240
Point3 & setX(float x)
static __m128 _vmathVfSplatScalar(float scalar)
Definition: sse/vec_aos.h:155
float sum(const Vector3 &vec)
Definition: neon/vec_aos.h:430
static const Vector4 zAxis()
Definition: neon/vec_aos.h:569
float dot(const Quat &quat0, const Quat &quat1)
static __m128 fabsf4(__m128 x)
const Vector3 cross(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:473
const Matrix3 mulPerElem(const Matrix3 &mat0, const Matrix3 &mat1)
Definition: neon/mat_aos.h:286
__m128 get128() const
Definition: sse/vec_aos.h:817
__m128 get128() const
Definition: sse/vec_aos.h:327
Vector4 & operator/=(float scalar)
Definition: neon/vec_aos.h:834
Vector4 & setY(float y)
Definition: neon/vec_aos.h:722
const Vector3 operator*(float scalar) const
Definition: neon/vec_aos.h:277
float length(const Quat &quat)
Vector4 & operator*=(float scalar)
Definition: neon/vec_aos.h:818
const Vector3 divPerElem(const Vector3 &vec0, const Vector3 &vec1)
Definition: neon/vec_aos.h:342
Point3 & setY(float y)
static __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
static __m128 _vmathVfDot3(__m128 vec0, __m128 vec1)
Definition: sse/vec_aos.h:70
const Matrix3 absPerElem(const Matrix3 &mat)
Definition: neon/mat_aos.h:233
Vector4 & setElem(int idx, float value)
Definition: neon/vec_aos.h:755
const Matrix3 operator*(float scalar, const Matrix3 &mat)
Definition: neon/mat_aos.h:257
Vector4 & operator=(const Vector4 &vec)
Definition: neon/vec_aos.h:689
Vector4 & operator-=(const Vector4 &vec)
Definition: neon/vec_aos.h:812
void loadXYZ(Vector3 &vec, const float *fptr)
Definition: neon/vec_aos.h:100
const Quat slerp(float t, const Quat &unitQuat0, const Quat &unitQuat1)
Definition: neon/quat_aos.h:78