diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3Common/b3Vector3.h')
-rw-r--r-- | thirdparty/bullet/Bullet3Common/b3Vector3.h | 857 |
1 files changed, 408 insertions, 449 deletions
diff --git a/thirdparty/bullet/Bullet3Common/b3Vector3.h b/thirdparty/bullet/Bullet3Common/b3Vector3.h index 16ec02b0ed..56e6c13311 100644 --- a/thirdparty/bullet/Bullet3Common/b3Vector3.h +++ b/thirdparty/bullet/Bullet3Common/b3Vector3.h @@ -12,8 +12,6 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ - - #ifndef B3_VECTOR3_H #define B3_VECTOR3_H @@ -28,37 +26,34 @@ subject to the following restrictions: #else #define b3Vector3Data b3Vector3FloatData #define b3Vector3DataName "b3Vector3FloatData" -#endif //B3_USE_DOUBLE_PRECISION +#endif //B3_USE_DOUBLE_PRECISION #if defined B3_USE_SSE //typedef uint32_t __m128i __attribute__ ((vector_size(16))); #ifdef _MSC_VER -#pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255' +#pragma warning(disable : 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255' #endif - -#define B3_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x)) +#define B3_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x)) //#define b3_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) ) -#define b3_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) ) -#define b3_splat3_ps( _a, _i ) b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i, 3) ) -#define b3_splat_ps( _a, _i ) b3_pshufd_ps((_a), B3_SHUFFLE(_i,_i,_i,_i) ) +#define b3_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask)) +#define b3_splat3_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, 3)) +#define b3_splat_ps(_a, _i) b3_pshufd_ps((_a), B3_SHUFFLE(_i, _i, _i, _i)) #define b3v3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) -#define b3vAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) +#define b3vAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)) #define b3vFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) #define b3v3AbsfMask b3CastiTo128f(b3v3AbsiMask) #define b3vFFF0fMask b3CastiTo128f(b3vFFF0Mask) #define b3vxyzMaskf b3vFFF0fMask #define b3vAbsfMask b3CastiTo128f(b3vAbsMask) - - const __m128 B3_ATTRIBUTE_ALIGNED16(b3vMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f}; const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1110) = {1.0f, 1.0f, 1.0f, 0.0f}; const __m128 B3_ATTRIBUTE_ALIGNED16(b3vHalf) = {0.5f, 0.5f, 0.5f, 0.5f}; -const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f}; +const __m128 B3_ATTRIBUTE_ALIGNED16(b3v1_5) = {1.5f, 1.5f, 1.5f, 1.5f}; #endif @@ -74,70 +69,69 @@ const int32x4_t B3_ATTRIBUTE_ALIGNED16(b3v3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x class b3Vector3; class b3Vector4; -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) //#if defined (B3_USE_SSE) || defined (B3_USE_NEON) -inline b3Vector3 b3MakeVector3( b3SimdFloat4 v); -inline b3Vector4 b3MakeVector4( b3SimdFloat4 vec); +inline b3Vector3 b3MakeVector3(b3SimdFloat4 v); +inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec); #endif -inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z); -inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w); -inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w); - +inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z); +inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); +inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w); /**@brief b3Vector3 can be used to represent 3D points and vectors. * It has an un-used w component to suit 16-byte alignment when b3Vector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers */ -B3_ATTRIBUTE_ALIGNED16(class) b3Vector3 +B3_ATTRIBUTE_ALIGNED16(class) +b3Vector3 { public: -#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM - union { - b3SimdFloat4 mVec128; - float m_floats[4]; - struct {float x,y,z,w;}; - - }; +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM + union { + b3SimdFloat4 mVec128; + float m_floats[4]; + struct + { + float x, y, z, w; + }; + }; #else - union - { - float m_floats[4]; - struct {float x,y,z,w;}; + union { + float m_floats[4]; + struct + { + float x, y, z, w; + }; }; #endif - public: - B3_DECLARE_ALIGNED_ALLOCATOR(); -#if defined (B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) // _WIN32 || ARM /*B3_FORCE_INLINE b3Vector3() { } */ - B3_FORCE_INLINE b3SimdFloat4 get128() const - { - return mVec128; - } - B3_FORCE_INLINE void set128(b3SimdFloat4 v128) - { - mVec128 = v128; - } + B3_FORCE_INLINE b3SimdFloat4 get128() const + { + return mVec128; + } + B3_FORCE_INLINE void set128(b3SimdFloat4 v128) + { + mVec128 = v128; + } #endif - public: - - - -/**@brief Add a vector to this one +public: + /**@brief Add a vector to this one * @param The vector to add to this one */ B3_FORCE_INLINE b3Vector3& operator+=(const b3Vector3& v) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = _mm_add_ps(mVec128, v.mVec128); #elif defined(B3_USE_NEON) mVec128 = vaddq_f32(mVec128, v.mVec128); @@ -149,12 +143,11 @@ public: return *this; } - - /**@brief Subtract a vector from this one + /**@brief Subtract a vector from this one * @param The vector to subtract */ B3_FORCE_INLINE b3Vector3& operator-=(const b3Vector3& v) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = _mm_sub_ps(mVec128, v.mVec128); #elif defined(B3_USE_NEON) mVec128 = vsubq_f32(mVec128, v.mVec128); @@ -166,13 +159,13 @@ public: return *this; } - /**@brief Scale the vector + /**@brief Scale the vector * @param s Scale factor */ B3_FORCE_INLINE b3Vector3& operator*=(const b3Scalar& s) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + __m128 vs = _mm_load_ss(&s); // (S 0 0 0) + vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) mVec128 = _mm_mul_ps(mVec128, vs); #elif defined(B3_USE_NEON) mVec128 = vmulq_n_f32(mVec128, s); @@ -184,13 +177,13 @@ public: return *this; } - /**@brief Inversely scale the vector + /**@brief Inversely scale the vector * @param s Scale factor to divide by */ B3_FORCE_INLINE b3Vector3& operator/=(const b3Scalar& s) { b3FullAssert(s != b3Scalar(0.0)); -#if 0 //defined(B3_USE_SSE_IN_API) +#if 0 //defined(B3_USE_SSE_IN_API) // this code is not faster ! __m128 vs = _mm_load_ss(&s); vs = _mm_div_ss(b3v1110, vs); @@ -204,11 +197,11 @@ public: #endif } - /**@brief Return the dot product + /**@brief Return the dot product * @param v The other vector in the dot product */ B3_FORCE_INLINE b3Scalar dot(const b3Vector3& v) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) __m128 vd = _mm_mul_ps(mVec128, v.mVec128); __m128 z = _mm_movehl_ps(vd, vd); __m128 y = _mm_shuffle_ps(vd, vd, 0x55); @@ -221,29 +214,29 @@ public: x = vadd_f32(x, vget_high_f32(vd)); return vget_lane_f32(x, 0); #else - return m_floats[0] * v.m_floats[0] + - m_floats[1] * v.m_floats[1] + - m_floats[2] * v.m_floats[2]; + return m_floats[0] * v.m_floats[0] + + m_floats[1] * v.m_floats[1] + + m_floats[2] * v.m_floats[2]; #endif } - /**@brief Return the length of the vector squared */ + /**@brief Return the length of the vector squared */ B3_FORCE_INLINE b3Scalar length2() const { return dot(*this); } - /**@brief Return the length of the vector */ + /**@brief Return the length of the vector */ B3_FORCE_INLINE b3Scalar length() const { return b3Sqrt(length2()); } - /**@brief Return the distance squared between the ends of this and another vector + /**@brief Return the distance squared between the ends of this and another vector * This is symantically treating the vector like a point */ B3_FORCE_INLINE b3Scalar distance2(const b3Vector3& v) const; - /**@brief Return the distance between the ends of this and another vector + /**@brief Return the distance between the ends of this and another vector * This is symantically treating the vector like a point */ B3_FORCE_INLINE b3Scalar distance(const b3Vector3& v) const; @@ -251,7 +244,7 @@ public: { b3Scalar l2 = length2(); //triNormal.normalize(); - if (l2 >= B3_EPSILON*B3_EPSILON) + if (l2 >= B3_EPSILON * B3_EPSILON) { (*this) /= b3Sqrt(l2); } @@ -262,43 +255,42 @@ public: return *this; } - /**@brief Normalize this vector + /**@brief Normalize this vector * x^2 + y^2 + z^2 = 1 */ B3_FORCE_INLINE b3Vector3& normalize() { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - // dot product first +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + // dot product first __m128 vd = _mm_mul_ps(mVec128, mVec128); __m128 z = _mm_movehl_ps(vd, vd); __m128 y = _mm_shuffle_ps(vd, vd, 0x55); vd = _mm_add_ss(vd, y); vd = _mm_add_ss(vd, z); - #if 0 +#if 0 vd = _mm_sqrt_ss(vd); vd = _mm_div_ss(b3v1110, vd); vd = b3_splat_ps(vd, 0x80); mVec128 = _mm_mul_ps(mVec128, vd); - #else +#else - // NR step 1/sqrt(x) - vd is x, y is output - y = _mm_rsqrt_ss(vd); // estimate + // NR step 1/sqrt(x) - vd is x, y is output + y = _mm_rsqrt_ss(vd); // estimate - // one step NR - z = b3v1_5; - vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5 - //x2 = vd; - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 - vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0 - z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0 + // one step NR + z = b3v1_5; + vd = _mm_mul_ss(vd, b3vHalf); // vd * 0.5 + //x2 = vd; + vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 + vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0 + z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0 - y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0) + y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0) y = b3_splat_ps(y, 0x80); mVec128 = _mm_mul_ps(mVec128, y); - #endif - +#endif return *this; #else @@ -306,15 +298,15 @@ public: #endif } - /**@brief Return a normalized version of this vector */ + /**@brief Return a normalized version of this vector */ B3_FORCE_INLINE b3Vector3 normalized() const; - /**@brief Return a rotated version of this vector + /**@brief Return a rotated version of this vector * @param wAxis The axis to rotate about * @param angle The angle to rotate by */ - B3_FORCE_INLINE b3Vector3 rotate( const b3Vector3& wAxis, const b3Scalar angle ) const; + B3_FORCE_INLINE b3Vector3 rotate(const b3Vector3& wAxis, const b3Scalar angle) const; - /**@brief Return the angle between this and another vector + /**@brief Return the angle between this and another vector * @param v The other vector */ B3_FORCE_INLINE b3Scalar angle(const b3Vector3& v) const { @@ -323,10 +315,10 @@ public: return b3Acos(dot(v) / s); } - /**@brief Return a vector will the absolute values of each element */ + /**@brief Return a vector will the absolute values of each element */ B3_FORCE_INLINE b3Vector3 absolute() const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) return b3MakeVector3(_mm_and_ps(mVec128, b3v3AbsfMask)); #elif defined(B3_USE_NEON) return b3Vector3(vabsq_f32(mVec128)); @@ -338,15 +330,15 @@ public: #endif } - /**@brief Return the cross product between this and another vector + /**@brief Return the cross product between this and another vector * @param v The other vector */ B3_FORCE_INLINE b3Vector3 cross(const b3Vector3& v) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - __m128 T, V; +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + __m128 T, V; - T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) + T = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) + V = b3_pshufd_ps(v.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) V = _mm_mul_ps(V, mVec128); T = _mm_mul_ps(T, v.mVec128); @@ -381,10 +373,10 @@ public: B3_FORCE_INLINE b3Scalar triple(const b3Vector3& v1, const b3Vector3& v2) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) // cross: - __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) - __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) + __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) + __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, B3_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0) V = _mm_mul_ps(V, v1.mVec128); T = _mm_mul_ps(T, v2.mVec128); @@ -422,25 +414,24 @@ public: x = vadd_f32(x, vget_high_f32(V)); return vget_lane_f32(x, 0); #else - return - m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + - m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + - m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]); + return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + + m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + + m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]); #endif } - /**@brief Return the axis with the smallest value + /**@brief Return the axis with the smallest value * Note return values are 0,1,2 for x, y, or z */ B3_FORCE_INLINE int minAxis() const { - return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2); + return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2); } - /**@brief Return the axis with the largest value + /**@brief Return the axis with the largest value * Note return values are 0,1,2 for x, y, or z */ B3_FORCE_INLINE int maxAxis() const { - return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0); + return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0); } B3_FORCE_INLINE int furthestAxis() const @@ -453,18 +444,17 @@ public: return absolute().maxAxis(); } - B3_FORCE_INLINE void setInterpolate3(const b3Vector3& v0, const b3Vector3& v1, b3Scalar rt) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0) b3Scalar s = b3Scalar(1.0) - rt; - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) + __m128 vs = _mm_load_ss(&s); // (S 0 0 0) + vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) __m128 r0 = _mm_mul_ps(v0.mVec128, vs); - vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0) + vrt = b3_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0) __m128 r1 = _mm_mul_ps(v1.mVec128, vrt); - __m128 tmp3 = _mm_add_ps(r0,r1); + __m128 tmp3 = _mm_add_ps(r0, r1); mVec128 = tmp3; #elif defined(B3_USE_NEON) float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128); @@ -480,14 +470,14 @@ public: #endif } - /**@brief Return the linear interpolation between this and another vector + /**@brief Return the linear interpolation between this and another vector * @param v The other vector * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */ B3_FORCE_INLINE b3Vector3 lerp(const b3Vector3& v, const b3Scalar& t) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - __m128 vt = _mm_load_ss(&t); // (t 0 0 0) - vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + __m128 vt = _mm_load_ss(&t); // (t 0 0 0) + vt = b3_pshufd_ps(vt, 0x80); // (rt rt rt 0.0) __m128 vl = _mm_sub_ps(v.mVec128, mVec128); vl = _mm_mul_ps(vl, vt); vl = _mm_add_ps(vl, mVec128); @@ -500,18 +490,17 @@ public: return b3Vector3(vl); #else - return - b3MakeVector3( m_floats[0] + (v.m_floats[0] - m_floats[0]) * t, - m_floats[1] + (v.m_floats[1] - m_floats[1]) * t, - m_floats[2] + (v.m_floats[2] - m_floats[2]) * t); + return b3MakeVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t, + m_floats[1] + (v.m_floats[1] - m_floats[1]) * t, + m_floats[2] + (v.m_floats[2] - m_floats[2]) * t); #endif } - /**@brief Elementwise multiply this vector by the other + /**@brief Elementwise multiply this vector by the other * @param v The other vector */ B3_FORCE_INLINE b3Vector3& operator*=(const b3Vector3& v) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = _mm_mul_ps(mVec128, v.mVec128); #elif defined(B3_USE_NEON) mVec128 = vmulq_f32(mVec128, v.mVec128); @@ -523,53 +512,53 @@ public: return *this; } - /**@brief Return the x value */ - B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } - /**@brief Return the y value */ - B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } - /**@brief Return the z value */ - B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } -/**@brief Return the w value */ - B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } - - /**@brief Set the x value */ - B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x;}; - /**@brief Set the y value */ - B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y;}; - /**@brief Set the z value */ - B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z;}; - /**@brief Set the w value */ - B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w;}; + /**@brief Return the x value */ + B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; } + /**@brief Return the y value */ + B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; } + /**@brief Return the z value */ + B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; } + /**@brief Return the w value */ + B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; } + + /**@brief Set the x value */ + B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x; }; + /**@brief Set the y value */ + B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y; }; + /**@brief Set the z value */ + B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z; }; + /**@brief Set the w value */ + B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w; }; //B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; } //B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; } ///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons. - B3_FORCE_INLINE operator b3Scalar *() { return &m_floats[0]; } - B3_FORCE_INLINE operator const b3Scalar *() const { return &m_floats[0]; } + B3_FORCE_INLINE operator b3Scalar*() { return &m_floats[0]; } + B3_FORCE_INLINE operator const b3Scalar*() const { return &m_floats[0]; } - B3_FORCE_INLINE bool operator==(const b3Vector3& other) const + B3_FORCE_INLINE bool operator==(const b3Vector3& other) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128))); #else - return ((m_floats[3]==other.m_floats[3]) && - (m_floats[2]==other.m_floats[2]) && - (m_floats[1]==other.m_floats[1]) && - (m_floats[0]==other.m_floats[0])); + return ((m_floats[3] == other.m_floats[3]) && + (m_floats[2] == other.m_floats[2]) && + (m_floats[1] == other.m_floats[1]) && + (m_floats[0] == other.m_floats[0])); #endif } - B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const + B3_FORCE_INLINE bool operator!=(const b3Vector3& other) const { return !(*this == other); } - /**@brief Set each element to the max of the current values and the values of another b3Vector3 + /**@brief Set each element to the max of the current values and the values of another b3Vector3 * @param other The other b3Vector3 to compare with */ - B3_FORCE_INLINE void setMax(const b3Vector3& other) + B3_FORCE_INLINE void setMax(const b3Vector3& other) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = _mm_max_ps(mVec128, other.mVec128); #elif defined(B3_USE_NEON) mVec128 = vmaxq_f32(mVec128, other.mVec128); @@ -581,12 +570,12 @@ public: #endif } - /**@brief Set each element to the min of the current values and the values of another b3Vector3 + /**@brief Set each element to the min of the current values and the values of another b3Vector3 * @param other The other b3Vector3 to compare with */ - B3_FORCE_INLINE void setMin(const b3Vector3& other) + B3_FORCE_INLINE void setMin(const b3Vector3& other) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = _mm_min_ps(mVec128, other.mVec128); #elif defined(B3_USE_NEON) mVec128 = vminq_f32(mVec128, other.mVec128); @@ -598,46 +587,46 @@ public: #endif } - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) + B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z) { - m_floats[0]=_x; - m_floats[1]=_y; - m_floats[2]=_z; + m_floats[0] = _x; + m_floats[1] = _y; + m_floats[2] = _z; m_floats[3] = b3Scalar(0.f); } - void getSkewSymmetricMatrix(b3Vector3* v0,b3Vector3* v1,b3Vector3* v2) const + void getSkewSymmetricMatrix(b3Vector3 * v0, b3Vector3 * v1, b3Vector3 * v2) const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask); + __m128 V = _mm_and_ps(mVec128, b3vFFF0fMask); __m128 V0 = _mm_xor_ps(b3vMzeroMask, V); __m128 V2 = _mm_movelh_ps(V0, V); __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE); - V0 = _mm_shuffle_ps(V0, V, 0xDB); + V0 = _mm_shuffle_ps(V0, V, 0xDB); V2 = _mm_shuffle_ps(V2, V, 0xF9); v0->mVec128 = V0; v1->mVec128 = V1; v2->mVec128 = V2; #else - v0->setValue(0. ,-getZ() ,getY()); - v1->setValue(getZ() ,0. ,-getX()); - v2->setValue(-getY() ,getX() ,0.); + v0->setValue(0., -getZ(), getY()); + v1->setValue(getZ(), 0., -getX()); + v2->setValue(-getY(), getX(), 0.); #endif } void setZero() { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128); #elif defined(B3_USE_NEON) int32x4_t vi = vdupq_n_s32(0); mVec128 = vreinterpretq_f32_s32(vi); #else - setValue(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); #endif } @@ -651,76 +640,76 @@ public: return length2() < B3_EPSILON; } - B3_FORCE_INLINE void serialize(struct b3Vector3Data& dataOut) const; + B3_FORCE_INLINE void serialize(struct b3Vector3Data & dataOut) const; - B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn); + B3_FORCE_INLINE void deSerialize(const struct b3Vector3Data& dataIn); - B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData& dataOut) const; + B3_FORCE_INLINE void serializeFloat(struct b3Vector3FloatData & dataOut) const; - B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn); + B3_FORCE_INLINE void deSerializeFloat(const struct b3Vector3FloatData& dataIn); - B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData& dataOut) const; + B3_FORCE_INLINE void serializeDouble(struct b3Vector3DoubleData & dataOut) const; - B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn); + B3_FORCE_INLINE void deSerializeDouble(const struct b3Vector3DoubleData& dataIn); - /**@brief returns index of maximum dot product between this and vectors in array[] + /**@brief returns index of maximum dot product between this and vectors in array[] * @param array The other vectors * @param array_count The number of other vectors * @param dotOut The maximum dot product */ - B3_FORCE_INLINE long maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const; + B3_FORCE_INLINE long maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; - /**@brief returns index of minimum dot product between this and vectors in array[] + /**@brief returns index of minimum dot product between this and vectors in array[] * @param array The other vectors * @param array_count The number of other vectors * @param dotOut The minimum dot product */ - B3_FORCE_INLINE long minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const; - - /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */ - B3_FORCE_INLINE b3Vector3 dot3( const b3Vector3 &v0, const b3Vector3 &v1, const b3Vector3 &v2 ) const - { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - - __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 ); - __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 ); - __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 ); - __m128 b0 = _mm_unpacklo_ps( a0, a1 ); - __m128 b1 = _mm_unpackhi_ps( a0, a1 ); - __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() ); - __m128 r = _mm_movelh_ps( b0, b2 ); - r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 )); - a2 = _mm_and_ps( a2, b3vxyzMaskf); - r = _mm_add_ps( r, b3CastdTo128f (_mm_move_sd( b3CastfTo128d(a2), b3CastfTo128d(b1) ))); - return b3MakeVector3(r); + B3_FORCE_INLINE long minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const; + + /* create a vector as b3Vector3( this->dot( b3Vector3 v0 ), this->dot( b3Vector3 v1), this->dot( b3Vector3 v2 )) */ + B3_FORCE_INLINE b3Vector3 dot3(const b3Vector3& v0, const b3Vector3& v1, const b3Vector3& v2) const + { +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + + __m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128); + __m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128); + __m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128); + __m128 b0 = _mm_unpacklo_ps(a0, a1); + __m128 b1 = _mm_unpackhi_ps(a0, a1); + __m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps()); + __m128 r = _mm_movelh_ps(b0, b2); + r = _mm_add_ps(r, _mm_movehl_ps(b2, b0)); + a2 = _mm_and_ps(a2, b3vxyzMaskf); + r = _mm_add_ps(r, b3CastdTo128f(_mm_move_sd(b3CastfTo128d(a2), b3CastfTo128d(b1)))); + return b3MakeVector3(r); #elif defined(B3_USE_NEON) - static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 }; - float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128); - float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128); - float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128); - float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1)); - a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask ); - float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] ); - float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f)); - return b3Vector3( vcombine_f32(b0, b1) ); + static const uint32x4_t xyzMask = (const uint32x4_t){-1, -1, -1, 0}; + float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128); + float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128); + float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128); + float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1)); + a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask); + float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]); + float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f)); + return b3Vector3(vcombine_f32(b0, b1)); #else - return b3MakeVector3( dot(v0), dot(v1), dot(v2)); + return b3MakeVector3(dot(v0), dot(v1), dot(v2)); #endif - } + } }; /**@brief Return the sum of two vectors (Point symantics)*/ B3_FORCE_INLINE b3Vector3 operator+(const b3Vector3& v1, const b3Vector3& v2) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) return b3MakeVector3(_mm_add_ps(v1.mVec128, v2.mVec128)); #elif defined(B3_USE_NEON) return b3MakeVector3(vaddq_f32(v1.mVec128, v2.mVec128)); #else return b3MakeVector3( - v1.m_floats[0] + v2.m_floats[0], - v1.m_floats[1] + v2.m_floats[1], - v1.m_floats[2] + v2.m_floats[2]); + v1.m_floats[0] + v2.m_floats[0], + v1.m_floats[1] + v2.m_floats[1], + v1.m_floats[2] + v2.m_floats[2]); #endif } @@ -728,15 +717,15 @@ operator+(const b3Vector3& v1, const b3Vector3& v2) B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& v1, const b3Vector3& v2) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) return b3MakeVector3(_mm_mul_ps(v1.mVec128, v2.mVec128)); #elif defined(B3_USE_NEON) return b3MakeVector3(vmulq_f32(v1.mVec128, v2.mVec128)); #else return b3MakeVector3( - v1.m_floats[0] * v2.m_floats[0], - v1.m_floats[1] * v2.m_floats[1], - v1.m_floats[2] * v2.m_floats[2]); + v1.m_floats[0] * v2.m_floats[0], + v1.m_floats[1] * v2.m_floats[1], + v1.m_floats[2] * v2.m_floats[2]); #endif } @@ -744,7 +733,7 @@ operator*(const b3Vector3& v1, const b3Vector3& v2) B3_FORCE_INLINE b3Vector3 operator-(const b3Vector3& v1, const b3Vector3& v2) { -#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) +#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) // without _mm_and_ps this code causes slowdown in Concave moving __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128); @@ -754,9 +743,9 @@ operator-(const b3Vector3& v1, const b3Vector3& v2) return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)r, b3vFFF0Mask)); #else return b3MakeVector3( - v1.m_floats[0] - v2.m_floats[0], - v1.m_floats[1] - v2.m_floats[1], - v1.m_floats[2] - v2.m_floats[2]); + v1.m_floats[0] - v2.m_floats[0], + v1.m_floats[1] - v2.m_floats[1], + v1.m_floats[2] - v2.m_floats[2]); #endif } @@ -764,7 +753,7 @@ operator-(const b3Vector3& v1, const b3Vector3& v2) B3_FORCE_INLINE b3Vector3 operator-(const b3Vector3& v) { -#if (defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE)) +#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) __m128 r = _mm_xor_ps(v.mVec128, b3vMzeroMask); return b3MakeVector3(_mm_and_ps(r, b3vFFF0fMask)); #elif defined(B3_USE_NEON) @@ -778,9 +767,9 @@ operator-(const b3Vector3& v) B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& v, const b3Scalar& s) { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) - __m128 vs = _mm_load_ss(&s); // (S 0 0 0) - vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) + __m128 vs = _mm_load_ss(&s); // (S 0 0 0) + vs = b3_pshufd_ps(vs, 0x80); // (S S S 0.0) return b3MakeVector3(_mm_mul_ps(v.mVec128, vs)); #elif defined(B3_USE_NEON) float32x4_t r = vmulq_n_f32(v.mVec128, s); @@ -802,7 +791,7 @@ B3_FORCE_INLINE b3Vector3 operator/(const b3Vector3& v, const b3Scalar& s) { b3FullAssert(s != b3Scalar(0.0)); -#if 0 //defined(B3_USE_SSE_IN_API) +#if 0 //defined(B3_USE_SSE_IN_API) // this code is not faster ! __m128 vs = _mm_load_ss(&s); vs = _mm_div_ss(b3v1110, vs); @@ -818,7 +807,7 @@ operator/(const b3Vector3& v, const b3Scalar& s) B3_FORCE_INLINE b3Vector3 operator/(const b3Vector3& v1, const b3Vector3& v2) { -#if (defined(B3_USE_SSE_IN_API)&& defined (B3_USE_SSE)) +#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128); vec = _mm_and_ps(vec, b3vFFF0fMask); return b3MakeVector3(vec); @@ -828,19 +817,19 @@ operator/(const b3Vector3& v1, const b3Vector3& v2) x = v1.mVec128; y = v2.mVec128; - v = vrecpeq_f32(y); // v ~ 1/y - m = vrecpsq_f32(y, v); // m = (2-v*y) - v = vmulq_f32(v, m); // vv = v*m ~~ 1/y - m = vrecpsq_f32(y, v); // mm = (2-vv*y) - v = vmulq_f32(v, x); // x*vv - v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y + v = vrecpeq_f32(y); // v ~ 1/y + m = vrecpsq_f32(y, v); // m = (2-v*y) + v = vmulq_f32(v, m); // vv = v*m ~~ 1/y + m = vrecpsq_f32(y, v); // mm = (2-vv*y) + v = vmulq_f32(v, x); // x*vv + v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y return b3Vector3(v); #else return b3MakeVector3( - v1.m_floats[0] / v2.m_floats[0], - v1.m_floats[1] / v2.m_floats[1], - v1.m_floats[2] / v2.m_floats[2]); + v1.m_floats[0] / v2.m_floats[0], + v1.m_floats[1] / v2.m_floats[1], + v1.m_floats[2] / v2.m_floats[2]); #endif } @@ -851,7 +840,6 @@ b3Dot(const b3Vector3& v1, const b3Vector3& v2) return v1.dot(v2); } - /**@brief Return the distance squared between two vectors */ B3_FORCE_INLINE b3Scalar b3Distance2(const b3Vector3& v1, const b3Vector3& v2) @@ -859,7 +847,6 @@ b3Distance2(const b3Vector3& v1, const b3Vector3& v2) return v1.distance2(v2); } - /**@brief Return the distance between two vectors */ B3_FORCE_INLINE b3Scalar b3Distance(const b3Vector3& v1, const b3Vector3& v2) @@ -897,8 +884,6 @@ b3Lerp(const b3Vector3& v1, const b3Vector3& v2, const b3Scalar& t) return v1.lerp(v2, t); } - - B3_FORCE_INLINE b3Scalar b3Vector3::distance2(const b3Vector3& v) const { return (v - *this).length2(); @@ -911,7 +896,7 @@ B3_FORCE_INLINE b3Scalar b3Vector3::distance(const b3Vector3& v) const B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) b3Vector3 norm = *this; return norm.normalize(); @@ -920,143 +905,136 @@ B3_FORCE_INLINE b3Vector3 b3Vector3::normalized() const #endif } -B3_FORCE_INLINE b3Vector3 b3Vector3::rotate( const b3Vector3& wAxis, const b3Scalar _angle ) const +B3_FORCE_INLINE b3Vector3 b3Vector3::rotate(const b3Vector3& wAxis, const b3Scalar _angle) const { // wAxis must be a unit lenght vector -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) - __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128); - b3Scalar ssin = b3Sin( _angle ); - __m128 C = wAxis.cross( b3MakeVector3(mVec128) ).mVec128; + __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128); + b3Scalar ssin = b3Sin(_angle); + __m128 C = wAxis.cross(b3MakeVector3(mVec128)).mVec128; O = _mm_and_ps(O, b3vFFF0fMask); - b3Scalar scos = b3Cos( _angle ); + b3Scalar scos = b3Cos(_angle); - __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0) - __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0) + __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0) + __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0) - __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0) - __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0) + __m128 Y = b3_pshufd_ps(O, 0xC9); // (Y Z X 0) + __m128 Z = b3_pshufd_ps(O, 0xD2); // (Z X Y 0) O = _mm_add_ps(O, Y); - vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0) + vsin = b3_pshufd_ps(vsin, 0x80); // (S S S 0) O = _mm_add_ps(O, Z); - vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0) + vcos = b3_pshufd_ps(vcos, 0x80); // (S S S 0) - vsin = vsin * C; + vsin = vsin * C; O = O * wAxis.mVec128; __m128 X = mVec128 - O; - O = O + vsin; + O = O + vsin; vcos = vcos * X; O = O + vcos; return b3MakeVector3(O); #else - b3Vector3 o = wAxis * wAxis.dot( *this ); + b3Vector3 o = wAxis * wAxis.dot(*this); b3Vector3 _x = *this - o; b3Vector3 _y; - _y = wAxis.cross( *this ); + _y = wAxis.cross(*this); - return ( o + _x * b3Cos( _angle ) + _y * b3Sin( _angle ) ); + return (o + _x * b3Cos(_angle) + _y * b3Sin(_angle)); #endif } -B3_FORCE_INLINE long b3Vector3::maxDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const +B3_FORCE_INLINE long b3Vector3::maxDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const { -#if defined (B3_USE_SSE) || defined (B3_USE_NEON) - #if defined _WIN32 || defined (B3_USE_SSE) - const long scalar_cutoff = 10; - long b3_maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut ); - #elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut ); - #endif - if( array_count < scalar_cutoff ) +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) +#if defined _WIN32 || defined(B3_USE_SSE) + const long scalar_cutoff = 10; + long b3_maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); +#elif defined B3_USE_NEON + const long scalar_cutoff = 4; + extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); +#endif + if (array_count < scalar_cutoff) #else -#endif//B3_USE_SSE || B3_USE_NEON - { - b3Scalar maxDot = -B3_INFINITY; - int i = 0; - int ptIndex = -1; - for( i = 0; i < array_count; i++ ) - { - b3Scalar dot = array[i].dot(*this); - - if( dot > maxDot ) - { - maxDot = dot; - ptIndex = i; - } - } - - b3Assert(ptIndex>=0); - if (ptIndex<0) +#endif //B3_USE_SSE || B3_USE_NEON + { + b3Scalar maxDot = -B3_INFINITY; + int i = 0; + int ptIndex = -1; + for (i = 0; i < array_count; i++) + { + b3Scalar dot = array[i].dot(*this); + + if (dot > maxDot) + { + maxDot = dot; + ptIndex = i; + } + } + + b3Assert(ptIndex >= 0); + if (ptIndex < 0) { ptIndex = 0; } - dotOut = maxDot; - return ptIndex; - } -#if defined (B3_USE_SSE) || defined (B3_USE_NEON) - return b3_maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut ); + dotOut = maxDot; + return ptIndex; + } +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) + return b3_maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); #endif } -B3_FORCE_INLINE long b3Vector3::minDot( const b3Vector3 *array, long array_count, b3Scalar &dotOut ) const +B3_FORCE_INLINE long b3Vector3::minDot(const b3Vector3* array, long array_count, b3Scalar& dotOut) const { -#if defined (B3_USE_SSE) || defined (B3_USE_NEON) - #if defined B3_USE_SSE - const long scalar_cutoff = 10; - long b3_mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut ); - #elif defined B3_USE_NEON - const long scalar_cutoff = 4; - extern long (*b3_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut ); - #else - #error unhandled arch! - #endif - - if( array_count < scalar_cutoff ) -#endif//B3_USE_SSE || B3_USE_NEON - { - b3Scalar minDot = B3_INFINITY; - int i = 0; - int ptIndex = -1; - - for( i = 0; i < array_count; i++ ) - { - b3Scalar dot = array[i].dot(*this); - - if( dot < minDot ) - { - minDot = dot; - ptIndex = i; - } - } - - dotOut = minDot; - - return ptIndex; - } -#if defined (B3_USE_SSE) || defined (B3_USE_NEON) - return b3_mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut ); +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) +#if defined B3_USE_SSE + const long scalar_cutoff = 10; + long b3_mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut); +#elif defined B3_USE_NEON + const long scalar_cutoff = 4; + extern long (*b3_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut); +#else +#error unhandled arch! #endif -} - - -class b3Vector4 : public b3Vector3 -{ -public: + if (array_count < scalar_cutoff) +#endif //B3_USE_SSE || B3_USE_NEON + { + b3Scalar minDot = B3_INFINITY; + int i = 0; + int ptIndex = -1; + for (i = 0; i < array_count; i++) + { + b3Scalar dot = array[i].dot(*this); + if (dot < minDot) + { + minDot = dot; + ptIndex = i; + } + } + dotOut = minDot; + return ptIndex; + } +#if defined(B3_USE_SSE) || defined(B3_USE_NEON) + return b3_mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut); +#endif +} +class b3Vector4 : public b3Vector3 +{ +public: B3_FORCE_INLINE b3Vector4 absolute4() const { -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) return b3MakeVector4(_mm_and_ps(mVec128, b3vAbsfMask)); #elif defined(B3_USE_NEON) return b3Vector4(vabsq_f32(mVec128)); @@ -1069,11 +1047,9 @@ public: #endif } + b3Scalar getW() const { return m_floats[3]; } - b3Scalar getW() const { return m_floats[3];} - - - B3_FORCE_INLINE int maxAxis4() const + B3_FORCE_INLINE int maxAxis4() const { int maxIndex = -1; b3Scalar maxVal = b3Scalar(-B3_LARGE_FLOAT); @@ -1090,7 +1066,7 @@ public: if (m_floats[2] > maxVal) { maxIndex = 2; - maxVal =m_floats[2]; + maxVal = m_floats[2]; } if (m_floats[3] > maxVal) { @@ -1100,7 +1076,6 @@ public: return maxIndex; } - B3_FORCE_INLINE int minAxis4() const { int minIndex = -1; @@ -1118,7 +1093,7 @@ public: if (m_floats[2] < minVal) { minIndex = 2; - minVal =m_floats[2]; + minVal = m_floats[2]; } if (m_floats[3] < minVal) { @@ -1129,216 +1104,200 @@ public: return minIndex; } - B3_FORCE_INLINE int closestAxis4() const { return absolute4().maxAxis4(); } - - - - /**@brief Set x,y,z and zero w + /**@brief Set x,y,z and zero w * @param x Value of x * @param y Value of y * @param z Value of z */ - -/* void getValue(b3Scalar *m) const + /* void getValue(b3Scalar *m) const { m[0] = m_floats[0]; m[1] = m_floats[1]; m[2] =m_floats[2]; } */ -/**@brief Set the values + /**@brief Set the values * @param x Value of x * @param y Value of y * @param z Value of z * @param w Value of w */ - B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w) - { - m_floats[0]=_x; - m_floats[1]=_y; - m_floats[2]=_z; - m_floats[3]=_w; - } - - + B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) + { + m_floats[0] = _x; + m_floats[1] = _y; + m_floats[2] = _z; + m_floats[3] = _w; + } }; - ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal) +B3_FORCE_INLINE void b3SwapScalarEndian(const b3Scalar& sourceVal, b3Scalar& destVal) { - #ifdef B3_USE_DOUBLE_PRECISION - unsigned char* dest = (unsigned char*) &destVal; - unsigned char* src = (unsigned char*) &sourceVal; +#ifdef B3_USE_DOUBLE_PRECISION + unsigned char* dest = (unsigned char*)&destVal; + unsigned char* src = (unsigned char*)&sourceVal; dest[0] = src[7]; - dest[1] = src[6]; - dest[2] = src[5]; - dest[3] = src[4]; - dest[4] = src[3]; - dest[5] = src[2]; - dest[6] = src[1]; - dest[7] = src[0]; + dest[1] = src[6]; + dest[2] = src[5]; + dest[3] = src[4]; + dest[4] = src[3]; + dest[5] = src[2]; + dest[6] = src[1]; + dest[7] = src[0]; #else - unsigned char* dest = (unsigned char*) &destVal; - unsigned char* src = (unsigned char*) &sourceVal; + unsigned char* dest = (unsigned char*)&destVal; + unsigned char* src = (unsigned char*)&sourceVal; dest[0] = src[3]; - dest[1] = src[2]; - dest[2] = src[1]; - dest[3] = src[0]; -#endif //B3_USE_DOUBLE_PRECISION + dest[1] = src[2]; + dest[2] = src[1]; + dest[3] = src[0]; +#endif //B3_USE_DOUBLE_PRECISION } ///b3SwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec) +B3_FORCE_INLINE void b3SwapVector3Endian(const b3Vector3& sourceVec, b3Vector3& destVec) { - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) { - b3SwapScalarEndian(sourceVec[i],destVec[i]); + b3SwapScalarEndian(sourceVec[i], destVec[i]); } - } ///b3UnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization -B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector) +B3_FORCE_INLINE void b3UnSwapVector3Endian(b3Vector3& vector) { - - b3Vector3 swappedVec; - for (int i=0;i<4;i++) + b3Vector3 swappedVec; + for (int i = 0; i < 4; i++) { - b3SwapScalarEndian(vector[i],swappedVec[i]); + b3SwapScalarEndian(vector[i], swappedVec[i]); } vector = swappedVec; } template <class T> -B3_FORCE_INLINE void b3PlaneSpace1 (const T& n, T& p, T& q) +B3_FORCE_INLINE void b3PlaneSpace1(const T& n, T& p, T& q) { - if (b3Fabs(n[2]) > B3_SQRT12) { - // choose p in y-z plane - b3Scalar a = n[1]*n[1] + n[2]*n[2]; - b3Scalar k = b3RecipSqrt (a); - p[0] = 0; - p[1] = -n[2]*k; - p[2] = n[1]*k; - // set q = n x p - q[0] = a*k; - q[1] = -n[0]*p[2]; - q[2] = n[0]*p[1]; - } - else { - // choose p in x-y plane - b3Scalar a = n[0]*n[0] + n[1]*n[1]; - b3Scalar k = b3RecipSqrt (a); - p[0] = -n[1]*k; - p[1] = n[0]*k; - p[2] = 0; - // set q = n x p - q[0] = -n[2]*p[1]; - q[1] = n[2]*p[0]; - q[2] = a*k; - } + if (b3Fabs(n[2]) > B3_SQRT12) + { + // choose p in y-z plane + b3Scalar a = n[1] * n[1] + n[2] * n[2]; + b3Scalar k = b3RecipSqrt(a); + p[0] = 0; + p[1] = -n[2] * k; + p[2] = n[1] * k; + // set q = n x p + q[0] = a * k; + q[1] = -n[0] * p[2]; + q[2] = n[0] * p[1]; + } + else + { + // choose p in x-y plane + b3Scalar a = n[0] * n[0] + n[1] * n[1]; + b3Scalar k = b3RecipSqrt(a); + p[0] = -n[1] * k; + p[1] = n[0] * k; + p[2] = 0; + // set q = n x p + q[0] = -n[2] * p[1]; + q[1] = n[2] * p[0]; + q[2] = a * k; + } } - -struct b3Vector3FloatData +struct b3Vector3FloatData { - float m_floats[4]; + float m_floats[4]; }; -struct b3Vector3DoubleData +struct b3Vector3DoubleData { - double m_floats[4]; - + double m_floats[4]; }; -B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const +B3_FORCE_INLINE void b3Vector3::serializeFloat(struct b3Vector3FloatData& dataOut) const { ///could also do a memcpy, check if it is worth it - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) dataOut.m_floats[i] = float(m_floats[i]); } -B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn) +B3_FORCE_INLINE void b3Vector3::deSerializeFloat(const struct b3Vector3FloatData& dataIn) { - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) m_floats[i] = b3Scalar(dataIn.m_floats[i]); } - -B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const +B3_FORCE_INLINE void b3Vector3::serializeDouble(struct b3Vector3DoubleData& dataOut) const { ///could also do a memcpy, check if it is worth it - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) dataOut.m_floats[i] = double(m_floats[i]); } -B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn) +B3_FORCE_INLINE void b3Vector3::deSerializeDouble(const struct b3Vector3DoubleData& dataIn) { - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) m_floats[i] = b3Scalar(dataIn.m_floats[i]); } - -B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const +B3_FORCE_INLINE void b3Vector3::serialize(struct b3Vector3Data& dataOut) const { ///could also do a memcpy, check if it is worth it - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) dataOut.m_floats[i] = m_floats[i]; } -B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn) +B3_FORCE_INLINE void b3Vector3::deSerialize(const struct b3Vector3Data& dataIn) { - for (int i=0;i<4;i++) + for (int i = 0; i < 4; i++) m_floats[i] = dataIn.m_floats[i]; } - - - -inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z) +inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z) { - b3Vector3 tmp; - tmp.setValue(x,y,z); + b3Vector3 tmp; + tmp.setValue(x, y, z); return tmp; } -inline b3Vector3 b3MakeVector3(b3Scalar x,b3Scalar y,b3Scalar z, b3Scalar w) +inline b3Vector3 b3MakeVector3(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) { - b3Vector3 tmp; - tmp.setValue(x,y,z); + b3Vector3 tmp; + tmp.setValue(x, y, z); tmp.w = w; return tmp; } -inline b3Vector4 b3MakeVector4(b3Scalar x,b3Scalar y,b3Scalar z,b3Scalar w) +inline b3Vector4 b3MakeVector4(b3Scalar x, b3Scalar y, b3Scalar z, b3Scalar w) { - b3Vector4 tmp; - tmp.setValue(x,y,z,w); + b3Vector4 tmp; + tmp.setValue(x, y, z, w); return tmp; } -#if defined(B3_USE_SSE_IN_API) && defined (B3_USE_SSE) +#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE) -inline b3Vector3 b3MakeVector3( b3SimdFloat4 v) +inline b3Vector3 b3MakeVector3(b3SimdFloat4 v) { - b3Vector3 tmp; - tmp.set128(v); - return tmp; + b3Vector3 tmp; + tmp.set128(v); + return tmp; } inline b3Vector4 b3MakeVector4(b3SimdFloat4 vec) { - b3Vector4 tmp; + b3Vector4 tmp; tmp.set128(vec); return tmp; } #endif - -#endif //B3_VECTOR3_H +#endif //B3_VECTOR3_H |