summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/src/LinearMath/btQuaternion.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/src/LinearMath/btQuaternion.h')
-rw-r--r--thirdparty/bullet/src/LinearMath/btQuaternion.h1016
1 files changed, 1016 insertions, 0 deletions
diff --git a/thirdparty/bullet/src/LinearMath/btQuaternion.h b/thirdparty/bullet/src/LinearMath/btQuaternion.h
new file mode 100644
index 0000000000..7bd39e6a33
--- /dev/null
+++ b/thirdparty/bullet/src/LinearMath/btQuaternion.h
@@ -0,0 +1,1016 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_SIMD__QUATERNION_H_
+#define BT_SIMD__QUATERNION_H_
+
+
+#include "btVector3.h"
+#include "btQuadWord.h"
+
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btQuaternionData btQuaternionDoubleData
+#define btQuaternionDataName "btQuaternionDoubleData"
+#else
+#define btQuaternionData btQuaternionFloatData
+#define btQuaternionDataName "btQuaternionFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+
+#ifdef BT_USE_SSE
+
+//const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
+#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
+
+#endif
+
+#if defined(BT_USE_SSE)
+
+#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
+#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
+
+#elif defined(BT_USE_NEON)
+
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
+
+#endif
+
+/**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
+class btQuaternion : public btQuadWord {
+public:
+ /**@brief No initialization constructor */
+ btQuaternion() {}
+
+#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
+ // Set Vector
+ SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
+ {
+ mVec128 = vec;
+ }
+
+ // Copy constructor
+ SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
+ {
+ mVec128 = rhs.mVec128;
+ }
+
+ // Assignment Operator
+ SIMD_FORCE_INLINE btQuaternion&
+ operator=(const btQuaternion& v)
+ {
+ mVec128 = v.mVec128;
+
+ return *this;
+ }
+
+#endif
+
+ // template <typename btScalar>
+ // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
+ /**@brief Constructor from scalars */
+ btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
+ : btQuadWord(_x, _y, _z, _w)
+ {}
+ /**@brief Axis angle Constructor
+ * @param axis The axis which the rotation is around
+ * @param angle The magnitude of the rotation around the angle (Radians) */
+ btQuaternion(const btVector3& _axis, const btScalar& _angle)
+ {
+ setRotation(_axis, _angle);
+ }
+ /**@brief Constructor from Euler angles
+ * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
+ * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
+ * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
+ btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+ {
+#ifndef BT_EULER_DEFAULT_ZYX
+ setEuler(yaw, pitch, roll);
+#else
+ setEulerZYX(yaw, pitch, roll);
+#endif
+ }
+ /**@brief Set the rotation using axis angle notation
+ * @param axis The axis around which to rotate
+ * @param angle The magnitude of the rotation in Radians */
+ void setRotation(const btVector3& axis, const btScalar& _angle)
+ {
+ btScalar d = axis.length();
+ btAssert(d != btScalar(0.0));
+ btScalar s = btSin(_angle * btScalar(0.5)) / d;
+ setValue(axis.x() * s, axis.y() * s, axis.z() * s,
+ btCos(_angle * btScalar(0.5)));
+ }
+ /**@brief Set the quaternion using Euler angles
+ * @param yaw Angle around Y
+ * @param pitch Angle around X
+ * @param roll Angle around Z */
+ void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+ {
+ btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
+ btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
+ btScalar halfRoll = btScalar(roll) * btScalar(0.5);
+ btScalar cosYaw = btCos(halfYaw);
+ btScalar sinYaw = btSin(halfYaw);
+ btScalar cosPitch = btCos(halfPitch);
+ btScalar sinPitch = btSin(halfPitch);
+ btScalar cosRoll = btCos(halfRoll);
+ btScalar sinRoll = btSin(halfRoll);
+ setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
+ cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
+ sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
+ cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
+ }
+ /**@brief Set the quaternion using euler angles
+ * @param yaw Angle around Z
+ * @param pitch Angle around Y
+ * @param roll Angle around X */
+ void setEulerZYX(const btScalar& yawZ, const btScalar& pitchY, const btScalar& rollX)
+ {
+ btScalar halfYaw = btScalar(yawZ) * btScalar(0.5);
+ btScalar halfPitch = btScalar(pitchY) * btScalar(0.5);
+ btScalar halfRoll = btScalar(rollX) * btScalar(0.5);
+ btScalar cosYaw = btCos(halfYaw);
+ btScalar sinYaw = btSin(halfYaw);
+ btScalar cosPitch = btCos(halfPitch);
+ btScalar sinPitch = btSin(halfPitch);
+ btScalar cosRoll = btCos(halfRoll);
+ btScalar sinRoll = btSin(halfRoll);
+ setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
+ cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
+ cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
+ cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
+ }
+
+ /**@brief Get the euler angles from this quaternion
+ * @param yaw Angle around Z
+ * @param pitch Angle around Y
+ * @param roll Angle around X */
+ void getEulerZYX(btScalar& yawZ, btScalar& pitchY, btScalar& rollX) const
+ {
+ btScalar squ;
+ btScalar sqx;
+ btScalar sqy;
+ btScalar sqz;
+ btScalar sarg;
+ sqx = m_floats[0] * m_floats[0];
+ sqy = m_floats[1] * m_floats[1];
+ sqz = m_floats[2] * m_floats[2];
+ squ = m_floats[3] * m_floats[3];
+ rollX = btAtan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
+ sarg = btScalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
+ pitchY = sarg <= btScalar(-1.0) ? btScalar(-0.5) * SIMD_PI: (sarg >= btScalar(1.0) ? btScalar(0.5) * SIMD_PI : btAsin(sarg));
+ yawZ = btAtan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
+ }
+
+ /**@brief Add two quaternions
+ * @param q The quaternion to add to this one */
+ SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ mVec128 = _mm_add_ps(mVec128, q.mVec128);
+#elif defined(BT_USE_NEON)
+ mVec128 = vaddq_f32(mVec128, q.mVec128);
+#else
+ m_floats[0] += q.x();
+ m_floats[1] += q.y();
+ m_floats[2] += q.z();
+ m_floats[3] += q.m_floats[3];
+#endif
+ return *this;
+ }
+
+ /**@brief Subtract out a quaternion
+ * @param q The quaternion to subtract from this one */
+ btQuaternion& operator-=(const btQuaternion& q)
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ mVec128 = _mm_sub_ps(mVec128, q.mVec128);
+#elif defined(BT_USE_NEON)
+ mVec128 = vsubq_f32(mVec128, q.mVec128);
+#else
+ m_floats[0] -= q.x();
+ m_floats[1] -= q.y();
+ m_floats[2] -= q.z();
+ m_floats[3] -= q.m_floats[3];
+#endif
+ return *this;
+ }
+
+ /**@brief Scale this quaternion
+ * @param s The scalar to scale by */
+ btQuaternion& operator*=(const btScalar& s)
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
+ vs = bt_pshufd_ps(vs, 0); // (S S S S)
+ mVec128 = _mm_mul_ps(mVec128, vs);
+#elif defined(BT_USE_NEON)
+ mVec128 = vmulq_n_f32(mVec128, s);
+#else
+ m_floats[0] *= s;
+ m_floats[1] *= s;
+ m_floats[2] *= s;
+ m_floats[3] *= s;
+#endif
+ return *this;
+ }
+
+ /**@brief Multiply this quaternion by q on the right
+ * @param q The other quaternion
+ * Equivilant to this = this * q */
+ btQuaternion& operator*=(const btQuaternion& q)
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vQ2 = q.get128();
+
+ __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
+ __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
+
+ A1 = A1 * B1;
+
+ __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
+ __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+
+ A2 = A2 * B2;
+
+ B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
+ B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+
+ B1 = B1 * B2; // A3 *= B3
+
+ mVec128 = bt_splat_ps(mVec128, 3); // A0
+ mVec128 = mVec128 * vQ2; // A0 * B0
+
+ A1 = A1 + A2; // AB12
+ mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
+ A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+ mVec128 = mVec128+ A1; // AB03 + AB12
+
+#elif defined(BT_USE_NEON)
+
+ float32x4_t vQ1 = mVec128;
+ float32x4_t vQ2 = q.get128();
+ float32x4_t A0, A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+ {
+ float32x2x2_t tmp;
+ tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+
+ tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+
+ A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+
+ A1 = vmulq_f32(A1, B1);
+ A2 = vmulq_f32(A2, B2);
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+ A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+ A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
+
+ // change the sign of the last element
+ A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+ A0 = vaddq_f32(A0, A1); // AB03 + AB12
+
+ mVec128 = A0;
+#else
+ setValue(
+ m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
+ m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
+ m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
+ m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
+#endif
+ return *this;
+ }
+ /**@brief Return the dot product between this quaternion and another
+ * @param q The other quaternion */
+ btScalar dot(const btQuaternion& q) const
+ {
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vd;
+
+ vd = _mm_mul_ps(mVec128, q.mVec128);
+
+ __m128 t = _mm_movehl_ps(vd, vd);
+ vd = _mm_add_ps(vd, t);
+ t = _mm_shuffle_ps(vd, vd, 0x55);
+ vd = _mm_add_ss(vd, t);
+
+ return _mm_cvtss_f32(vd);
+#elif defined(BT_USE_NEON)
+ float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
+ float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
+ x = vpadd_f32(x, x);
+ return vget_lane_f32(x, 0);
+#else
+ return m_floats[0] * q.x() +
+ m_floats[1] * q.y() +
+ m_floats[2] * q.z() +
+ m_floats[3] * q.m_floats[3];
+#endif
+ }
+
+ /**@brief Return the length squared of the quaternion */
+ btScalar length2() const
+ {
+ return dot(*this);
+ }
+
+ /**@brief Return the length of the quaternion */
+ btScalar length() const
+ {
+ return btSqrt(length2());
+ }
+ btQuaternion& safeNormalize()
+ {
+ btScalar l2 = length2();
+ if (l2>SIMD_EPSILON)
+ {
+ normalize();
+ }
+ return *this;
+ }
+ /**@brief Normalize the quaternion
+ * Such that x^2 + y^2 + z^2 +w^2 = 1 */
+ btQuaternion& normalize()
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vd;
+
+ vd = _mm_mul_ps(mVec128, mVec128);
+
+ __m128 t = _mm_movehl_ps(vd, vd);
+ vd = _mm_add_ps(vd, t);
+ t = _mm_shuffle_ps(vd, vd, 0x55);
+ vd = _mm_add_ss(vd, t);
+
+ vd = _mm_sqrt_ss(vd);
+ vd = _mm_div_ss(vOnes, vd);
+ vd = bt_pshufd_ps(vd, 0); // splat
+ mVec128 = _mm_mul_ps(mVec128, vd);
+
+ return *this;
+#else
+ return *this /= length();
+#endif
+ }
+
+ /**@brief Return a scaled version of this quaternion
+ * @param s The scale factor */
+ SIMD_FORCE_INLINE btQuaternion
+ operator*(const btScalar& s) const
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
+ vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
+
+ return btQuaternion(_mm_mul_ps(mVec128, vs));
+#elif defined(BT_USE_NEON)
+ return btQuaternion(vmulq_n_f32(mVec128, s));
+#else
+ return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
+#endif
+ }
+
+ /**@brief Return an inversely scaled versionof this quaternion
+ * @param s The inverse scale factor */
+ btQuaternion operator/(const btScalar& s) const
+ {
+ btAssert(s != btScalar(0.0));
+ return *this * (btScalar(1.0) / s);
+ }
+
+ /**@brief Inversely scale this quaternion
+ * @param s The scale factor */
+ btQuaternion& operator/=(const btScalar& s)
+ {
+ btAssert(s != btScalar(0.0));
+ return *this *= btScalar(1.0) / s;
+ }
+
+ /**@brief Return a normalized version of this quaternion */
+ btQuaternion normalized() const
+ {
+ return *this / length();
+ }
+ /**@brief Return the ***half*** angle between this quaternion and the other
+ * @param q The other quaternion */
+ btScalar angle(const btQuaternion& q) const
+ {
+ btScalar s = btSqrt(length2() * q.length2());
+ btAssert(s != btScalar(0.0));
+ return btAcos(dot(q) / s);
+ }
+
+ /**@brief Return the angle between this quaternion and the other along the shortest path
+ * @param q The other quaternion */
+ btScalar angleShortestPath(const btQuaternion& q) const
+ {
+ btScalar s = btSqrt(length2() * q.length2());
+ btAssert(s != btScalar(0.0));
+ if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+ return btAcos(dot(-q) / s) * btScalar(2.0);
+ else
+ return btAcos(dot(q) / s) * btScalar(2.0);
+ }
+
+ /**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
+ btScalar getAngle() const
+ {
+ btScalar s = btScalar(2.) * btAcos(m_floats[3]);
+ return s;
+ }
+
+ /**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
+ btScalar getAngleShortestPath() const
+ {
+ btScalar s;
+ if (m_floats[3] >= 0)
+ s = btScalar(2.) * btAcos(m_floats[3]);
+ else
+ s = btScalar(2.) * btAcos(-m_floats[3]);
+ return s;
+ }
+
+
+ /**@brief Return the axis of the rotation represented by this quaternion */
+ btVector3 getAxis() const
+ {
+ btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
+
+ if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
+ return btVector3(1.0, 0.0, 0.0); // Arbitrary
+ btScalar s = 1.f/btSqrt(s_squared);
+ return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
+ }
+
+ /**@brief Return the inverse of this quaternion */
+ btQuaternion inverse() const
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ return btQuaternion(_mm_xor_ps(mVec128, vQInv));
+#elif defined(BT_USE_NEON)
+ return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
+#else
+ return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
+#endif
+ }
+
+ /**@brief Return the sum of this quaternion and the other
+ * @param q2 The other quaternion */
+ SIMD_FORCE_INLINE btQuaternion
+ operator+(const btQuaternion& q2) const
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
+#elif defined(BT_USE_NEON)
+ return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
+#else
+ const btQuaternion& q1 = *this;
+ return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
+#endif
+ }
+
+ /**@brief Return the difference between this quaternion and the other
+ * @param q2 The other quaternion */
+ SIMD_FORCE_INLINE btQuaternion
+ operator-(const btQuaternion& q2) const
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
+#elif defined(BT_USE_NEON)
+ return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
+#else
+ const btQuaternion& q1 = *this;
+ return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
+#endif
+ }
+
+ /**@brief Return the negative of this quaternion
+ * This simply negates each element */
+ SIMD_FORCE_INLINE btQuaternion operator-() const
+ {
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
+#elif defined(BT_USE_NEON)
+ return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
+#else
+ const btQuaternion& q2 = *this;
+ return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]);
+#endif
+ }
+ /**@todo document this and it's use */
+ SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
+ {
+ btQuaternion diff,sum;
+ diff = *this - qd;
+ sum = *this + qd;
+ if( diff.dot(diff) > sum.dot(sum) )
+ return qd;
+ return (-qd);
+ }
+
+ /**@todo document this and it's use */
+ SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
+ {
+ btQuaternion diff,sum;
+ diff = *this - qd;
+ sum = *this + qd;
+ if( diff.dot(diff) < sum.dot(sum) )
+ return qd;
+ return (-qd);
+ }
+
+
+ /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
+ * @param q The other quaternion to interpolate with
+ * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
+ * Slerp interpolates assuming constant velocity. */
+ btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
+ {
+
+ const btScalar magnitude = btSqrt(length2() * q.length2());
+ btAssert(magnitude > btScalar(0));
+
+ const btScalar product = dot(q) / magnitude;
+ const btScalar absproduct = btFabs(product);
+
+ if(absproduct < btScalar(1.0 - SIMD_EPSILON))
+ {
+ // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+ const btScalar theta = btAcos(absproduct);
+ const btScalar d = btSin(theta);
+ btAssert(d > btScalar(0));
+
+ const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
+ const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
+ const btScalar s1 = btSin(sign * t * theta) / d;
+
+ return btQuaternion(
+ (m_floats[0] * s0 + q.x() * s1),
+ (m_floats[1] * s0 + q.y() * s1),
+ (m_floats[2] * s0 + q.z() * s1),
+ (m_floats[3] * s0 + q.w() * s1));
+ }
+ else
+ {
+ return *this;
+ }
+ }
+
+ static const btQuaternion& getIdentity()
+ {
+ static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
+ return identityQuat;
+ }
+
+ SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
+
+ SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
+
+ SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionData& dataIn);
+
+ SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
+
+ SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
+
+ SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
+
+ SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
+
+};
+
+
+
+
+
+/**@brief Return the product of two quaternions */
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btQuaternion& q1, const btQuaternion& q2)
+{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vQ1 = q1.get128();
+ __m128 vQ2 = q2.get128();
+ __m128 A0, A1, B1, A2, B2;
+
+ A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
+ B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
+
+ A1 = A1 * B1;
+
+ A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
+ B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
+
+ A2 = A2 * B2;
+
+ B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
+ B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
+
+ B1 = B1 * B2; // A3 *= B3
+
+ A0 = bt_splat_ps(vQ1, 3); // A0
+ A0 = A0 * vQ2; // A0 * B0
+
+ A1 = A1 + A2; // AB12
+ A0 = A0 - B1; // AB03 = AB0 - AB3
+
+ A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+ A0 = A0 + A1; // AB03 + AB12
+
+ return btQuaternion(A0);
+
+#elif defined(BT_USE_NEON)
+
+ float32x4_t vQ1 = q1.get128();
+ float32x4_t vQ2 = q2.get128();
+ float32x4_t A0, A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+ {
+ float32x2x2_t tmp;
+ tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+
+ tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+
+ A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+
+ A1 = vmulq_f32(A1, B1);
+ A2 = vmulq_f32(A2, B2);
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+ A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+ A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
+
+ // change the sign of the last element
+ A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+ A0 = vaddq_f32(A0, A1); // AB03 + AB12
+
+ return btQuaternion(A0);
+
+#else
+ return btQuaternion(
+ q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
+ q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
+ q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
+ q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
+#endif
+}
+
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btQuaternion& q, const btVector3& w)
+{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vQ1 = q.get128();
+ __m128 vQ2 = w.get128();
+ __m128 A1, B1, A2, B2, A3, B3;
+
+ A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
+ B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
+
+ A1 = A1 * B1;
+
+ A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
+ B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+
+ A2 = A2 * B2;
+
+ A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
+ B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+
+ A3 = A3 * B3; // A3 *= B3
+
+ A1 = A1 + A2; // AB12
+ A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+ A1 = A1 - A3; // AB123 = AB12 - AB3
+
+ return btQuaternion(A1);
+
+#elif defined(BT_USE_NEON)
+
+ float32x4_t vQ1 = q.get128();
+ float32x4_t vQ2 = w.get128();
+ float32x4_t A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
+
+ vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
+ {
+ float32x2x2_t tmp;
+
+ tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+
+ tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+ }
+
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
+ B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
+
+ A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+
+ A1 = vmulq_f32(A1, B1);
+ A2 = vmulq_f32(A2, B2);
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+
+ // change the sign of the last element
+ A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+
+ A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
+
+ return btQuaternion(A1);
+
+#else
+ return btQuaternion(
+ q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
+ q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
+ q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
+ -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
+#endif
+}
+
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btVector3& w, const btQuaternion& q)
+{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ __m128 vQ1 = w.get128();
+ __m128 vQ2 = q.get128();
+ __m128 A1, B1, A2, B2, A3, B3;
+
+ A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
+ B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
+
+ A1 = A1 * B1;
+
+ A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
+ B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+
+ A2 = A2 *B2;
+
+ A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
+ B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+
+ A3 = A3 * B3; // A3 *= B3
+
+ A1 = A1 + A2; // AB12
+ A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
+ A1 = A1 - A3; // AB123 = AB12 - AB3
+
+ return btQuaternion(A1);
+
+#elif defined(BT_USE_NEON)
+
+ float32x4_t vQ1 = w.get128();
+ float32x4_t vQ2 = q.get128();
+ float32x4_t A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+ {
+ float32x2x2_t tmp;
+
+ tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+
+ tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+
+ A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+
+ A1 = vmulq_f32(A1, B1);
+ A2 = vmulq_f32(A2, B2);
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+
+ // change the sign of the last element
+ A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
+
+ A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
+
+ return btQuaternion(A1);
+
+#else
+ return btQuaternion(
+ +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
+ +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
+ +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
+ -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
+#endif
+}
+
+/**@brief Calculate the dot product between two quaternions */
+SIMD_FORCE_INLINE btScalar
+dot(const btQuaternion& q1, const btQuaternion& q2)
+{
+ return q1.dot(q2);
+}
+
+
+/**@brief Return the length of a quaternion */
+SIMD_FORCE_INLINE btScalar
+length(const btQuaternion& q)
+{
+ return q.length();
+}
+
+/**@brief Return the angle between two quaternions*/
+SIMD_FORCE_INLINE btScalar
+btAngle(const btQuaternion& q1, const btQuaternion& q2)
+{
+ return q1.angle(q2);
+}
+
+/**@brief Return the inverse of a quaternion*/
+SIMD_FORCE_INLINE btQuaternion
+inverse(const btQuaternion& q)
+{
+ return q.inverse();
+}
+
+/**@brief Return the result of spherical linear interpolation betwen two quaternions
+ * @param q1 The first quaternion
+ * @param q2 The second quaternion
+ * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
+ * Slerp assumes constant velocity between positions. */
+SIMD_FORCE_INLINE btQuaternion
+slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
+{
+ return q1.slerp(q2, t);
+}
+
+SIMD_FORCE_INLINE btVector3
+quatRotate(const btQuaternion& rotation, const btVector3& v)
+{
+ btQuaternion q = rotation * v;
+ q *= rotation.inverse();
+#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
+#elif defined(BT_USE_NEON)
+ return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
+#else
+ return btVector3(q.getX(),q.getY(),q.getZ());
+#endif
+}
+
+SIMD_FORCE_INLINE btQuaternion
+shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
+{
+ btVector3 c = v0.cross(v1);
+ btScalar d = v0.dot(v1);
+
+ if (d < -1.0 + SIMD_EPSILON)
+ {
+ btVector3 n,unused;
+ btPlaneSpace1(v0,n,unused);
+ return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
+ }
+
+ btScalar s = btSqrt((1.0f + d) * 2.0f);
+ btScalar rs = 1.0f / s;
+
+ return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
+}
+
+SIMD_FORCE_INLINE btQuaternion
+shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
+{
+ v0.normalize();
+ v1.normalize();
+ return shortestArcQuat(v0,v1);
+}
+
+
+
+
+struct btQuaternionFloatData
+{
+ float m_floats[4];
+};
+
+struct btQuaternionDoubleData
+{
+ double m_floats[4];
+
+};
+
+SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
+{
+ ///could also do a memcpy, check if it is worth it
+ for (int i=0;i<4;i++)
+ dataOut.m_floats[i] = float(m_floats[i]);
+}
+
+SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
+{
+ for (int i=0;i<4;i++)
+ m_floats[i] = btScalar(dataIn.m_floats[i]);
+}
+
+
+SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
+{
+ ///could also do a memcpy, check if it is worth it
+ for (int i=0;i<4;i++)
+ dataOut.m_floats[i] = double(m_floats[i]);
+}
+
+SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
+{
+ for (int i=0;i<4;i++)
+ m_floats[i] = btScalar(dataIn.m_floats[i]);
+}
+
+
+SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
+{
+ ///could also do a memcpy, check if it is worth it
+ for (int i=0;i<4;i++)
+ dataOut.m_floats[i] = m_floats[i];
+}
+
+SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionData& dataIn)
+{
+ for (int i=0;i<4;i++)
+ m_floats[i] = dataIn.m_floats[i];
+}
+
+
+#endif //BT_SIMD__QUATERNION_H_
+
+
+