summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3Common/b3Quaternion.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3Common/b3Quaternion.h')
-rw-r--r--thirdparty/bullet/Bullet3Common/b3Quaternion.h846
1 files changed, 418 insertions, 428 deletions
diff --git a/thirdparty/bullet/Bullet3Common/b3Quaternion.h b/thirdparty/bullet/Bullet3Common/b3Quaternion.h
index ad20543348..4fdd72dcc4 100644
--- a/thirdparty/bullet/Bullet3Common/b3Quaternion.h
+++ b/thirdparty/bullet/Bullet3Common/b3Quaternion.h
@@ -12,19 +12,12 @@ subject to the following restrictions:
3. This notice may not be removed or altered from any source distribution.
*/
-
-
#ifndef B3_SIMD__QUATERNION_H_
#define B3_SIMD__QUATERNION_H_
-
#include "b3Vector3.h"
#include "b3QuadWord.h"
-
-
-
-
#ifdef B3_USE_SSE
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
@@ -39,13 +32,14 @@ const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f
#endif
/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
-class b3Quaternion : public b3QuadWord {
+class b3Quaternion : public b3QuadWord
+{
public:
- /**@brief No initialization constructor */
+ /**@brief No initialization constructor */
b3Quaternion() {}
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))|| defined(B3_USE_NEON)
- // Set Vector
+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
+ // Set Vector
B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
{
mVec128 = vec;
@@ -58,63 +52,73 @@ public:
}
// Assignment Operator
- B3_FORCE_INLINE b3Quaternion&
- operator=(const b3Quaternion& v)
+ B3_FORCE_INLINE b3Quaternion&
+ operator=(const b3Quaternion& v)
{
mVec128 = v.mVec128;
-
+
return *this;
}
-
+
#endif
// template <typename b3Scalar>
// explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
- /**@brief Constructor from scalars */
- b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
- : b3QuadWord(_x, _y, _z, _w)
+ /**@brief Constructor from scalars */
+ b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
+ : b3QuadWord(_x, _y, _z, _w)
{
//b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
}
- /**@brief Axis angle Constructor
+ /**@brief Axis angle Constructor
* @param axis The axis which the rotation is around
* @param angle The magnitude of the rotation around the angle (Radians) */
- b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
- {
- setRotation(_axis, _angle);
+ b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
+ {
+ setRotation(_axis, _angle);
}
- /**@brief Constructor from Euler angles
+ /**@brief Constructor from Euler angles
* @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
* @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
* @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
- {
+ {
#ifndef B3_EULER_DEFAULT_ZYX
- setEuler(yaw, pitch, roll);
+ setEuler(yaw, pitch, roll);
#else
- setEulerZYX(yaw, pitch, roll);
-#endif
+ setEulerZYX(yaw, pitch, roll);
+#endif
}
- /**@brief Set the rotation using axis angle notation
+ /**@brief Set the rotation using axis angle notation
* @param axis The axis around which to rotate
* @param angle The magnitude of the rotation in Radians */
- void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
+ void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
{
+ b3Vector3 axis = axis1;
+ axis.safeNormalize();
+
b3Scalar d = axis.length();
b3Assert(d != b3Scalar(0.0));
- b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
- setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
- b3Cos(_angle * b3Scalar(0.5)));
+ if (d < B3_EPSILON)
+ {
+ setValue(0, 0, 0, 1);
+ }
+ else
+ {
+ b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
+ setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
+ b3Cos(_angle * b3Scalar(0.5)));
+ }
}
- /**@brief Set the quaternion using Euler angles
+ /**@brief Set the quaternion using Euler angles
* @param yaw Angle around Y
* @param pitch Angle around X
* @param roll Angle around Z */
void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
{
- b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
- b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
- b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
+ b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
+ b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
+ b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
b3Scalar cosYaw = b3Cos(halfYaw);
b3Scalar sinYaw = b3Sin(halfYaw);
b3Scalar cosPitch = b3Cos(halfPitch);
@@ -122,34 +126,34 @@ public:
b3Scalar cosRoll = b3Cos(halfRoll);
b3Scalar sinRoll = b3Sin(halfRoll);
setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
- sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
+ cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
+ sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
+ cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
}
-
+
/**@brief Set the quaternion using euler angles
* @param yaw Angle around Z
* @param pitch Angle around Y
* @param roll Angle around X */
void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
{
- b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
- b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
- b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
+ b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
+ b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
+ b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
b3Scalar cosYaw = b3Cos(halfYaw);
b3Scalar sinYaw = b3Sin(halfYaw);
b3Scalar cosPitch = b3Cos(halfPitch);
b3Scalar sinPitch = b3Sin(halfPitch);
b3Scalar cosRoll = b3Cos(halfRoll);
b3Scalar sinRoll = b3Sin(halfRoll);
- setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
- cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
- cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
- cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
+ setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
+ cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
+ cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
+ cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
normalize();
}
- /**@brief Get the euler angles from this quaternion
+ /**@brief Get the euler angles from this quaternion
* @param yaw Angle around Z
* @param pitch Angle around Y
* @param roll Angle around X */
@@ -166,221 +170,221 @@ public:
squ = m_floats[3] * m_floats[3];
rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
- pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI: (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
+ pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
}
- /**@brief Add two quaternions
+ /**@brief Add two quaternions
* @param q The quaternion to add to this one */
- B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
+ B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
mVec128 = _mm_add_ps(mVec128, q.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vaddq_f32(mVec128, q.mVec128);
-#else
- m_floats[0] += q.getX();
- m_floats[1] += q.getY();
- m_floats[2] += q.getZ();
- m_floats[3] += q.m_floats[3];
+#else
+ m_floats[0] += q.getX();
+ m_floats[1] += q.getY();
+ m_floats[2] += q.getZ();
+ m_floats[3] += q.m_floats[3];
#endif
return *this;
}
- /**@brief Subtract out a quaternion
+ /**@brief Subtract out a quaternion
* @param q The quaternion to subtract from this one */
- b3Quaternion& operator-=(const b3Quaternion& q)
+ b3Quaternion& operator-=(const b3Quaternion& q)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
mVec128 = _mm_sub_ps(mVec128, q.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vsubq_f32(mVec128, q.mVec128);
-#else
- m_floats[0] -= q.getX();
- m_floats[1] -= q.getY();
- m_floats[2] -= q.getZ();
- m_floats[3] -= q.m_floats[3];
+#else
+ m_floats[0] -= q.getX();
+ m_floats[1] -= q.getY();
+ m_floats[2] -= q.getZ();
+ m_floats[3] -= q.m_floats[3];
#endif
- return *this;
+ return *this;
}
- /**@brief Scale this quaternion
+ /**@brief Scale this quaternion
* @param s The scalar to scale by */
b3Quaternion& operator*=(const b3Scalar& s)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0); // (S S S S)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+ __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
+ vs = b3_pshufd_ps(vs, 0); // (S S S S)
mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(B3_USE_NEON)
mVec128 = vmulq_n_f32(mVec128, s);
#else
- m_floats[0] *= s;
- m_floats[1] *= s;
- m_floats[2] *= s;
- m_floats[3] *= s;
+ m_floats[0] *= s;
+ m_floats[1] *= s;
+ m_floats[2] *= s;
+ m_floats[3] *= s;
#endif
return *this;
}
- /**@brief Multiply this quaternion by q on the right
+ /**@brief Multiply this quaternion by q on the right
* @param q The other quaternion
* Equivilant to this = this * q */
b3Quaternion& operator*=(const b3Quaternion& q)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
__m128 vQ2 = q.get128();
-
- __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0,1,2,0));
- __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));
-
+
+ __m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
+ __m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
+
A1 = A1 * B1;
-
- __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1,2,0,1));
- __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
-
+
+ __m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
+ __m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
+
A2 = A2 * B2;
-
- B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2,0,1,2));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-
- B1 = B1 * B2; // A3 *= B3
-
- mVec128 = b3_splat_ps(mVec128, 3); // A0
- mVec128 = mVec128 * vQ2; // A0 * B0
-
- A1 = A1 + A2; // AB12
- mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- mVec128 = mVec128+ A1; // AB03 + AB12
-
-#elif defined(B3_USE_NEON)
-
- float32x4_t vQ1 = mVec128;
- float32x4_t vQ2 = q.get128();
- float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
- vQ1zx = tmp.val[0];
-
- tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
-
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
-
- A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
-
- A1 = vmulq_f32(A1, B1);
- A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
-
- mVec128 = A0;
+
+ B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
+ B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
+
+ B1 = B1 * B2; // A3 *= B3
+
+ mVec128 = b3_splat_ps(mVec128, 3); // A0
+ mVec128 = mVec128 * vQ2; // A0 * B0
+
+ A1 = A1 + A2; // AB12
+ mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
+ A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
+ mVec128 = mVec128 + A1; // AB03 + AB12
+
+#elif defined(B3_USE_NEON)
+
+ float32x4_t vQ1 = mVec128;
+ float32x4_t vQ2 = q.get128();
+ float32x4_t A0, A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+ {
+ float32x2x2_t tmp;
+ tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+
+ tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+
+ A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+
+ A1 = vmulq_f32(A1, B1);
+ A2 = vmulq_f32(A2, B2);
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+ A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+ A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
+
+ // change the sign of the last element
+ A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+ A0 = vaddq_f32(A0, A1); // AB03 + AB12
+
+ mVec128 = A0;
#else
setValue(
- m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
+ m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
#endif
return *this;
}
- /**@brief Return the dot product between this quaternion and another
+ /**@brief Return the dot product between this quaternion and another
* @param q The other quaternion */
b3Scalar dot(const b3Quaternion& q) const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
- __m128 vd;
-
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+ __m128 vd;
+
vd = _mm_mul_ps(mVec128, q.mVec128);
-
- __m128 t = _mm_movehl_ps(vd, vd);
+
+ __m128 t = _mm_movehl_ps(vd, vd);
vd = _mm_add_ps(vd, t);
t = _mm_shuffle_ps(vd, vd, 0x55);
vd = _mm_add_ss(vd, t);
-
- return _mm_cvtss_f32(vd);
+
+ return _mm_cvtss_f32(vd);
#elif defined(B3_USE_NEON)
float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
- float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
+ float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
x = vpadd_f32(x, x);
return vget_lane_f32(x, 0);
-#else
- return m_floats[0] * q.getX() +
- m_floats[1] * q.getY() +
- m_floats[2] * q.getZ() +
- m_floats[3] * q.m_floats[3];
+#else
+ return m_floats[0] * q.getX() +
+ m_floats[1] * q.getY() +
+ m_floats[2] * q.getZ() +
+ m_floats[3] * q.m_floats[3];
#endif
}
- /**@brief Return the length squared of the quaternion */
+ /**@brief Return the length squared of the quaternion */
b3Scalar length2() const
{
return dot(*this);
}
- /**@brief Return the length of the quaternion */
+ /**@brief Return the length of the quaternion */
b3Scalar length() const
{
return b3Sqrt(length2());
}
- /**@brief Normalize the quaternion
+ /**@brief Normalize the quaternion
* Such that x^2 + y^2 + z^2 +w^2 = 1 */
- b3Quaternion& normalize()
+ b3Quaternion& normalize()
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
- __m128 vd;
-
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+ __m128 vd;
+
vd = _mm_mul_ps(mVec128, mVec128);
-
- __m128 t = _mm_movehl_ps(vd, vd);
+
+ __m128 t = _mm_movehl_ps(vd, vd);
vd = _mm_add_ps(vd, t);
t = _mm_shuffle_ps(vd, vd, 0x55);
vd = _mm_add_ss(vd, t);
vd = _mm_sqrt_ss(vd);
vd = _mm_div_ss(b3vOnes, vd);
- vd = b3_pshufd_ps(vd, 0); // splat
+ vd = b3_pshufd_ps(vd, 0); // splat
mVec128 = _mm_mul_ps(mVec128, vd);
-
+
return *this;
-#else
+#else
return *this /= length();
#endif
}
- /**@brief Return a scaled version of this quaternion
+ /**@brief Return a scaled version of this quaternion
* @param s The scale factor */
B3_FORCE_INLINE b3Quaternion
operator*(const b3Scalar& s) const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
- __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
- vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
-
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+ __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
+ vs = b3_pshufd_ps(vs, 0x00); // (S S S S)
+
return b3Quaternion(_mm_mul_ps(mVec128, vs));
#elif defined(B3_USE_NEON)
return b3Quaternion(vmulq_n_f32(mVec128, s));
@@ -389,7 +393,7 @@ public:
#endif
}
- /**@brief Return an inversely scaled versionof this quaternion
+ /**@brief Return an inversely scaled versionof this quaternion
* @param s The inverse scale factor */
b3Quaternion operator/(const b3Scalar& s) const
{
@@ -397,29 +401,29 @@ public:
return *this * (b3Scalar(1.0) / s);
}
- /**@brief Inversely scale this quaternion
+ /**@brief Inversely scale this quaternion
* @param s The scale factor */
- b3Quaternion& operator/=(const b3Scalar& s)
+ b3Quaternion& operator/=(const b3Scalar& s)
{
b3Assert(s != b3Scalar(0.0));
return *this *= b3Scalar(1.0) / s;
}
- /**@brief Return a normalized version of this quaternion */
- b3Quaternion normalized() const
+ /**@brief Return a normalized version of this quaternion */
+ b3Quaternion normalized() const
{
return *this / length();
- }
- /**@brief Return the angle between this quaternion and the other
+ }
+ /**@brief Return the angle between this quaternion and the other
* @param q The other quaternion */
- b3Scalar angle(const b3Quaternion& q) const
+ b3Scalar angle(const b3Quaternion& q) const
{
b3Scalar s = b3Sqrt(length2() * q.length2());
b3Assert(s != b3Scalar(0.0));
return b3Acos(dot(q) / s);
}
- /**@brief Return the angle of rotation represented by this quaternion */
- b3Scalar getAngle() const
+ /**@brief Return the angle of rotation represented by this quaternion */
+ b3Scalar getAngle() const
{
b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]);
return s;
@@ -428,117 +432,116 @@ public:
/**@brief Return the axis of the rotation represented by this quaternion */
b3Vector3 getAxis() const
{
- b3Scalar s_squared = 1.f-m_floats[3]*m_floats[3];
-
- if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
- return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary
- b3Scalar s = 1.f/b3Sqrt(s_squared);
+ b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3];
+
+ if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
+ return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary
+ b3Scalar s = 1.f / b3Sqrt(s_squared);
return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
}
/**@brief Return the inverse of this quaternion */
b3Quaternion inverse() const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
#elif defined(B3_USE_NEON)
- return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
-#else
+ return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
+#else
return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
}
- /**@brief Return the sum of this quaternion and the other
+ /**@brief Return the sum of this quaternion and the other
* @param q2 The other quaternion */
B3_FORCE_INLINE b3Quaternion
operator+(const b3Quaternion& q2) const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
#elif defined(B3_USE_NEON)
- return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
-#else
+ return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
+#else
const b3Quaternion& q1 = *this;
return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
#endif
}
- /**@brief Return the difference between this quaternion and the other
+ /**@brief Return the difference between this quaternion and the other
* @param q2 The other quaternion */
B3_FORCE_INLINE b3Quaternion
operator-(const b3Quaternion& q2) const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
#elif defined(B3_USE_NEON)
- return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
-#else
+ return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
+#else
const b3Quaternion& q1 = *this;
return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
#endif
}
- /**@brief Return the negative of this quaternion
+ /**@brief Return the negative of this quaternion
* This simply negates each element */
B3_FORCE_INLINE b3Quaternion operator-() const
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
#elif defined(B3_USE_NEON)
- return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask) );
-#else
+ return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
+#else
const b3Quaternion& q2 = *this;
- return b3Quaternion( - q2.getX(), - q2.getY(), - q2.getZ(), - q2.m_floats[3]);
+ return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
#endif
}
- /**@todo document this and it's use */
- B3_FORCE_INLINE b3Quaternion farthest( const b3Quaternion& qd) const
+ /**@todo document this and it's use */
+ B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
{
- b3Quaternion diff,sum;
+ b3Quaternion diff, sum;
diff = *this - qd;
sum = *this + qd;
- if( diff.dot(diff) > sum.dot(sum) )
+ if (diff.dot(diff) > sum.dot(sum))
return qd;
return (-qd);
}
/**@todo document this and it's use */
- B3_FORCE_INLINE b3Quaternion nearest( const b3Quaternion& qd) const
+ B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
{
- b3Quaternion diff,sum;
+ b3Quaternion diff, sum;
diff = *this - qd;
sum = *this + qd;
- if( diff.dot(diff) < sum.dot(sum) )
+ if (diff.dot(diff) < sum.dot(sum))
return qd;
return (-qd);
}
-
- /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
+ /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
* @param q The other quaternion to interpolate with
* @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
* Slerp interpolates assuming constant velocity. */
b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
{
- b3Scalar magnitude = b3Sqrt(length2() * q.length2());
- b3Assert(magnitude > b3Scalar(0));
+ b3Scalar magnitude = b3Sqrt(length2() * q.length2());
+ b3Assert(magnitude > b3Scalar(0));
- b3Scalar product = dot(q) / magnitude;
- if (b3Fabs(product) < b3Scalar(1))
+ b3Scalar product = dot(q) / magnitude;
+ if (b3Fabs(product) < b3Scalar(1))
{
- // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
- const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
-
- const b3Scalar theta = b3Acos(sign * product);
- const b3Scalar s1 = b3Sin(sign * t * theta);
- const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
- const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
-
- return b3Quaternion(
- (m_floats[0] * s0 + q.getX() * s1) * d,
- (m_floats[1] * s0 + q.getY() * s1) * d,
- (m_floats[2] * s0 + q.getZ() * s1) * d,
- (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
+ // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+ const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
+
+ const b3Scalar theta = b3Acos(sign * product);
+ const b3Scalar s1 = b3Sin(sign * t * theta);
+ const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
+ const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
+
+ return b3Quaternion(
+ (m_floats[0] * s0 + q.getX() * s1) * d,
+ (m_floats[1] * s0 + q.getY() * s1) * d,
+ (m_floats[2] * s0 + q.getZ() * s1) * d,
+ (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
}
else
{
@@ -546,301 +549,294 @@ public:
}
}
- static const b3Quaternion& getIdentity()
+ static const b3Quaternion& getIdentity()
{
- static const b3Quaternion identityQuat(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.),b3Scalar(1.));
+ static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
return identityQuat;
}
B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
-
-
};
-
-
-
-
/**@brief Return the product of two quaternions */
B3_FORCE_INLINE b3Quaternion
-operator*(const b3Quaternion& q1, const b3Quaternion& q2)
+operator*(const b3Quaternion& q1, const b3Quaternion& q2)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
__m128 vQ1 = q1.get128();
__m128 vQ2 = q2.get128();
__m128 A0, A1, B1, A2, B2;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
+
+ A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x // vtrn
+ B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X // vdup vext
A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
+
+ A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1)); // Y Z X Y // vext
+ B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1)); // z x Y Y // vtrn vdup
A2 = A2 * B2;
- B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
-
- B1 = B1 * B2; // A3 *= B3
+ B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2)); // z x Y Z // vtrn vext
+ B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2)); // Y Z x z // vext vtrn
+
+ B1 = B1 * B2; // A3 *= B3
+
+ A0 = b3_splat_ps(vQ1, 3); // A0
+ A0 = A0 * vQ2; // A0 * B0
- A0 = b3_splat_ps(vQ1, 3); // A0
- A0 = A0 * vQ2; // A0 * B0
+ A1 = A1 + A2; // AB12
+ A0 = A0 - B1; // AB03 = AB0 - AB3
+
+ A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
+ A0 = A0 + A1; // AB03 + AB12
- A1 = A1 + A2; // AB12
- A0 = A0 - B1; // AB03 = AB0 - AB3
-
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A0 = A0 + A1; // AB03 + AB12
-
return b3Quaternion(A0);
-#elif defined(B3_USE_NEON)
+#elif defined(B3_USE_NEON)
float32x4_t vQ1 = q1.get128();
float32x4_t vQ2 = q2.get128();
float32x4_t A0, A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
- tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
- vQ1zx = tmp.val[0];
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
- tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+ {
+ float32x2x2_t tmp;
+ tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+
+ tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
- A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
- A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
- A0 = vaddq_f32(A0, A1); // AB03 + AB12
-
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+ A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+ A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
+
+ // change the sign of the last element
+ A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+ A0 = vaddq_f32(A0, A1); // AB03 + AB12
+
return b3Quaternion(A0);
#else
return b3Quaternion(
- q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
+ q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
- q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
+ q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
#endif
}
B3_FORCE_INLINE b3Quaternion
operator*(const b3Quaternion& q, const b3Vector3& w)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
__m128 vQ1 = q.get128();
__m128 vQ2 = w.get128();
__m128 A1, B1, A2, B2, A3, B3;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3,3,3,0));
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0,1,2,0));
+
+ A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));
+ B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));
A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
+
+ A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
+ B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
A2 = A2 * B2;
- A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
- B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-
- A3 = A3 * B3; // A3 *= B3
+ A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
+ B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
+
+ A3 = A3 * B3; // A3 *= B3
+
+ A1 = A1 + A2; // AB12
+ A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
+ A1 = A1 - A3; // AB123 = AB12 - AB3
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
-
return b3Quaternion(A1);
-
-#elif defined(B3_USE_NEON)
+
+#elif defined(B3_USE_NEON)
float32x4_t vQ1 = q.get128();
float32x4_t vQ2 = w.get128();
float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
-
- vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
- {
- float32x2x2_t tmp;
+ float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
- tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
- vQ2zx = tmp.val[0];
+ vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
+ {
+ float32x2x2_t tmp;
+
+ tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
- tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
- vQ1zx = tmp.val[0];
- }
+ tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
+ }
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
- A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
- B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
+ A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
+ B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
-
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
-
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+
+ // change the sign of the last element
+ A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+
+ A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
+
return b3Quaternion(A1);
-
+
#else
- return b3Quaternion(
- q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
- q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
- q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
- -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
+ return b3Quaternion(
+ q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
+ q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
+ q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
+ -q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
#endif
}
B3_FORCE_INLINE b3Quaternion
operator*(const b3Vector3& w, const b3Quaternion& q)
{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
__m128 vQ1 = w.get128();
__m128 vQ2 = q.get128();
__m128 A1, B1, A2, B2, A3, B3;
-
- A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y z x
- B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W W X
+
+ A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0)); // X Y z x
+ B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0)); // W W W X
A1 = A1 * B1;
-
- A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
- B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
- A2 = A2 *B2;
+ A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
+ B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
+
+ A2 = A2 * B2;
+
+ A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
+ B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
- A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
- B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-
- A3 = A3 * B3; // A3 *= B3
+ A3 = A3 * B3; // A3 *= B3
+
+ A1 = A1 + A2; // AB12
+ A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
+ A1 = A1 - A3; // AB123 = AB12 - AB3
- A1 = A1 + A2; // AB12
- A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
- A1 = A1 - A3; // AB123 = AB12 - AB3
-
return b3Quaternion(A1);
-#elif defined(B3_USE_NEON)
+#elif defined(B3_USE_NEON)
float32x4_t vQ1 = w.get128();
float32x4_t vQ2 = q.get128();
- float32x4_t A1, B1, A2, B2, A3, B3;
- float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-
- {
- float32x2x2_t tmp;
-
- tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
- vQ1zx = tmp.val[0];
+ float32x4_t A1, B1, A2, B2, A3, B3;
+ float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
- tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
- vQ2zx = tmp.val[0];
- }
- vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+ {
+ float32x2x2_t tmp;
- vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+ tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1)); // {z x}, {w y}
+ vQ1zx = tmp.val[0];
- vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
- vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+ tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2)); // {z x}, {w y}
+ vQ2zx = tmp.val[0];
+ }
+ vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
- A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
- B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
+ vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+ vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+ vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+ A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
+ B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
- B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+ B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
- A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
- B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
+ A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
+ B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
- A3 = vmulq_f32(A3, B3); // A3 *= B3
-
- A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
-
- // change the sign of the last element
- A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
-
- A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
-
+ A3 = vmulq_f32(A3, B3); // A3 *= B3
+
+ A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
+
+ // change the sign of the last element
+ A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+
+ A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
+
return b3Quaternion(A1);
-
+
#else
- return b3Quaternion(
- +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
+ return b3Quaternion(
+ +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
- -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
+ -w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
#endif
}
/**@brief Calculate the dot product between two quaternions */
-B3_FORCE_INLINE b3Scalar
-b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
-{
- return q1.dot(q2);
+B3_FORCE_INLINE b3Scalar
+b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
+{
+ return q1.dot(q2);
}
-
/**@brief Return the length of a quaternion */
B3_FORCE_INLINE b3Scalar
-b3Length(const b3Quaternion& q)
-{
- return q.length();
+b3Length(const b3Quaternion& q)
+{
+ return q.length();
}
/**@brief Return the angle between two quaternions*/
B3_FORCE_INLINE b3Scalar
-b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
-{
- return q1.angle(q2);
+b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
+{
+ return q1.angle(q2);
}
/**@brief Return the inverse of a quaternion*/
B3_FORCE_INLINE b3Quaternion
-b3Inverse(const b3Quaternion& q)
+b3Inverse(const b3Quaternion& q)
{
return q.inverse();
}
@@ -851,7 +847,7 @@ b3Inverse(const b3Quaternion& q)
* @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
* Slerp assumes constant velocity between positions. */
B3_FORCE_INLINE b3Quaternion
-b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
+b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
{
return q1.slerp(q2, t);
}
@@ -859,7 +855,7 @@ b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
B3_FORCE_INLINE b3Quaternion
b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
{
- return rot0*rot1;
+ return rot0 * rot1;
}
B3_FORCE_INLINE b3Quaternion
@@ -868,51 +864,45 @@ b3QuatNormalized(const b3Quaternion& orn)
return orn.normalized();
}
-
-
-B3_FORCE_INLINE b3Vector3
-b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
+B3_FORCE_INLINE b3Vector3
+b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
{
b3Quaternion q = rotation * v;
q *= rotation.inverse();
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));
#elif defined(B3_USE_NEON)
- return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
-#else
- return b3MakeVector3(q.getX(),q.getY(),q.getZ());
+ return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
+#else
+ return b3MakeVector3(q.getX(), q.getY(), q.getZ());
#endif
}
-B3_FORCE_INLINE b3Quaternion
-b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
+B3_FORCE_INLINE b3Quaternion
+b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
{
b3Vector3 c = v0.cross(v1);
- b3Scalar d = v0.dot(v1);
+ b3Scalar d = v0.dot(v1);
if (d < -1.0 + B3_EPSILON)
{
- b3Vector3 n,unused;
- b3PlaneSpace1(v0,n,unused);
- return b3Quaternion(n.getX(),n.getY(),n.getZ(),0.0f); // just pick any vector that is orthogonal to v0
+ b3Vector3 n, unused;
+ b3PlaneSpace1(v0, n, unused);
+ return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f); // just pick any vector that is orthogonal to v0
}
- b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
+ b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
b3Scalar rs = 1.0f / s;
- return b3Quaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
-
+ return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
}
-B3_FORCE_INLINE b3Quaternion
-b3ShortestArcQuatNormalize2(b3Vector3& v0,b3Vector3& v1)
+B3_FORCE_INLINE b3Quaternion
+b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
{
v0.normalize();
v1.normalize();
- return b3ShortestArcQuat(v0,v1);
+ return b3ShortestArcQuat(v0, v1);
}
-#endif //B3_SIMD__QUATERNION_H_
-
-
-
+#endif //B3_SIMD__QUATERNION_H_