1 files changed, 414 insertions, 427 deletions
diff --git a/thirdparty/bullet/Bullet3Common/b3Quaternion.h b/thirdparty/bullet/Bullet3Common/b3Quaternion.h
index ad20543348..9bd5ff7d90 100644
--- a/thirdparty/bullet/Bullet3Common/b3Quaternion.h
+++ b/thirdparty/bullet/Bullet3Common/b3Quaternion.h
@@ -12,19 +12,12 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-
-
 #ifndef B3_SIMD__QUATERNION_H_
 #define B3_SIMD__QUATERNION_H_
 
-
 #include "b3Vector3.h"
 #include "b3QuadWord.h"
 
-
-
-
-
 #ifdef B3_USE_SSE
 
 const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
@@ -39,13 +32,14 @@ const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f
 #endif
 
 /**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
-class b3Quaternion : public b3QuadWord {
+class b3Quaternion : public b3QuadWord
+{
 public:
-  /**@brief No initialization constructor */
+	/**@brief No initialization constructor */
 	b3Quaternion() {}
 
-#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))|| defined(B3_USE_NEON) 
-	// Set Vector 
+#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)) || defined(B3_USE_NEON)
+	// Set Vector
 	B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
 	{
 		mVec128 = vec;
@@ -58,63 +52,70 @@ public:
 	}
 
 	// Assignment Operator
-	B3_FORCE_INLINE b3Quaternion& 
-	operator=(const b3Quaternion& v) 
+	B3_FORCE_INLINE b3Quaternion&
+	operator=(const b3Quaternion& v)
 	{
 		mVec128 = v.mVec128;
-		
+
 		return *this;
 	}
-	
+
 #endif
 
 	//		template <typename b3Scalar>
 	//		explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
-  /**@brief Constructor from scalars */
-	b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w) 
-		: b3QuadWord(_x, _y, _z, _w) 
+	/**@brief Constructor from scalars */
+	b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
+		: b3QuadWord(_x, _y, _z, _w)
 	{
 		//b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
 	}
-  /**@brief Axis angle Constructor
+	/**@brief Axis angle Constructor
    * @param axis The axis which the rotation is around
    * @param angle The magnitude of the rotation around the angle (Radians) */
-	b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle) 
-	{ 
-		setRotation(_axis, _angle); 
+	b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
+	{
+		setRotation(_axis, _angle);
 	}
-  /**@brief Constructor from Euler angles
+	/**@brief Constructor from Euler angles
    * @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
    * @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
    * @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
 	b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
-	{ 
+	{
 #ifndef B3_EULER_DEFAULT_ZYX
-		setEuler(yaw, pitch, roll); 
+		setEuler(yaw, pitch, roll);
 #else
-		setEulerZYX(yaw, pitch, roll); 
-#endif 
+		setEulerZYX(yaw, pitch, roll);
+#endif
 	}
-  /**@brief Set the rotation using axis angle notation 
+	/**@brief Set the rotation using axis angle notation 
    * @param axis The axis around which to rotate
    * @param angle The magnitude of the rotation in Radians */
 	void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
 	{
 		b3Scalar d = axis.length();
 		b3Assert(d != b3Scalar(0.0));
-		b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
-		setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s, 
-			b3Cos(_angle * b3Scalar(0.5)));
+		if (d < B3_EPSILON)
+		{
+			setValue(0, 0, 0, 1);
+		}
+		else
+		{
+			b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
+			setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
+				b3Cos(_angle * b3Scalar(0.5)));
+		}
 	}
-  /**@brief Set the quaternion using Euler angles
+	/**@brief Set the quaternion using Euler angles
    * @param yaw Angle around Y
    * @param pitch Angle around X
    * @param roll Angle around Z */
 	void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
 	{
-		b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);  
-		b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);  
-		b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);  
+		b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
+		b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
+		b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
 		b3Scalar cosYaw = b3Cos(halfYaw);
 		b3Scalar sinYaw = b3Sin(halfYaw);
 		b3Scalar cosPitch = b3Cos(halfPitch);
@@ -122,34 +123,34 @@ public:
 		b3Scalar cosRoll = b3Cos(halfRoll);
 		b3Scalar sinRoll = b3Sin(halfRoll);
 		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
-			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
-			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
-			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
+				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
+				 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
+				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
 	}
- 
+
 	/**@brief Set the quaternion using euler angles 
    * @param yaw Angle around Z
    * @param pitch Angle around Y
    * @param roll Angle around X */
 	void setEulerZYX(const b3Scalar& yawZ, const b3Scalar& pitchY, const b3Scalar& rollX)
 	{
-		b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);  
-		b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);  
-		b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);  
+		b3Scalar halfYaw = b3Scalar(yawZ) * b3Scalar(0.5);
+		b3Scalar halfPitch = b3Scalar(pitchY) * b3Scalar(0.5);
+		b3Scalar halfRoll = b3Scalar(rollX) * b3Scalar(0.5);
 		b3Scalar cosYaw = b3Cos(halfYaw);
 		b3Scalar sinYaw = b3Sin(halfYaw);
 		b3Scalar cosPitch = b3Cos(halfPitch);
 		b3Scalar sinPitch = b3Sin(halfPitch);
 		b3Scalar cosRoll = b3Cos(halfRoll);
 		b3Scalar sinRoll = b3Sin(halfRoll);
-		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
-                         cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
-                         cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
-                         cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
+		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,   //x
+				 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,   //y
+				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,   //z
+				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);  //formerly yzx
 		normalize();
 	}
 
-	  /**@brief Get the euler angles from this quaternion
+	/**@brief Get the euler angles from this quaternion
 	   * @param yaw Angle around Z
 	   * @param pitch Angle around Y
 	   * @param roll Angle around X */
@@ -166,221 +167,221 @@ public:
 		squ = m_floats[3] * m_floats[3];
 		rollX = b3Atan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
 		sarg = b3Scalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
-		pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI: (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
+		pitchY = sarg <= b3Scalar(-1.0) ? b3Scalar(-0.5) * B3_PI : (sarg >= b3Scalar(1.0) ? b3Scalar(0.5) * B3_PI : b3Asin(sarg));
 		yawZ = b3Atan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
 	}
 
-  /**@brief Add two quaternions
+	/**@brief Add two quaternions
    * @param q The quaternion to add to this one */
-	B3_FORCE_INLINE	b3Quaternion& operator+=(const b3Quaternion& q)
+	B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		mVec128 = _mm_add_ps(mVec128, q.mVec128);
 #elif defined(B3_USE_NEON)
 		mVec128 = vaddq_f32(mVec128, q.mVec128);
-#else	
-		m_floats[0] += q.getX(); 
-        m_floats[1] += q.getY(); 
-        m_floats[2] += q.getZ(); 
-        m_floats[3] += q.m_floats[3];
+#else
+		m_floats[0] += q.getX();
+		m_floats[1] += q.getY();
+		m_floats[2] += q.getZ();
+		m_floats[3] += q.m_floats[3];
 #endif
 		return *this;
 	}
 
-  /**@brief Subtract out a quaternion
+	/**@brief Subtract out a quaternion
    * @param q The quaternion to subtract from this one */
-	b3Quaternion& operator-=(const b3Quaternion& q) 
+	b3Quaternion& operator-=(const b3Quaternion& q)
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
 #elif defined(B3_USE_NEON)
 		mVec128 = vsubq_f32(mVec128, q.mVec128);
-#else	
-		m_floats[0] -= q.getX(); 
-        m_floats[1] -= q.getY(); 
-        m_floats[2] -= q.getZ(); 
-        m_floats[3] -= q.m_floats[3];
+#else
+		m_floats[0] -= q.getX();
+		m_floats[1] -= q.getY();
+		m_floats[2] -= q.getZ();
+		m_floats[3] -= q.m_floats[3];
 #endif
-        return *this;
+		return *this;
 	}
 
-  /**@brief Scale this quaternion
+	/**@brief Scale this quaternion
    * @param s The scalar to scale by */
 	b3Quaternion& operator*=(const b3Scalar& s)
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
-		vs = b3_pshufd_ps(vs, 0);	//	(S S S S)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
+		vs = b3_pshufd_ps(vs, 0);     //	(S S S S)
 		mVec128 = _mm_mul_ps(mVec128, vs);
 #elif defined(B3_USE_NEON)
 		mVec128 = vmulq_n_f32(mVec128, s);
 #else
-		m_floats[0] *= s; 
-        m_floats[1] *= s; 
-        m_floats[2] *= s; 
-        m_floats[3] *= s;
+		m_floats[0] *= s;
+		m_floats[1] *= s;
+		m_floats[2] *= s;
+		m_floats[3] *= s;
 #endif
 		return *this;
 	}
 
-  /**@brief Multiply this quaternion by q on the right
+	/**@brief Multiply this quaternion by q on the right
    * @param q The other quaternion 
    * Equivilant to this = this * q */
 	b3Quaternion& operator*=(const b3Quaternion& q)
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		__m128 vQ2 = q.get128();
-		
-		__m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0,1,2,0));
-		__m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));
-		
+
+		__m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0, 1, 2, 0));
+		__m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));
+
 		A1 = A1 * B1;
-		
-		__m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1,2,0,1));
-		__m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
-		
+
+		__m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1, 2, 0, 1));
+		__m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
+
 		A2 = A2 * B2;
-		
-		B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2,0,1,2));
-		B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-		
-		B1 = B1 * B2;	//	A3 *= B3
-		
-		mVec128 = b3_splat_ps(mVec128, 3);	//	A0
-		mVec128 = mVec128 * vQ2;	//	A0 * B0
-		
-		A1 = A1 + A2;	//	AB12
-		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3 
-		A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
-		mVec128 = mVec128+ A1;	//	AB03 + AB12
-
-#elif defined(B3_USE_NEON)     
-
-        float32x4_t vQ1 = mVec128;
-        float32x4_t vQ2 = q.get128();
-        float32x4_t A0, A1, B1, A2, B2, A3, B3;
-        float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-        
-        {
-        float32x2x2_t tmp;
-        tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
-        vQ1zx = tmp.val[0];
-
-        tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
-        vQ2zx = tmp.val[0];
-        }
-        vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
-
-        vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
-
-        vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
-        vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
-
-        A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
-        B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
-
-        A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
-        B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
-
-        A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
-        B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
-
-        A1 = vmulq_f32(A1, B1);
-        A2 = vmulq_f32(A2, B2);
-        A3 = vmulq_f32(A3, B3);	//	A3 *= B3
-        A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
-
-        A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
-        A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
-        
-        //	change the sign of the last element
-        A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
-        A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
-        
-        mVec128 = A0;
+
+		B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2, 0, 1, 2));
+		B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
+
+		B1 = B1 * B2;  //	A3 *= B3
+
+		mVec128 = b3_splat_ps(mVec128, 3);  //	A0
+		mVec128 = mVec128 * vQ2;            //	A0 * B0
+
+		A1 = A1 + A2;                  //	AB12
+		mVec128 = mVec128 - B1;        //	AB03 = AB0 - AB3
+		A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
+		mVec128 = mVec128 + A1;        //	AB03 + AB12
+
+#elif defined(B3_USE_NEON)
+
+		float32x4_t vQ1 = mVec128;
+		float32x4_t vQ2 = q.get128();
+		float32x4_t A0, A1, B1, A2, B2, A3, B3;
+		float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+		{
+			float32x2x2_t tmp;
+			tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
+			vQ1zx = tmp.val[0];
+
+			tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
+			vQ2zx = tmp.val[0];
+		}
+		vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
+
+		vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+		vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+		vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+		A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
+		B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
+
+		A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+		B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+		A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
+		B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
+
+		A1 = vmulq_f32(A1, B1);
+		A2 = vmulq_f32(A2, B2);
+		A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
+		A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
+
+		A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
+		A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
+
+		//	change the sign of the last element
+		A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+		A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
+
+		mVec128 = A0;
 #else
 		setValue(
-            m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
+			m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
 			m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
 			m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
 			m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
 #endif
 		return *this;
 	}
-  /**@brief Return the dot product between this quaternion and another
+	/**@brief Return the dot product between this quaternion and another
    * @param q The other quaternion */
 	b3Scalar dot(const b3Quaternion& q) const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-		__m128	vd;
-		
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+		__m128 vd;
+
 		vd = _mm_mul_ps(mVec128, q.mVec128);
-		
-        __m128 t = _mm_movehl_ps(vd, vd);
+
+		__m128 t = _mm_movehl_ps(vd, vd);
 		vd = _mm_add_ps(vd, t);
 		t = _mm_shuffle_ps(vd, vd, 0x55);
 		vd = _mm_add_ss(vd, t);
-		
-        return _mm_cvtss_f32(vd);
+
+		return _mm_cvtss_f32(vd);
 #elif defined(B3_USE_NEON)
 		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
-		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));  
+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
 		x = vpadd_f32(x, x);
 		return vget_lane_f32(x, 0);
-#else    
-		return  m_floats[0] * q.getX() + 
-                m_floats[1] * q.getY() + 
-                m_floats[2] * q.getZ() + 
-                m_floats[3] * q.m_floats[3];
+#else
+		return m_floats[0] * q.getX() +
+			   m_floats[1] * q.getY() +
+			   m_floats[2] * q.getZ() +
+			   m_floats[3] * q.m_floats[3];
 #endif
 	}
 
-  /**@brief Return the length squared of the quaternion */
+	/**@brief Return the length squared of the quaternion */
 	b3Scalar length2() const
 	{
 		return dot(*this);
 	}
 
-  /**@brief Return the length of the quaternion */
+	/**@brief Return the length of the quaternion */
 	b3Scalar length() const
 	{
 		return b3Sqrt(length2());
 	}
 
-  /**@brief Normalize the quaternion 
+	/**@brief Normalize the quaternion 
    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
-	b3Quaternion& normalize() 
+	b3Quaternion& normalize()
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-		__m128	vd;
-		
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+		__m128 vd;
+
 		vd = _mm_mul_ps(mVec128, mVec128);
-		
-        __m128 t = _mm_movehl_ps(vd, vd);
+
+		__m128 t = _mm_movehl_ps(vd, vd);
 		vd = _mm_add_ps(vd, t);
 		t = _mm_shuffle_ps(vd, vd, 0x55);
 		vd = _mm_add_ss(vd, t);
 
 		vd = _mm_sqrt_ss(vd);
 		vd = _mm_div_ss(b3vOnes, vd);
-        vd = b3_pshufd_ps(vd, 0); // splat
+		vd = b3_pshufd_ps(vd, 0);  // splat
 		mVec128 = _mm_mul_ps(mVec128, vd);
-    
+
 		return *this;
-#else    
+#else
 		return *this /= length();
 #endif
 	}
 
-  /**@brief Return a scaled version of this quaternion
+	/**@brief Return a scaled version of this quaternion
    * @param s The scale factor */
 	B3_FORCE_INLINE b3Quaternion
 	operator*(const b3Scalar& s) const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
-		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
-		vs = b3_pshufd_ps(vs, 0x00);	//	(S S S S)
-		
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
+		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
+		vs = b3_pshufd_ps(vs, 0x00);  //	(S S S S)
+
 		return b3Quaternion(_mm_mul_ps(mVec128, vs));
 #elif defined(B3_USE_NEON)
 		return b3Quaternion(vmulq_n_f32(mVec128, s));
@@ -389,7 +390,7 @@ public:
 #endif
 	}
 
-  /**@brief Return an inversely scaled versionof this quaternion
+	/**@brief Return an inversely scaled version of this quaternion
    * @param s The inverse scale factor */
 	b3Quaternion operator/(const b3Scalar& s) const
 	{
@@ -397,29 +398,29 @@ public:
 		return *this * (b3Scalar(1.0) / s);
 	}
 
-  /**@brief Inversely scale this quaternion
+	/**@brief Inversely scale this quaternion
    * @param s The scale factor */
-	b3Quaternion& operator/=(const b3Scalar& s) 
+	b3Quaternion& operator/=(const b3Scalar& s)
 	{
 		b3Assert(s != b3Scalar(0.0));
 		return *this *= b3Scalar(1.0) / s;
 	}
 
-  /**@brief Return a normalized version of this quaternion */
-	b3Quaternion normalized() const 
+	/**@brief Return a normalized version of this quaternion */
+	b3Quaternion normalized() const
 	{
 		return *this / length();
-	} 
-  /**@brief Return the angle between this quaternion and the other 
+	}
+	/**@brief Return the angle between this quaternion and the other 
    * @param q The other quaternion */
-	b3Scalar angle(const b3Quaternion& q) const 
+	b3Scalar angle(const b3Quaternion& q) const
 	{
 		b3Scalar s = b3Sqrt(length2() * q.length2());
 		b3Assert(s != b3Scalar(0.0));
 		return b3Acos(dot(q) / s);
 	}
-  /**@brief Return the angle of rotation represented by this quaternion */
-	b3Scalar getAngle() const 
+	/**@brief Return the angle of rotation represented by this quaternion */
+	b3Scalar getAngle() const
 	{
 		b3Scalar s = b3Scalar(2.) * b3Acos(m_floats[3]);
 		return s;
@@ -428,117 +429,116 @@ public:
 	/**@brief Return the axis of the rotation represented by this quaternion */
 	b3Vector3 getAxis() const
 	{
-		b3Scalar s_squared = 1.f-m_floats[3]*m_floats[3];
-		
-		if (s_squared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
-			return b3MakeVector3(1.0, 0.0, 0.0);  // Arbitrary
-		b3Scalar s = 1.f/b3Sqrt(s_squared);
+		b3Scalar s_squared = 1.f - m_floats[3] * m_floats[3];
+
+		if (s_squared < b3Scalar(10.) * B3_EPSILON)  //Check for divide by zero
+			return b3MakeVector3(1.0, 0.0, 0.0);     // Arbitrary
+		b3Scalar s = 1.f / b3Sqrt(s_squared);
 		return b3MakeVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 	}
 
 	/**@brief Return the inverse of this quaternion */
 	b3Quaternion inverse() const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
 #elif defined(B3_USE_NEON)
-        return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
-#else	
+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
+#else
 		return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
 #endif
 	}
 
-  /**@brief Return the sum of this quaternion and the other 
+	/**@brief Return the sum of this quaternion and the other 
    * @param q2 The other quaternion */
 	B3_FORCE_INLINE b3Quaternion
 	operator+(const b3Quaternion& q2) const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
 #elif defined(B3_USE_NEON)
-        return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
-#else	
+		return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
+#else
 		const b3Quaternion& q1 = *this;
 		return b3Quaternion(q1.getX() + q2.getX(), q1.getY() + q2.getY(), q1.getZ() + q2.getZ(), q1.m_floats[3] + q2.m_floats[3]);
 #endif
 	}
 
-  /**@brief Return the difference between this quaternion and the other 
+	/**@brief Return the difference between this quaternion and the other 
    * @param q2 The other quaternion */
 	B3_FORCE_INLINE b3Quaternion
 	operator-(const b3Quaternion& q2) const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
 #elif defined(B3_USE_NEON)
-        return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
-#else	
+		return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
+#else
 		const b3Quaternion& q1 = *this;
 		return b3Quaternion(q1.getX() - q2.getX(), q1.getY() - q2.getY(), q1.getZ() - q2.getZ(), q1.m_floats[3] - q2.m_floats[3]);
 #endif
 	}
 
-  /**@brief Return the negative of this quaternion 
+	/**@brief Return the negative of this quaternion 
    * This simply negates each element */
 	B3_FORCE_INLINE b3Quaternion operator-() const
 	{
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 		return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
 #elif defined(B3_USE_NEON)
-		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask) );
-#else	
+		return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask));
+#else
 		const b3Quaternion& q2 = *this;
-		return b3Quaternion( - q2.getX(), - q2.getY(),  - q2.getZ(),  - q2.m_floats[3]);
+		return b3Quaternion(-q2.getX(), -q2.getY(), -q2.getZ(), -q2.m_floats[3]);
 #endif
 	}
-  /**@todo document this and it's use */
-	B3_FORCE_INLINE b3Quaternion farthest( const b3Quaternion& qd) const 
+	/**@todo document this and its use */
+	B3_FORCE_INLINE b3Quaternion farthest(const b3Quaternion& qd) const
 	{
-		b3Quaternion diff,sum;
+		b3Quaternion diff, sum;
 		diff = *this - qd;
 		sum = *this + qd;
-		if( diff.dot(diff) > sum.dot(sum) )
+		if (diff.dot(diff) > sum.dot(sum))
 			return qd;
 		return (-qd);
 	}
 
 	/**@todo document this and it's use */
-	B3_FORCE_INLINE b3Quaternion nearest( const b3Quaternion& qd) const 
+	B3_FORCE_INLINE b3Quaternion nearest(const b3Quaternion& qd) const
 	{
-		b3Quaternion diff,sum;
+		b3Quaternion diff, sum;
 		diff = *this - qd;
 		sum = *this + qd;
-		if( diff.dot(diff) < sum.dot(sum) )
+		if (diff.dot(diff) < sum.dot(sum))
 			return qd;
 		return (-qd);
 	}
 
-
-  /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
+	/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
    * @param q The other quaternion to interpolate with 
    * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
    * Slerp interpolates assuming constant velocity.  */
 	b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
 	{
-	  b3Scalar magnitude = b3Sqrt(length2() * q.length2()); 
-	  b3Assert(magnitude > b3Scalar(0));
+		b3Scalar magnitude = b3Sqrt(length2() * q.length2());
+		b3Assert(magnitude > b3Scalar(0));
 
-    b3Scalar product = dot(q) / magnitude;
-    if (b3Fabs(product) < b3Scalar(1))
+		b3Scalar product = dot(q) / magnitude;
+		if (b3Fabs(product) < b3Scalar(1))
 		{
-      // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
-      const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
-
-      const b3Scalar theta = b3Acos(sign * product);
-      const b3Scalar s1 = b3Sin(sign * t * theta);   
-      const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
-      const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
-
-      return b3Quaternion(
-          (m_floats[0] * s0 + q.getX() * s1) * d,
-          (m_floats[1] * s0 + q.getY() * s1) * d,
-          (m_floats[2] * s0 + q.getZ() * s1) * d,
-          (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
+			// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+			const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);
+
+			const b3Scalar theta = b3Acos(sign * product);
+			const b3Scalar s1 = b3Sin(sign * t * theta);
+			const b3Scalar d = b3Scalar(1.0) / b3Sin(theta);
+			const b3Scalar s0 = b3Sin((b3Scalar(1.0) - t) * theta);
+
+			return b3Quaternion(
+				(m_floats[0] * s0 + q.getX() * s1) * d,
+				(m_floats[1] * s0 + q.getY() * s1) * d,
+				(m_floats[2] * s0 + q.getZ() * s1) * d,
+				(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
 		}
 		else
 		{
@@ -546,301 +546,294 @@ public:
 		}
 	}
 
-	static const b3Quaternion&	getIdentity()
+	static const b3Quaternion& getIdentity()
 	{
-		static const b3Quaternion identityQuat(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.),b3Scalar(1.));
+		static const b3Quaternion identityQuat(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.), b3Scalar(1.));
 		return identityQuat;
 	}
 
 	B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }
-
-	
 };
 
-
-
-
-
 /**@brief Return the product of two quaternions */
 B3_FORCE_INLINE b3Quaternion
-operator*(const b3Quaternion& q1, const b3Quaternion& q2) 
+operator*(const b3Quaternion& q1, const b3Quaternion& q2)
 {
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 	__m128 vQ1 = q1.get128();
 	__m128 vQ2 = q2.get128();
 	__m128 A0, A1, B1, A2, B2;
-    
-	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
-	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
+
+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x     //      vtrn
+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X     // vdup vext
 
 	A1 = A1 * B1;
-	
-	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext 
-	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
+
+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));  // Y Z  X Y     // vext
+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));  // z x  Y Y     // vtrn vdup
 
 	A2 = A2 * B2;
 
-	B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
-	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
-	
-	B1 = B1 * B2;	//	A3 *= B3
+	B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));  // z x Y Z      // vtrn vext
+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));  // Y Z x z      // vext vtrn
+
+	B1 = B1 * B2;  //	A3 *= B3
 
-	A0 = b3_splat_ps(vQ1, 3);	//	A0
-	A0 = A0 * vQ2;	//	A0 * B0
+	A0 = b3_splat_ps(vQ1, 3);  //	A0
+	A0 = A0 * vQ2;             //	A0 * B0
+
+	A1 = A1 + A2;  //	AB12
+	A0 = A0 - B1;  //	AB03 = AB0 - AB3
+
+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
+	A0 = A0 + A1;                  //	AB03 + AB12
 
-	A1 = A1 + A2;	//	AB12
-	A0 =  A0 - B1;	//	AB03 = AB0 - AB3 
-	
-    A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
-	A0 = A0 + A1;	//	AB03 + AB12
-	
 	return b3Quaternion(A0);
 
-#elif defined(B3_USE_NEON)     
+#elif defined(B3_USE_NEON)
 
 	float32x4_t vQ1 = q1.get128();
 	float32x4_t vQ2 = q2.get128();
 	float32x4_t A0, A1, B1, A2, B2, A3, B3;
-    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-    
-    {
-    float32x2x2_t tmp;
-    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
-    vQ1zx = tmp.val[0];
+	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+
+	{
+		float32x2x2_t tmp;
+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
+		vQ1zx = tmp.val[0];
 
-    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
-    vQ2zx = tmp.val[0];
-    }
-    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
+		vQ2zx = tmp.val[0];
+	}
+	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 
-    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 
-    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
-    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 
-    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
-    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
+	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
+	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 
 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
-    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 
-    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
-    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 
 	A1 = vmulq_f32(A1, B1);
 	A2 = vmulq_f32(A2, B2);
-	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
-	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
-
-	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
-	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
-	
-    //	change the sign of the last element
-    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
-	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
-	
+	A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
+	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
+
+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
+	A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
+
+	//	change the sign of the last element
+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+	A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
+
 	return b3Quaternion(A0);
 
 #else
 	return b3Quaternion(
-        q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
+		q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
 		q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
 		q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
-		q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ()); 
+		q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
 #endif
 }
 
 B3_FORCE_INLINE b3Quaternion
 operator*(const b3Quaternion& q, const b3Vector3& w)
 {
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 	__m128 vQ1 = q.get128();
 	__m128 vQ2 = w.get128();
 	__m128 A1, B1, A2, B2, A3, B3;
-	
-	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3,3,3,0));
-	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0,1,2,0));
+
+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3, 3, 3, 0));
+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0, 1, 2, 0));
 
 	A1 = A1 * B1;
-	
-	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
-	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
+
+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
 
 	A2 = A2 * B2;
 
-	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
-	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-	
-	A3 = A3 * B3;	//	A3 *= B3
+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
+
+	A3 = A3 * B3;  //	A3 *= B3
+
+	A1 = A1 + A2;                  //	AB12
+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
+	A1 = A1 - A3;                  //	AB123 = AB12 - AB3
 
-	A1 = A1 + A2;	//	AB12
-	A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
-    A1 = A1 - A3;	//	AB123 = AB12 - AB3 
-	
 	return b3Quaternion(A1);
-    
-#elif defined(B3_USE_NEON)     
+
+#elif defined(B3_USE_NEON)
 
 	float32x4_t vQ1 = q.get128();
 	float32x4_t vQ2 = w.get128();
 	float32x4_t A1, B1, A2, B2, A3, B3;
-    float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
-    
-    vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); 
-    {
-    float32x2x2_t tmp;
+	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
 
-    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
-    vQ2zx = tmp.val[0];
+	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
+	{
+		float32x2x2_t tmp;
 
-    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
-    vQ1zx = tmp.val[0];
-    }
+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
+		vQ2zx = tmp.val[0];
 
-    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
+		vQ1zx = tmp.val[0];
+	}
+
+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 
-    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
-    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 
-    A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X 
-    B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x 
+	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // W W  W X
+	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // X Y  z x
 
 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
-    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 
-    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
-    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 
 	A1 = vmulq_f32(A1, B1);
 	A2 = vmulq_f32(A2, B2);
-	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
-
-	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
-	
-    //	change the sign of the last element
-    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
-	
-    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
-	
+	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
+
+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
+
+	//	change the sign of the last element
+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+
+	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
+
 	return b3Quaternion(A1);
-    
+
 #else
-	return b3Quaternion( 
-         q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
-		 q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
-		 q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
-		-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ()); 
+	return b3Quaternion(
+		q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
+		q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
+		q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
+		-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
 #endif
 }
 
 B3_FORCE_INLINE b3Quaternion
 operator*(const b3Vector3& w, const b3Quaternion& q)
 {
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 	__m128 vQ1 = w.get128();
 	__m128 vQ2 = q.get128();
 	__m128 A1, B1, A2, B2, A3, B3;
-	
-	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0));  // X Y  z x
-	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));  // W W  W X 
+
+	A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0, 1, 2, 0));  // X Y  z x
+	B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3, 3, 3, 0));  // W W  W X
 
 	A1 = A1 * B1;
-	
-	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
-	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
 
-	A2 = A2 *B2;
+	A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1, 2, 0, 1));
+	B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2, 0, 1, 1));
+
+	A2 = A2 * B2;
+
+	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2, 0, 1, 2));
+	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1, 2, 0, 2));
+
+	A3 = A3 * B3;  //	A3 *= B3
 
-	A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
-	B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
-	
-	A3 = A3 * B3;	//	A3 *= B3
+	A1 = A1 + A2;                  //	AB12
+	A1 = _mm_xor_ps(A1, b3vPPPM);  //	change sign of the last element
+	A1 = A1 - A3;                  //	AB123 = AB12 - AB3
 
-	A1 = A1 + A2;	//	AB12
-	A1 = _mm_xor_ps(A1, b3vPPPM);	//	change sign of the last element
-	A1 = A1 - A3;	//	AB123 = AB12 - AB3 
-	
 	return b3Quaternion(A1);
 
-#elif defined(B3_USE_NEON)     
+#elif defined(B3_USE_NEON)
 
 	float32x4_t vQ1 = w.get128();
 	float32x4_t vQ2 = q.get128();
-	float32x4_t  A1, B1, A2, B2, A3, B3;
-    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
-    
-    {
-    float32x2x2_t tmp;
-   
-    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
-    vQ1zx = tmp.val[0];
+	float32x4_t A1, B1, A2, B2, A3, B3;
+	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 
-    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
-    vQ2zx = tmp.val[0];
-    }
-    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
+	{
+		float32x2x2_t tmp;
+
+		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
+		vQ1zx = tmp.val[0];
+
+		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
+		vQ2zx = tmp.val[0];
+	}
+	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 
-    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 
-    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
-    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 
-    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
-    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
+	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
+	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 
 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
-    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 
-    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
-    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
+	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 
 	A1 = vmulq_f32(A1, B1);
 	A2 = vmulq_f32(A2, B2);
-	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
-
-	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
-	
-    //	change the sign of the last element
-    A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);	
-	
-    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
-	
+	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
+
+	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
+
+	//	change the sign of the last element
+	A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
+
+	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
+
 	return b3Quaternion(A1);
-    
+
 #else
-	return b3Quaternion( 
-        +w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
+	return b3Quaternion(
+		+w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
 		+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
 		+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
-		-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ()); 
+		-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
 #endif
 }
 
 /**@brief Calculate the dot product between two quaternions */
-B3_FORCE_INLINE b3Scalar 
-b3Dot(const b3Quaternion& q1, const b3Quaternion& q2) 
-{ 
-	return q1.dot(q2); 
+B3_FORCE_INLINE b3Scalar
+b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
+{
+	return q1.dot(q2);
 }
 
-
 /**@brief Return the length of a quaternion */
 B3_FORCE_INLINE b3Scalar
-b3Length(const b3Quaternion& q) 
-{ 
-	return q.length(); 
+b3Length(const b3Quaternion& q)
+{
+	return q.length();
 }
 
 /**@brief Return the angle between two quaternions*/
 B3_FORCE_INLINE b3Scalar
-b3Angle(const b3Quaternion& q1, const b3Quaternion& q2) 
-{ 
-	return q1.angle(q2); 
+b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
+{
+	return q1.angle(q2);
 }
 
 /**@brief Return the inverse of a quaternion*/
 B3_FORCE_INLINE b3Quaternion
-b3Inverse(const b3Quaternion& q) 
+b3Inverse(const b3Quaternion& q)
 {
 	return q.inverse();
 }
@@ -851,7 +844,7 @@ b3Inverse(const b3Quaternion& q)
  * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2 
  * Slerp assumes constant velocity between positions. */
 B3_FORCE_INLINE b3Quaternion
-b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t) 
+b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
 {
 	return q1.slerp(q2, t);
 }
@@ -859,7 +852,7 @@ b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
 B3_FORCE_INLINE b3Quaternion
 b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
 {
-	return rot0*rot1;
+	return rot0 * rot1;
 }
 
 B3_FORCE_INLINE b3Quaternion
@@ -868,51 +861,45 @@ b3QuatNormalized(const b3Quaternion& orn)
 	return orn.normalized();
 }
 
-
-
-B3_FORCE_INLINE b3Vector3 
-b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v) 
+B3_FORCE_INLINE b3Vector3
+b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
 {
 	b3Quaternion q = rotation * v;
 	q *= rotation.inverse();
-#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
+#if defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE)
 	return b3MakeVector3(_mm_and_ps(q.get128(), b3vFFF0fMask));
 #elif defined(B3_USE_NEON)
-    return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
-#else	
-	return b3MakeVector3(q.getX(),q.getY(),q.getZ());
+	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), b3vFFF0Mask));
+#else
+	return b3MakeVector3(q.getX(), q.getY(), q.getZ());
 #endif
 }
 
-B3_FORCE_INLINE b3Quaternion 
-b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
+B3_FORCE_INLINE b3Quaternion
+b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1)  // Game Programming Gems 2.10. Make sure v0 and v1 are normalized.
 {
 	b3Vector3 c = v0.cross(v1);
-	b3Scalar  d = v0.dot(v1);
+	b3Scalar d = v0.dot(v1);
 
 	if (d < -1.0 + B3_EPSILON)
 	{
-		b3Vector3 n,unused;
-		b3PlaneSpace1(v0,n,unused);
-		return b3Quaternion(n.getX(),n.getY(),n.getZ(),0.0f); // just pick any vector that is orthogonal to v0
+		b3Vector3 n, unused;
+		b3PlaneSpace1(v0, n, unused);
+		return b3Quaternion(n.getX(), n.getY(), n.getZ(), 0.0f);  // just pick any vector that is orthogonal to v0
 	}
 
-	b3Scalar  s = b3Sqrt((1.0f + d) * 2.0f);
+	b3Scalar s = b3Sqrt((1.0f + d) * 2.0f);
 	b3Scalar rs = 1.0f / s;
 
-	return b3Quaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
-	
+	return b3Quaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
 }
 
-B3_FORCE_INLINE b3Quaternion 
-b3ShortestArcQuatNormalize2(b3Vector3& v0,b3Vector3& v1)
+B3_FORCE_INLINE b3Quaternion
+b3ShortestArcQuatNormalize2(b3Vector3& v0, b3Vector3& v1)
 {
 	v0.normalize();
 	v1.normalize();
-	return b3ShortestArcQuat(v0,v1);
+	return b3ShortestArcQuat(v0, v1);
 }
 
-#endif //B3_SIMD__QUATERNION_H_
-
-
-
+#endif  //B3_SIMD__QUATERNION_H_